Skip to content

Commit d257695

Browse files
add reasoning level for gpt-5 family
1 parent 8cced67 commit d257695

File tree

6 files changed

+47
-9
lines changed

6 files changed

+47
-9
lines changed

README.md

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ const result = await zerox({
9696

9797
### Parameters
9898

99-
```ts
10099
const result = await zerox({
101100
// Required
102101
filePath: "path/to/file",
@@ -132,6 +131,14 @@ const result = await zerox({
132131
});
133132
```
134133
134+
// For GPT-5 models, you can control the reasoning effort:
135+
// Allowed values: "minimal", "low", "medium", "high"
136+
// Example:
137+
// ...
138+
// model: ModelOptions.OPENAI_GPT_5,
139+
// reasoning_effort: "medium",
140+
// ...
141+
135142
The `maintainFormat` option tries to return the markdown in a consistent format by passing the output of a prior page in as additional context for the next page. This requires the requests to run synchronously, so it's a lot slower. But valuable if your documents have a lot of tabular data, or frequently have tables that cross pages.
136143
137144
```
@@ -386,7 +393,6 @@ print(result)
386393

387394
### Parameters
388395

389-
```python
390396
async def zerox(
391397
cleanup: bool = True,
392398
concurrency: int = 10,
@@ -402,6 +408,12 @@ async def zerox(
402408
...
403409
```
404410
411+
# For GPT-5 models, you can control the reasoning effort:
412+
# Allowed values: "minimal", "low", "medium", "high"
413+
# Example:
414+
# result = await zerox(file_path=..., model="gpt-5", reasoning_effort="medium")
415+
416+
405417
Parameters
406418
407419
- **cleanup** (bool, optional):
@@ -423,6 +435,8 @@ Parameters
423435
The system prompt to use for the model; this overrides the default system prompt of Zerox. Generally it is not required unless you want some specific behavior. Defaults to None.
424436
- **select_pages** (Optional[Union[int, Iterable[int]]], optional):
425437
Pages to process; can be a single page number or an iterable of page numbers. Defaults to None.
438+
- **reasoning_effort** (str, optional, GPT-5 only):
439+
Controls the reasoning effort for GPT-5 models. Allowed values: "minimal", "low", "medium", "high". Defaults to None.
426440
- **kwargs** (dict, optional):
427441
Additional keyword arguments to pass to the litellm.completion method.
428442
Refer to the LiteLLM Documentation and Completion Input for details.

node-zerox/src/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ export const zerox = async ({
6464
imageFormat = "png",
6565
imageHeight,
6666
llmParams = {},
67+
reasoning_effort,
6768
maintainFormat = false,
6869
maxImageSize = 15,
6970
maxRetries = 1,
@@ -79,6 +80,10 @@ export const zerox = async ({
7980
trimEdges = true,
8081
}: ZeroxArgs): Promise<ZeroxOutput> => {
8182
let extracted: Record<string, unknown> | null = null;
83+
// If reasoning is provided, add to llmParams
84+
if (reasoning_effort) {
85+
llmParams = { ...llmParams, reasoning_effort };
86+
}
8287
let extractedLogprobs: LogprobPage[] = [];
8388
let inputTokenCount: number = 0;
8489
let outputTokenCount: number = 0;

node-zerox/src/models/openAI.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ export default class OpenAIModel implements ModelInterface {
9696
priorPage,
9797
prompt,
9898
}: CompletionArgs): Promise<CompletionResponse> {
99-
const systemPrompt = prompt || SYSTEM_PROMPT_BASE;
99+
const systemPrompt = prompt || SYSTEM_PROMPT_BASE;
100100

101101
// Default system message
102102
const messages: any = [{ role: "system", content: systemPrompt }];
@@ -120,13 +120,18 @@ export default class OpenAIModel implements ModelInterface {
120120
messages.push({ role: "user", content: imageContents });
121121

122122
try {
123+
// If model is GPT-5 and reasoning_effort is provided, add it to payload
124+
let payload: any = {
125+
messages,
126+
model: this.model,
127+
...convertKeysToSnakeCase(this.llmParams ?? null),
128+
};
129+
if (this.model && this.model.startsWith("gpt-5") && this.llmParams?.reasoning_effort) {
130+
payload.reasoning_effort = this.llmParams.reasoning_effort;
131+
}
123132
const response = await axios.post(
124133
"https://api.openai.com/v1/chat/completions",
125-
{
126-
messages,
127-
model: this.model,
128-
...convertKeysToSnakeCase(this.llmParams ?? null),
129-
},
134+
payload,
130135
{
131136
headers: {
132137
Authorization: `Bearer ${this.apiKey}`,

node-zerox/src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export interface ZeroxArgs {
2828
imageHeight?: number;
2929
imageFormat?: "png" | "jpeg";
3030
llmParams?: Partial<LLMParams>;
31+
reasoning_effort?: "minimal" | "low" | "medium" | "high";
3132
maintainFormat?: boolean;
3233
maxImageSize?: number;
3334
maxRetries?: number;
@@ -227,6 +228,7 @@ export interface GoogleLLMParams extends BaseLLMParams {
227228
export interface OpenAILLMParams extends BaseLLMParams {
228229
logprobs: boolean;
229230
maxTokens: number;
231+
reasoning_effort?: "minimal" | "low" | "medium" | "high";
230232
}
231233

232234
// Union type of all provider params

py_zerox/pyzerox/core/zerox.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ async def zerox(
3535
temp_dir: Optional[str] = None,
3636
custom_system_prompt: Optional[str] = None,
3737
select_pages: Optional[Union[int, Iterable[int]]] = None,
38+
reasoning_effort: Optional[str] = None,
3839
**kwargs
3940
) -> ZeroxOutput:
4041
"""
@@ -76,6 +77,11 @@ async def zerox(
7677
raise FileUnavailable()
7778

7879
# Create an instance of the litellm model interface
80+
if reasoning_effort is not None:
81+
allowed = {"minimal", "low", "medium", "high"}
82+
if reasoning_effort not in allowed:
83+
raise ValueError(f"reasoning_effort must be one of {allowed}")
84+
kwargs["reasoning_effort"] = reasoning_effort
7985
vision_model = litellmmodel(model=model,**kwargs)
8086

8187
# override the system prompt if a custom prompt is provided

py_zerox/pyzerox/models/modellitellm.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,13 @@ async def completion(
9292
)
9393

9494
try:
95-
response = await litellm.acompletion(model=self.model, messages=messages, **self.kwargs)
95+
# If model is GPT-5 and reasoning_effort is provided, add it to kwargs
96+
call_kwargs = dict(self.kwargs)
97+
if self.model and self.model.startswith("gpt-5") and "reasoning_effort" in call_kwargs:
98+
allowed = {"minimal", "low", "medium", "high"}
99+
if call_kwargs["reasoning_effort"] not in allowed:
100+
raise ValueError(f"reasoning_effort must be one of {allowed}")
101+
response = await litellm.acompletion(model=self.model, messages=messages, **call_kwargs)
96102

97103
## completion response
98104
response = CompletionResponse(

0 commit comments

Comments
 (0)