Commit ef61016

add reasoning level for gpt-5 family
Parent: 8cced67

File tree (5 files changed: +31 −7 lines)

  node-zerox/src/index.ts
  node-zerox/src/models/openAI.ts
  node-zerox/src/types.ts
  py_zerox/pyzerox/core/zerox.py
  py_zerox/pyzerox/models/modellitellm.py

node-zerox/src/index.ts

Lines changed: 5 additions & 0 deletions

@@ -64,6 +64,7 @@ export const zerox = async ({
   imageFormat = "png",
   imageHeight,
   llmParams = {},
+  reasoning_effort,
   maintainFormat = false,
   maxImageSize = 15,
   maxRetries = 1,
@@ -79,6 +80,10 @@ export const zerox = async ({
   trimEdges = true,
 }: ZeroxArgs): Promise<ZeroxOutput> => {
   let extracted: Record<string, unknown> | null = null;
+  // If reasoning is provided, add to llmParams
+  if (reasoning_effort) {
+    llmParams = { ...llmParams, reasoning_effort };
+  }
   let extractedLogprobs: LogprobPage[] = [];
   let inputTokenCount: number = 0;
   let outputTokenCount: number = 0;

node-zerox/src/models/openAI.ts

Lines changed: 11 additions & 6 deletions

@@ -96,7 +96,7 @@ export default class OpenAIModel implements ModelInterface {
     priorPage,
     prompt,
   }: CompletionArgs): Promise<CompletionResponse> {
-    const systemPrompt = prompt || SYSTEM_PROMPT_BASE;
+    const systemPrompt = prompt || SYSTEM_PROMPT_BASE;

     // Default system message
     const messages: any = [{ role: "system", content: systemPrompt }];
@@ -120,13 +120,18 @@ export default class OpenAIModel implements ModelInterface {
     messages.push({ role: "user", content: imageContents });

     try {
+      // If model is GPT-5 and reasoning_effort is provided, add it to payload
+      let payload: any = {
+        messages,
+        model: this.model,
+        ...convertKeysToSnakeCase(this.llmParams ?? null),
+      };
+      if (this.model && this.model.startsWith("gpt-5") && this.llmParams?.reasoning_effort) {
+        payload.reasoning_effort = this.llmParams.reasoning_effort;
+      }
       const response = await axios.post(
         "https://api.openai.com/v1/chat/completions",
-        {
-          messages,
-          model: this.model,
-          ...convertKeysToSnakeCase(this.llmParams ?? null),
-        },
+        payload,
         {
           headers: {
             Authorization: `Bearer ${this.apiKey}`,
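
For a GPT-5 family model, the request body assembled above would then look roughly like this (a sketch assuming `llmParams` carries only `reasoning_effort: "low"`; the model id is hypothetical). Since `reasoning_effort` is already snake_case, the spread of `convertKeysToSnakeCase(...)` carries it through even before the explicit GPT-5 guard re-assigns it.

    // Approximate JSON body POSTed to https://api.openai.com/v1/chat/completions
    {
      "messages": [ /* system prompt + user image content */ ],
      "model": "gpt-5-mini",
      "reasoning_effort": "low"
    }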

node-zerox/src/types.ts

Lines changed: 2 additions & 0 deletions

@@ -28,6 +28,7 @@ export interface ZeroxArgs {
   imageHeight?: number;
   imageFormat?: "png" | "jpeg";
   llmParams?: Partial<LLMParams>;
+  reasoning_effort?: "minimal" | "low" | "medium" | "high";
   maintainFormat?: boolean;
   maxImageSize?: number;
   maxRetries?: number;
@@ -227,6 +228,7 @@ export interface GoogleLLMParams extends BaseLLMParams {
 export interface OpenAILLMParams extends BaseLLMParams {
   logprobs: boolean;
   maxTokens: number;
+  reasoning_effort?: "minimal" | "low" | "medium" | "high";
 }

 // Union type of all provider params
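
A quick compile-time illustration of what the string-literal union enforces (sketch; assumes `ZeroxArgs` is imported from this module):

    import type { ZeroxArgs } from "./types";

    const ok: Partial<ZeroxArgs> = { reasoning_effort: "medium" }; // accepted
    // @ts-expect-error -- "maximal" is not in the union
    const bad: Partial<ZeroxArgs> = { reasoning_effort: "maximal" };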

py_zerox/pyzerox/core/zerox.py

Lines changed: 6 additions & 0 deletions

@@ -35,6 +35,7 @@ async def zerox(
     temp_dir: Optional[str] = None,
     custom_system_prompt: Optional[str] = None,
     select_pages: Optional[Union[int, Iterable[int]]] = None,
+    reasoning_effort: Optional[str] = None,
     **kwargs
 ) -> ZeroxOutput:
     """
@@ -76,6 +77,11 @@ async def zerox(
         raise FileUnavailable()

     # Create an instance of the litellm model interface
+    if reasoning_effort is not None:
+        allowed = {"minimal", "low", "medium", "high"}
+        if reasoning_effort not in allowed:
+            raise ValueError(f"reasoning_effort must be one of {allowed}")
+        kwargs["reasoning_effort"] = reasoning_effort
     vision_model = litellmmodel(model=model,**kwargs)

     # override the system prompt if a custom prompt is provided

py_zerox/pyzerox/models/modellitellm.py

Lines changed: 7 additions & 1 deletion

@@ -92,7 +92,13 @@ async def completion(
         )

     try:
+        # If model is GPT-5 and reasoning_effort is present, validate it before the call
+        call_kwargs = dict(self.kwargs)
+        if self.model and self.model.startswith("gpt-5") and "reasoning_effort" in call_kwargs:
+            allowed = {"minimal", "low", "medium", "high"}
+            if call_kwargs["reasoning_effort"] not in allowed:
+                raise ValueError(f"reasoning_effort must be one of {allowed}")
-        response = await litellm.acompletion(model=self.model, messages=messages, **self.kwargs)
+        response = await litellm.acompletion(model=self.model, messages=messages, **call_kwargs)

         ## completion response
         response = CompletionResponse(
