Skip to content

Commit 4c42b81

Browse files
committed
Introduce a customizable and flexible system role
1 parent 295e2a6 commit 4c42b81

File tree

7 files changed

+48
-7
lines changed

7 files changed

+48
-7
lines changed

README.md

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,14 @@ kwargs = {}
180180

181181
## system prompt to use for the vision model
182182
custom_system_prompt = None
183-
184183
# to override
185184
# custom_system_prompt = "For the below pdf page, do something..something..." ## example
186185

186+
## system role to use for the vision model
187+
custom_role = None
188+
# to override
189+
# custom_role = "user" ## example
190+
187191
###################### Example for OpenAI ######################
188192
model = "gpt-4o-mini" ## openai model
189193
os.environ["OPENAI_API_KEY"] = "" ## your-api-key
@@ -236,7 +240,8 @@ async def main():
236240

237241
output_dir = "./output_test" ## directory to save the consolidated markdown file
238242
result = await zerox(file_path=file_path, model=model, output_dir=output_dir,
239-
custom_system_prompt=custom_system_prompt,select_pages=select_pages, **kwargs)
243+
custom_system_prompt=custom_system_prompt, select_pages=select_pages,
244+
custom_role=custom_role, **kwargs)
240245
return result
241246

242247

@@ -259,6 +264,7 @@ async def zerox(
259264
output_dir: Optional[str] = None,
260265
temp_dir: Optional[str] = None,
261266
custom_system_prompt: Optional[str] = None,
267+
custom_role: Optional[str] = None,
262268
select_pages: Optional[Union[int, Iterable[int]]] = None,
263269
**kwargs
264270
) -> ZeroxOutput:
@@ -283,7 +289,9 @@ Parameters
283289
- **temp_dir** (str, optional):
284290
The directory to store temporary files; defaults to a named folder in the system's temp directory. If it already exists, its contents will be deleted before zerox uses it.
285291
- **custom_system_prompt** (str, optional):
286-
The system prompt to use for the model, this overrides the default system prompt of zerox.Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning. Defaults to None.
292+
The system prompt to use for the model, this overrides the default system prompt of zerox. Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning. Defaults to None.
293+
- **custom_role** (str, optional):
294+
The message role used when sending zerox's prompt messages to the model; this overrides the default `system` role. Typically this isn't necessary unless a particular LLM requires a different role (for example, `"user"`). When set, a friendly warning will be displayed. Defaults to None.
287295
- **select_pages** (Optional[Union[int, Iterable[int]]], optional):
288296
Pages to process; can be a single page number or an iterable of page numbers. Defaults to None.
289297
- **kwargs** (dict, optional):

py_zerox/pyzerox/constants/messages.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ class Messages:
1919
Custom system prompt was provided which overrides the default system prompt. We assume that you know what you are doing.
2020
"""
2121

22+
CUSTOM_SYSTEM_ROLE_WARNING = """
23+
Custom system role was provided which overrides the default system role. We assume that you know what you are doing.\
24+
"""
25+
2226
MAINTAIN_FORMAT_SELECTED_PAGES_WARNING = """
2327
The maintain_format flag is set to True in conjunction with select_pages input given. This may result in unexpected behavior.
2428
"""

py_zerox/pyzerox/constants/prompts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ class Prompts:
55
Convert the following PDF page to markdown.
66
Return only the markdown with no explanation text.
77
Do not exclude any content from the page.
8-
"""
8+
"""
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
class Roles:
2+
"""Class for storing roles for the Zerox system."""
3+
4+
DEFAULT_SYSTEM_ROLE = "system"

py_zerox/pyzerox/core/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class ZeroxArgs:
1616
output_dir: Optional[str] = None
1717
temp_dir: Optional[str] = None
1818
custom_system_prompt: Optional[str] = None
19+
custom_role: Optional[str] = None
1920
select_pages: Optional[Union[int, Iterable[int]]] = None
2021
kwargs: Dict[str, Any] = field(default_factory=dict)
2122

py_zerox/pyzerox/core/zerox.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ async def zerox(
3434
output_dir: Optional[str] = None,
3535
temp_dir: Optional[str] = None,
3636
custom_system_prompt: Optional[str] = None,
37+
custom_role: Optional[str] = None,
3738
select_pages: Optional[Union[int, Iterable[int]]] = None,
3839
**kwargs
3940
) -> ZeroxOutput:
@@ -57,6 +58,8 @@ async def zerox(
5758
:type temp_dir: str, optional
5859
:param custom_system_prompt: The system prompt to use for the model, this overrides the default system prompt of zerox. Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning, defaults to None
5960
:type custom_system_prompt: str, optional
61+
:param custom_role: The message role used when sending zerox's prompt messages to the model, this overrides the default system role. Typically it is not required unless a particular LLM needs a different role. When set, it will raise a friendly warning, defaults to None
62+
:type custom_role: str, optional
6063
:param select_pages: Pages to process, can be a single page number or an iterable of page numbers, defaults to None
6164
:type select_pages: int or Iterable[int], optional
6265
@@ -82,6 +85,10 @@ async def zerox(
8285
if custom_system_prompt:
8386
vision_model.system_prompt = custom_system_prompt
8487

88+
# override the system role if a custom role is provided
89+
if custom_role:
90+
vision_model.system_role = custom_role
91+
8592
# Check if both maintain_format and select_pages are provided
8693
if maintain_format and select_pages is not None:
8794
warnings.warn(Messages.MAINTAIN_FORMAT_SELECTED_PAGES_WARNING)
@@ -199,4 +206,4 @@ async def zerox(
199206
input_tokens=input_token_count,
200207
output_tokens=output_token_count,
201208
pages=formatted_pages,
202-
)
209+
)

py_zerox/pyzerox/models/modellitellm.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@
99
from ..errors import ModelAccessError, NotAVisionModel, MissingEnvironmentVariables
1010
from ..constants.messages import Messages
1111
from ..constants.prompts import Prompts
12+
from ..constants.roles import Roles
1213
from ..processor.image import encode_image_to_base64
1314

1415
DEFAULT_SYSTEM_PROMPT = Prompts.DEFAULT_SYSTEM_PROMPT
16+
DEFAULT_SYSTEM_ROLE = Roles.DEFAULT_SYSTEM_ROLE
1517

1618

1719
class litellmmodel(BaseModel):
1820
## setting the default system prompt and system role
1921
_system_prompt = DEFAULT_SYSTEM_PROMPT
22+
_system_role = DEFAULT_SYSTEM_ROLE
2023

2124
def __init__(
2225
self,
@@ -41,6 +44,11 @@ def __init__(
4144
def system_prompt(self) -> str:
4245
'''Returns the system prompt for the model.'''
4346
return self._system_prompt
47+
48+
@property
49+
def system_role(self) -> str:
50+
'''Returns the system role for the model.'''
51+
return self._system_role
4452

4553
@system_prompt.setter
4654
def system_prompt(self, prompt: str) -> None:
@@ -49,6 +57,15 @@ def system_prompt(self, prompt: str) -> None:
4957
'''
5058
self._system_prompt = prompt
5159

60+
@system_role.setter
61+
def system_role(self, role: str) -> None:
62+
'''
63+
Sets/overrides the system role for the model.
64+
Will raise a friendly warning to notify the user.
65+
'''
66+
warnings.warn(f"{Messages.CUSTOM_SYSTEM_ROLE_WARNING}. Default role for zerox is: {DEFAULT_SYSTEM_ROLE}")
67+
self._system_role = role
68+
5269
## custom method on top of BaseModel
5370
def validate_environment(self) -> None:
5471
"""Validates the environment variables required for the model."""
@@ -123,7 +140,7 @@ async def _prepare_messages(
123140
# Default system message
124141
messages: List[Dict[str, Any]] = [
125142
{
126-
"role": "system",
143+
"role": self._system_role,
127144
"content": self._system_prompt,
128145
},
129146
]
@@ -133,7 +150,7 @@ async def _prepare_messages(
133150
if maintain_format and prior_page:
134151
messages.append(
135152
{
136-
"role": "system",
153+
"role": self._system_role,
137154
"content": f'Markdown must maintain consistent formatting with the following page: \n\n """{prior_page}"""',
138155
},
139156
)

0 commit comments

Comments
 (0)