Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 27 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,27 +120,30 @@ _Note:_ Benchmarks are currently done in the zero-shot setting.

| Rank | Model | SequenceMatcher Similarity | TFIDF Similarity | Time (s) | Cost ($) |
| --- | --- | --- | --- | --- | --- |
| 1 | gemini-2.5-pro | 0.907 (±0.151) | 0.973 (±0.053) | 22.23 | 0.02305 |
| 2 | AUTO | 0.905 (±0.111) | 0.967 (±0.051) | 10.31 | 0.00068 |
| 3 | gemini-2.5-flash | 0.902 (±0.151) | 0.984 (±0.030) | 48.67 | 0.01051 |
| 4 | gemini-2.0-flash | 0.900 (±0.127) | 0.971 (±0.040) | 12.43 | 0.00081 |
| 5 | mistral-ocr-latest | 0.890 (±0.097) | 0.930 (±0.095) | 5.69 | 0.00127 |
| 6 | claude-3-5-sonnet-20241022 | 0.873 (±0.195) | 0.937 (±0.095) | 16.86 | 0.01779 |
| 7 | gemini-1.5-flash | 0.868 (±0.198) | 0.965 (±0.041) | 17.19 | 0.00044 |
| 8 | claude-sonnet-4-20250514 | 0.814 (±0.197) | 0.903 (±0.150) | 21.99 | 0.02045 |
| 9 | accounts/fireworks/models/llama4-scout-instruct-basic | 0.804 (±0.242) | 0.931 (±0.067) | 9.76 | 0.00087 |
| 10 | claude-opus-4-20250514 | 0.798 (±0.230) | 0.878 (±0.159) | 21.01 | 0.09233 |
| 11 | gpt-4o | 0.796 (±0.264) | 0.898 (±0.117) | 28.23 | 0.01473 |
| 12 | accounts/fireworks/models/llama4-maverick-instruct-basic | 0.792 (±0.206) | 0.914 (±0.128) | 10.71 | 0.00149 |
| 13 | gemini-1.5-pro | 0.782 (±0.341) | 0.833 (±0.252) | 27.13 | 0.01275 |
| 14 | gpt-4.1-mini | 0.767 (±0.243) | 0.807 (±0.197) | 22.64 | 0.00352 |
| 15 | gpt-4o-mini | 0.727 (±0.245) | 0.832 (±0.136) | 17.20 | 0.00650 |
| 16 | meta-llama/Llama-Vision-Free | 0.682 (±0.223) | 0.847 (±0.135) | 12.31 | 0.00000 |
| 17 | meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo | 0.677 (±0.226) | 0.850 (±0.134) | 7.23 | 0.00015 |
| 18 | microsoft/phi-4-multimodal-instruct | 0.665 (±0.258) | 0.800 (±0.217) | 10.96 | 0.00049 |
| 19 | claude-3-7-sonnet-20250219 | 0.634 (±0.395) | 0.752 (±0.298) | 70.10 | 0.01775 |
| 20 | google/gemma-3-27b-it | 0.624 (±0.357) | 0.750 (±0.327) | 24.51 | 0.00020 |
| 21 | gpt-4.1 | 0.622 (±0.314) | 0.782 (±0.191) | 34.66 | 0.01461 |
| 22 | meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo | 0.559 (±0.233) | 0.822 (±0.119) | 27.74 | 0.01102 |
| 23 | ds4sd/SmolDocling-256M-preview | 0.486 (±0.378) | 0.583 (±0.355) | 108.91 | 0.00000 |
| 24 | qwen/qwen-2.5-vl-7b-instruct | 0.469 (±0.364) | 0.617 (±0.441) | 13.23 | 0.00060 |
| 1 | AUTO (with auto-selected model) | 0.893 (±0.135) | 0.957 (±0.068) | 22.25 | 0.00066 |
| 2 | AUTO | 0.889 (±0.115) | 0.971 (±0.048) | 9.29 | 0.00062 |
| 3 | mistral-ocr-latest | 0.882 (±0.111) | 0.927 (±0.094) | 5.64 | 0.00123 |
| 4 | gemini-2.5-flash | 0.877 (±0.169) | 0.986 (±0.028) | 52.28 | 0.01056 |
| 5 | gemini-2.5-pro | 0.876 (±0.195) | 0.976 (±0.049) | 22.65 | 0.02408 |
| 6 | gemini-2.0-flash | 0.867 (±0.152) | 0.975 (±0.038) | 11.97 | 0.00079 |
| 7 | claude-3-5-sonnet-20241022 | 0.851 (±0.191) | 0.927 (±0.102) | 16.68 | 0.01777 |
| 8 | gemini-1.5-flash | 0.843 (±0.223) | 0.969 (±0.039) | 15.98 | 0.00043 |
| 9 | gpt-5-mini | 0.816 (±0.210) | 0.920 (±0.108) | 52.99 | 0.00818 |
| 10 | gpt-5 | 0.806 (±0.224) | 0.919 (±0.092) | 97.62 | 0.05421 |
| 11 | claude-sonnet-4-20250514 | 0.789 (±0.192) | 0.898 (±0.140) | 21.31 | 0.02053 |
| 12 | gpt-4o | 0.774 (±0.271) | 0.889 (±0.126) | 28.51 | 0.01438 |
| 13 | claude-opus-4-20250514 | 0.774 (±0.224) | 0.877 (±0.151) | 28.56 | 0.09425 |
| 14 | accounts/fireworks/models/llama4-scout-instruct-basic | 0.769 (±0.248) | 0.938 (±0.064) | 13.48 | 0.00086 |
| 15 | accounts/fireworks/models/llama4-maverick-instruct-basic | 0.767 (±0.211) | 0.927 (±0.122) | 16.22 | 0.00147 |
| 16 | gemini-1.5-pro | 0.766 (±0.323) | 0.858 (±0.239) | 25.25 | 0.01173 |
| 17 | gpt-4.1-mini | 0.735 (±0.251) | 0.786 (±0.193) | 22.39 | 0.00344 |
| 18 | gpt-4o-mini | 0.718 (±0.249) | 0.842 (±0.131) | 18.11 | 0.00619 |
| 19 | meta-llama/Llama-Vision-Free | 0.677 (±0.247) | 0.865 (±0.132) | 11.34 | 0.00000 |
| 20 | meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo | 0.674 (±0.240) | 0.857 (±0.128) | 7.33 | 0.00015 |
| 21 | microsoft/phi-4-multimodal-instruct | 0.623 (±0.260) | 0.821 (±0.206) | 12.79 | 0.00046 |
| 22 | claude-3-7-sonnet-20250219 | 0.621 (±0.405) | 0.740 (±0.304) | 61.06 | 0.01696 |
| 23 | google/gemma-3-27b-it | 0.614 (±0.356) | 0.779 (±0.309) | 22.97 | 0.00020 |
| 24 | gpt-4.1 | 0.613 (±0.303) | 0.769 (±0.183) | 34.47 | 0.01415 |
| 25 | meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo | 0.562 (±0.242) | 0.815 (±0.140) | 27.10 | 0.01067 |
| 26 | ds4sd/SmolDocling-256M-preview | 0.468 (±0.378) | 0.554 (±0.361) | 103.86 | 0.00000 |
| 27 | qwen/qwen-2.5-vl-7b-instruct | 0.460 (±0.372) | 0.599 (±0.452) | 12.83 | 0.00057 |
246 changes: 132 additions & 114 deletions docs/benchmark.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,147 +89,165 @@ Here are the detailed parsing performance results for various models, sorted by
- Time (s)
- Cost ($)
* - 1
- gemini-2.5-pro
- 0.907 (±0.151)
- 0.973 (±0.053)
- 22.23
- 0.02305
- AUTO (with auto-selected model)
- 0.893 (±0.135)
- 0.957 (±0.068)
- 22.25
- 0.00066
* - 2
- AUTO
- 0.905 (±0.111)
- 0.967 (±0.051)
- 10.31
- 0.00068
- 0.889 (±0.115)
- 0.971 (±0.048)
- 9.29
- 0.00062
* - 3
- gemini-2.5-flash
- 0.902 (±0.151)
- 0.984 (±0.030)
- 48.67
- 0.01051
- mistral-ocr-latest
- 0.882 (±0.111)
- 0.927 (±0.094)
- 5.64
- 0.00123
* - 4
- gemini-2.0-flash
- 0.900 (±0.127)
- 0.971 (±0.040)
- 12.43
- 0.00081
- gemini-2.5-flash
- 0.877 (±0.169)
- 0.986 (±0.028)
- 52.28
- 0.01056
* - 5
- mistral-ocr-latest
- 0.890 (±0.097)
- 0.930 (±0.095)
- 5.69
- 0.00127
- gemini-2.5-pro
- 0.876 (±0.195)
- 0.976 (±0.049)
- 22.65
- 0.02408
* - 6
- claude-3-5-sonnet-20241022
- 0.873 (±0.195)
- 0.937 (±0.095)
- 16.86
- 0.01779
- gemini-2.0-flash
- 0.867 (±0.152)
- 0.975 (±0.038)
- 11.97
- 0.00079
* - 7
- gemini-1.5-flash
- 0.868 (±0.198)
- 0.965 (±0.041)
- 17.19
- 0.00044
- claude-3-5-sonnet-20241022
- 0.851 (±0.191)
- 0.927 (±0.102)
- 16.68
- 0.01777
* - 8
- claude-sonnet-4-20250514
- 0.814 (±0.197)
- 0.903 (±0.150)
- 21.99
- 0.02045
- gemini-1.5-flash
- 0.843 (±0.223)
- 0.969 (±0.039)
- 15.98
- 0.00043
* - 9
- accounts/fireworks/models/llama4-scout-instruct-basic
- 0.804 (±0.242)
- 0.931 (±0.067)
- 9.76
- 0.00087
- gpt-5-mini
- 0.816 (±0.210)
- 0.920 (±0.108)
- 52.99
- 0.00818
* - 10
- claude-opus-4-20250514
- 0.798 (±0.230)
- 0.878 (±0.159)
- 21.01
- 0.09233
- gpt-5
- 0.806 (±0.224)
- 0.919 (±0.092)
- 97.62
- 0.05421
* - 11
- gpt-4o
- 0.796 (±0.264)
- 0.898 (±0.117)
- 28.23
- 0.01473
- claude-sonnet-4-20250514
- 0.789 (±0.192)
- 0.898 (±0.140)
- 21.31
- 0.02053
* - 12
- accounts/fireworks/models/llama4-maverick-instruct-basic
- 0.792 (±0.206)
- 0.914 (±0.128)
- 10.71
- 0.00149
- gpt-4o
- 0.774 (±0.271)
- 0.889 (±0.126)
- 28.51
- 0.01438
* - 13
- gemini-1.5-pro
- 0.782 (±0.341)
- 0.833 (±0.252)
- 27.13
- 0.01275
- claude-opus-4-20250514
- 0.774 (±0.224)
- 0.877 (±0.151)
- 28.56
- 0.09425
* - 14
- gpt-4.1-mini
- 0.767 (±0.243)
- 0.807 (±0.197)
- 22.64
- 0.00352
- accounts/fireworks/models/llama4-scout-instruct-basic
- 0.769 (±0.248)
- 0.938 (±0.064)
- 13.48
- 0.00086
* - 15
- gpt-4o-mini
- 0.727 (±0.245)
- 0.832 (±0.136)
- 17.20
- 0.00650
- accounts/fireworks/models/llama4-maverick-instruct-basic
- 0.767 (±0.211)
- 0.927 (±0.122)
- 16.22
- 0.00147
* - 16
- gemini-1.5-pro
- 0.766 (±0.323)
- 0.858 (±0.239)
- 25.25
- 0.01173
* - 17
- gpt-4.1-mini
- 0.735 (±0.251)
- 0.786 (±0.193)
- 22.39
- 0.00344
* - 18
- gpt-4o-mini
- 0.718 (±0.249)
- 0.842 (±0.131)
- 18.11
- 0.00619
* - 19
- meta-llama/Llama-Vision-Free
- 0.682 (±0.223)
- 0.847 (±0.135)
- 12.31
- 0.677 (±0.247)
- 0.865 (±0.132)
- 11.34
- 0.00000
* - 17
* - 20
- meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
- 0.677 (±0.226)
- 0.850 (±0.134)
- 7.23
- 0.674 (±0.240)
- 0.857 (±0.128)
- 7.33
- 0.00015
* - 18
* - 21
- microsoft/phi-4-multimodal-instruct
- 0.665 (±0.258)
- 0.800 (±0.217)
- 10.96
- 0.00049
* - 19
- 0.623 (±0.260)
- 0.821 (±0.206)
- 12.79
- 0.00046
* - 22
- claude-3-7-sonnet-20250219
- 0.634 (±0.395)
- 0.752 (±0.298)
- 70.10
- 0.01775
* - 20
- 0.621 (±0.405)
- 0.740 (±0.304)
- 61.06
- 0.01696
* - 23
- google/gemma-3-27b-it
- 0.624 (±0.357)
- 0.750 (±0.327)
- 24.51
- 0.614 (±0.356)
- 0.779 (±0.309)
- 22.97
- 0.00020
* - 21
* - 24
- gpt-4.1
- 0.622 (±0.314)
- 0.782 (±0.191)
- 34.66
- 0.01461
* - 22
- 0.613 (±0.303)
- 0.769 (±0.183)
- 34.47
- 0.01415
* - 25
- meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
- 0.559 (±0.233)
- 0.822 (±0.119)
- 27.74
- 0.01102
* - 23
- 0.562 (±0.242)
- 0.815 (±0.140)
- 27.10
- 0.01067
* - 26
- ds4sd/SmolDocling-256M-preview
- 0.486 (±0.378)
- 0.583 (±0.355)
- 108.91
- 0.468 (±0.378)
- 0.554 (±0.361)
- 103.86
- 0.00000
* - 24
* - 27
- qwen/qwen-2.5-vl-7b-instruct
- 0.469 (±0.364)
- 0.617 (±0.441)
- 13.23
- 0.00060
- 0.460 (±0.372)
- 0.599 (±0.452)
- 12.83
- 0.00057

21 changes: 13 additions & 8 deletions lexoid/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@
from loguru import logger

from lexoid.core.parse_type.llm_parser import (
convert_doc_to_base64_images,
parse_llm_doc,
create_response,
get_api_provider_for_model,
parse_llm_doc,
)
from lexoid.core.parse_type.static_parser import parse_static_doc
from lexoid.core.utils import (
convert_to_pdf,
create_sub_pdf,
download_file,
get_webpage_soup,
Expand All @@ -29,6 +27,7 @@
split_pdf,
convert_schema_to_dict,
)
from lexoid.core.conversion_utils import convert_to_pdf, convert_doc_to_base64_images


class ParserType(Enum):
Expand All @@ -44,9 +43,15 @@ def wrapper(*args, **kwargs):
if len(args) > 0:
kwargs["path"] = args[0]
if len(args) > 1:
router_priority = kwargs.get("router_priority", "speed")
if args[1] == ParserType.AUTO:
parser_type = ParserType[router(kwargs["path"], router_priority)]
router_priority = kwargs.get("router_priority", "speed")
autoselect_llm = kwargs.get("autoselect_llm", True)
routed_parser_type, model = router(
kwargs["path"], router_priority, autoselect_llm=autoselect_llm
)
if model is not None:
kwargs["model"] = model
parser_type = ParserType[routed_parser_type]
logger.debug(f"Auto-detected parser type: {parser_type}")
kwargs["routed"] = True
else:
Expand Down Expand Up @@ -223,9 +228,9 @@ def parse(
else:
return recursive_read_html(path, depth)

assert is_supported_file_type(path), (
f"Unsupported file type {os.path.splitext(path)[1]}"
)
assert is_supported_file_type(
path
), f"Unsupported file type {os.path.splitext(path)[1]}"

if as_pdf and not path.lower().endswith(".pdf"):
pdf_path = os.path.join(temp_dir, "converted.pdf")
Expand Down
Loading