diff --git a/.github/workflows/test_gpu.yml b/.github/workflows/test_gpu.yml
index 427ebca6cad..adf20197069 100644
--- a/.github/workflows/test_gpu.yml
+++ b/.github/workflows/test_gpu.yml
@@ -68,6 +68,6 @@ jobs:
python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
python -m pip install pytest
if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
- python -m pip install -e .
+ python -m pip install -e ".[all]"
python -m pytest --verbose tests/
'
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 409009bdb24..158ddaf552c 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -47,8 +47,7 @@ jobs:
pip install pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
python -m pip install paddlepaddle==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
- pip install -e .
- python -m pip install 'paddlex@git+https://github.com/PaddlePaddle/PaddleX.git@develop'
+ python -m pip install -e '.[all]' 'paddlex@git+https://github.com/PaddlePaddle/PaddleX.git@develop'
- name: Test with pytest
run: |
pytest --verbose tests/
diff --git a/docs/quick_start.en.md b/docs/quick_start.en.md
index 2eb7cb28100..24ee90823e3 100644
--- a/docs/quick_start.en.md
+++ b/docs/quick_start.en.md
@@ -26,10 +26,14 @@ python -m pip install paddlepaddle-gpu==3.0.0 -i https://www.paddlepaddle.org.cn
#### 2. Install `paddleocr`
-```bash linenums="1"
-python -m pip install paddleocr
+Install the full functionality of PaddleOCR by running the following command:
+
+```bash
+python -m pip install "paddleocr[all]"
```
+PaddleOCR also supports installing specific features as needed. For details, please refer to the [PaddleOCR installation documentation](version3.x/installation.en.md).
+
### Command Line Usage
=== "PP-OCRv5"
diff --git a/docs/quick_start.md b/docs/quick_start.md
index a232c75cb92..47c850b6bb7 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -24,10 +24,14 @@ python -m pip install paddlepaddle-gpu==3.0.0 -i https://www.paddlepaddle.org.cn
#### 2. 安装`paddleocr`
+执行如下命令安装 PaddleOCR 的完整功能:
+
```bash
-pip install paddleocr
+python -m pip install "paddleocr[all]"
```
+PaddleOCR 也支持根据需要安装部分功能,详情请参考 [PaddleOCR 安装文档](version3.x/installation.md)。
+
### 命令行使用
=== "PP-OCRv5"
diff --git a/docs/version3.x/installation.en.md b/docs/version3.x/installation.en.md
index e34bb8964e3..4db2be56571 100644
--- a/docs/version3.x/installation.en.md
+++ b/docs/version3.x/installation.en.md
@@ -17,15 +17,32 @@ If you only want to use the inference capabilities of PaddleOCR, please refer to
Install the latest version of the PaddleOCR inference package from PyPI:
```bash
+# If you only want to use the basic text recognition feature (returning text position coordinates and content)
python -m pip install paddleocr
+# If you want to use all functionalities, such as document parsing, document understanding, document translation, and key information extraction
+# python -m pip install "paddleocr[all]"
```
Or install from source (default is the development branch):
```bash
-python -m pip install "git+https://github.com/PaddlePaddle/PaddleOCR.git"
+# If you only want to use the basic text recognition feature (returning text position coordinates and content)
+python -m pip install "paddleocr@git+https://github.com/PaddlePaddle/PaddleOCR.git"
+# If you want to use all functionalities, such as document parsing, document understanding, document translation, and key information extraction
+# python -m pip install "paddleocr[all]@git+https://github.com/PaddlePaddle/PaddleOCR.git"
```
+In addition to the `all` dependency group demonstrated above, PaddleOCR also supports installing only some of the optional features by specifying other dependency groups. The dependency groups provided by PaddleOCR are as follows:
+
+| Dependency Group | Functionality |
+| ---------------- | ------------------------ |
+| `doc-parser` | Document parsing, which can be used to extract layout elements in a document such as tables, formulas, stamps, and images. |
+| `ie` | Information extraction, which can be used to extract key information from documents, such as names, dates, addresses, and amounts. |
+| `trans` | Document translation, which can be used to translate a document from one language to another. |
+| `all` | Full functionality. |
+
+The general OCR pipeline (e.g., PP-OCRv3/v4/v5) and the document image preprocessing pipeline can be used without installing any additional dependency groups. Apart from these two pipelines, every other pipeline belongs to exactly one dependency group. Refer to the usage documentation of each pipeline to find out which group it belongs to. For an individual functional module, installing the dependency group of any pipeline that includes that module enables the module's basic functionality.
+
## 2.2 Install Training Dependencies
To perform model training, exporting, etc., first clone the repository to your local machine:
diff --git a/docs/version3.x/installation.md b/docs/version3.x/installation.md
index 74a3eeef653..13ece078ddf 100644
--- a/docs/version3.x/installation.md
+++ b/docs/version3.x/installation.md
@@ -17,15 +17,32 @@ comments: true
从 PyPI 安装最新版本 PaddleOCR 推理包:
```bash
+# 只希望使用基础文字识别功能(返回文字位置坐标和文本内容)
python -m pip install paddleocr
+# 希望使用文档解析、文档理解、文档翻译、关键信息抽取等全部功能
+# python -m pip install "paddleocr[all]"
```
或者从源码安装(默认为开发分支):
```bash
-python -m pip install "git+https://github.com/PaddlePaddle/PaddleOCR.git"
+# 只希望使用基础文字识别功能(返回文字位置坐标和文本内容)
+python -m pip install "paddleocr@git+https://github.com/PaddlePaddle/PaddleOCR.git"
+# 希望使用文档解析、文档理解、文档翻译、关键信息抽取等全部功能
+# python -m pip install "paddleocr[all]@git+https://github.com/PaddlePaddle/PaddleOCR.git"
```
+除了上面演示的 `all` 依赖组以外,PaddleOCR 也支持通过指定其它依赖组,安装部分可选功能。PaddleOCR 提供的所有依赖组如下:
+
+| 依赖组名称 | 对应的功能 |
+| - | - |
+| `doc-parser` | 文档解析,可用于提取文档中的表格、公式、印章、图片等版面元素 |
+| `ie` | 信息抽取,可用于从文档中提取关键信息,如姓名、日期、地址、金额等 |
+| `trans` | 文档翻译,可用于将文档从一种语言翻译为另一种语言 |
+| `all` | 完整功能 |
+
+通用 OCR 产线(如 PP-OCRv3/v4/v5)、文档图像预处理产线的功能无需安装额外的依赖组即可使用。除了这两条产线外,每一条产线属于且仅属于一个依赖组。在各产线的使用文档中可以了解产线属于哪一依赖组。对于单功能模块,安装任意包含该模块的产线对应的依赖组后即可使用相关的基础功能。
+
## 2.2 安装训练依赖
要进行模型训练、导出等,需要首先将仓库克隆到本地:
diff --git a/docs/version3.x/pipeline_usage/PP-ChatOCRv4.en.md b/docs/version3.x/pipeline_usage/PP-ChatOCRv4.en.md
index bc1eb33dff3..c5f8f80323c 100644
--- a/docs/version3.x/pipeline_usage/PP-ChatOCRv4.en.md
+++ b/docs/version3.x/pipeline_usage/PP-ChatOCRv4.en.md
@@ -965,12 +965,10 @@ devanagari_PP-OCRv3_mobile_rec_infer.tar">Inference Model/推理模型/Inference Model/推理模型/Inference Model/推理模型/Inference Model/推理模型/Inference Model/推理模型/=3.1.0",
+ "paddlex[ocr-core]>=3.2.0",
"PyYAML>=6",
"typing-extensions>=4.12",
]
@@ -53,6 +53,12 @@ issues = "https://github.com/PaddlePaddle/PaddleOCR/issues"
[project.scripts]
paddleocr = "paddleocr.__main__:console_entry"
+[project.optional-dependencies]
+doc-parser = ["paddlex[ocr]>=3.2.0"]
+ie = ["paddlex[ie]>=3.2.0"]
+trans = ["paddlex[trans]>=3.2.0"]
+all = ["paddlex[ocr,ie,trans]>=3.2.0"]
+
[tool.setuptools.packages.find]
where = ["."]
include = ["paddleocr", "paddleocr.*"]