From cdd1ce0056c8ad98ec92325d97de3798d9b23719 Mon Sep 17 00:00:00 2001
From: Bobholamovic <bob1998425@hotmail.com>
Date: Fri, 15 Nov 2024 13:55:20 +0800
Subject: [PATCH 1/5] Add PP-ShiTuV2 FastAPI app

---
 .../cv_pipelines/face_recognition.en.md       |   7 +-
 .../cv_pipelines/face_recognition.md          |   7 +-
 .../general_image_recognition.en.md           | 659 ++++++++---------
 .../cv_pipelines/general_image_recognition.md | 693 +++++++-----------
 .../image_anomaly_detection.en.md             |   7 +-
 .../cv_pipelines/image_anomaly_detection.md   |   7 +-
 .../cv_pipelines/image_classification.en.md   |   7 +-
 .../cv_pipelines/image_classification.md      |   7 +-
 .../image_multi_label_classification.en.md    |   7 +-
 .../image_multi_label_classification.md       |   7 +-
 .../cv_pipelines/instance_segmentation.en.md  |   7 +-
 .../cv_pipelines/instance_segmentation.md     |   7 +-
 .../cv_pipelines/object_detection.en.md       |   7 +-
 .../cv_pipelines/object_detection.md          |   7 +-
 .../pedestrian_attribute_recognition.en.md    |   7 +-
 .../pedestrian_attribute_recognition.md       |   7 +-
 .../cv_pipelines/semantic_segmentation.en.md  |   7 +-
 .../cv_pipelines/semantic_segmentation.md     |   7 +-
 .../cv_pipelines/small_object_detection.en.md |   7 +-
 .../cv_pipelines/small_object_detection.md    |   7 +-
 .../vehicle_attribute_recognition.en.md       |   7 +-
 .../vehicle_attribute_recognition.md          |   7 +-
 ...ocument_scene_information_extraction.en.md |   7 +-
 .../document_scene_information_extraction.md  |   7 +-
 .../tutorials/ocr_pipelines/OCR.en.md         |   7 +-
 .../tutorials/ocr_pipelines/OCR.md            |   7 +-
 .../ocr_pipelines/formula_recognition.en.md   |   7 +-
 .../ocr_pipelines/formula_recognition.md      |   7 +-
 .../ocr_pipelines/layout_parsing.en.md        |   7 +-
 .../tutorials/ocr_pipelines/layout_parsing.md |   7 +-
 .../ocr_pipelines/seal_recognition.en.md      |   7 +-
 .../ocr_pipelines/seal_recognition.md         |   7 +-
 .../ocr_pipelines/table_recognition.en.md     |   7 +-
 .../ocr_pipelines/table_recognition.md        |   7 +-
 .../time_series_anomaly_detection.en.md       |   7 +-
 .../time_series_anomaly_detection.md          |   7 +-
 .../time_series_classification.en.md          |   7 +-
 .../time_series_classification.md             |   7 +-
 .../time_series_forecasting.en.md             |   7 +-
 .../time_series_forecasting.md                |   7 +-
 .../pipelines/ppchatocrv3/ppchatocrv3.py      |   2 +-
 .../serving/_pipeline_apps/__init__.py        |   8 +
 .../_pipeline_apps/anomaly_detection.py       |   4 +-
 .../_pipeline_apps/formula_recognition.py     |   4 +-
 .../_pipeline_apps/image_classification.py    |   4 +-
 .../_pipeline_apps/instance_segmentation.py   |   4 +-
 .../serving/_pipeline_apps/layout_parsing.py  |  26 +-
 .../multi_label_image_classification.py       |   4 +-
 .../_pipeline_apps/object_detection.py        |   4 +-
 .../pipelines/serving/_pipeline_apps/ocr.py   |   4 +-
 .../pedestrian_attribute_recognition.py       |   4 +-
 .../serving/_pipeline_apps/pp_shitu_v2.py     | 313 ++++++++
 .../serving/_pipeline_apps/ppchatocrv3.py     |  30 +-
 .../_pipeline_apps/seal_recognition.py        |   4 +-
 .../_pipeline_apps/semantic_segmentation.py   |   4 +-
 .../_pipeline_apps/small_object_detection.py  |   4 +-
 .../_pipeline_apps/table_recognition.py       |   8 +-
 .../pipelines/serving/_pipeline_apps/ts_ad.py |   4 +-
 .../pipelines/serving/_pipeline_apps/ts_fc.py |   4 +-
 .../vehicle_attribute_recognition.py          |   4 +-
 paddlex/inference/pipelines/serving/app.py    |   1 +
 .../pipelines/serving/file_storage.py         |  80 --
 .../inference/pipelines/serving/storage.py    | 161 ++++
 paddlex/inference/pipelines/serving/utils.py  |  25 +-
 64 files changed, 1295 insertions(+), 1033 deletions(-)
 create mode 100644 paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
 delete mode 100644 paddlex/inference/pipelines/serving/file_storage.py
 create mode 100644 paddlex/inference/pipelines/serving/storage.py

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.en.md
index dbd35d7bf0..5a74a53ad5 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.en.md
@@ -374,9 +374,10 @@ Below are the API reference and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>The response body and the request body of POST requests are both JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is successfully processed, the response status code is <code>200</code>, and the attributes of the response body are as follows:</li>
 </ul>
 <table>
@@ -425,7 +426,7 @@ Below are the API reference and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>The operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.md
index 149b2cb684..9042bd09c1 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/face_recognition.md
@@ -391,9 +391,10 @@ data_root             # 数据集根目录，目录名称可以改变
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -442,7 +443,7 @@ data_root             # 数据集根目录，目录名称可以改变
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
index 9ab6bc00b1..3093c219a1 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
@@ -367,9 +367,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -377,32 +378,32 @@ Below are the API references and multi-language service invocation examples:
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td><code>errorCode</code></td>
 <td><code>integer</code></td>
-<td>Error code. Fixed as <code>0</code>.</td>
+<td>Error code. Fixed to <code>0</code>.</td>
 </tr>
 <tr>
 <td><code>errorMsg</code></td>
 <td><code>string</code></td>
-<td>Error message. Fixed as <code>"Success"</code>.</td>
+<td>Error description. Fixed to <code>"Success"</code>.</td>
 </tr>
 </tbody>
 </table>
-<p>The response body may also have a <code>result</code> property of type <code>object</code>, which stores the operation result information.</p>
+<p>The response body may also have a <code>result</code> property, which is an <code>object</code> type that stores operation result information.</p>
 <ul>
-<li>When the request is not processed successfully, the response body properties are as follows:</li>
+<li>When the request is not processed successfully, the properties of the response body are as follows:</li>
 </ul>
 <table>
 <thead>
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
 </tr>
 </thead>
 <tbody>
@@ -414,58 +415,235 @@ Below are the API references and multi-language service invocation examples:
 <tr>
 <td><code>errorMsg</code></td>
 <td><code>string</code></td>
-<td>Error message.</td>
+<td>Error description.</td>
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
-<li><b><code>infer</code></b></li>
+<li><b><code>buildIndex</code></b></li>
 </ul>
-<p>Classify images.</p>
-<p><code>POST /image-classification</code></p>
+<p>Build feature vector index.</p>
+<p><code>POST /shitu-index-build</code></p>
 <ul>
-<li>The request body properties are as follows:</li>
+<li>The properties of the request body are as follows:</li>
 </ul>
 <table>
 <thead>
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
 <th>Required</th>
 </tr>
 </thead>
 <tbody>
 <tr>
+<td><code>imageLabelPairs</code></td>
+<td><code>array</code></td>
+<td>Image-label pairs for building the index.</td>
+<td>Yes</td>
+</tr>
+</tbody>
+</table>
+<p>Each element in <code>imageLabelPairs</code> is an <code>object</code> with the following properties:</p>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
+<tr>
 <td><code>image</code></td>
 <td><code>string</code></td>
-<td>The URL of an image file accessible by the service or the Base64 encoded result of the image file content.</td>
+<td>The URL of an image file accessible by the service, or the Base64 encoding result of the image file content.</td>
+</tr>
+<tr>
+<td><code>label</code></td>
+<td><code>string</code></td>
+<td>Label.</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>When the request is processed successfully, the <code>result</code> of the response body has the following properties:</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>The key corresponding to the index, used to identify the established index. Can be used as input for other operations.</td>
+</tr>
+<tr>
+<td><code>idMap</code></td>
+<td><code>object</code></td>
+<td>Mapping from vector ID to label.</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><b><code>addImagesToIndex</code></b></li>
+</ul>
+<p>Add images (corresponding feature vectors) to the index.</p>
+<p><code>POST /shitu-index-add</code></p>
+<ul>
+<li>The properties of the request body are as follows:</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+<th>Required</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>imageLabelPairs</code></td>
+<td><code>array</code></td>
+<td>Image-label pairs to be added to the index.</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>The key corresponding to the index. Provided by the <code>buildIndex</code> operation.</td>
 <td>Yes</td>
 </tr>
+</tbody>
+</table>
+<p>Each element in <code>imageLabelPairs</code> is an <code>object</code> with the following properties:</p>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>image</code></td>
+<td><code>string</code></td>
+<td>The URL of an image file accessible by the service, or the Base64 encoding result of the image file content.</td>
+</tr>
+<tr>
+<td><code>label</code></td>
+<td><code>string</code></td>
+<td>Label.</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>When the request is processed successfully, the <code>result</code> of the response body has the following properties:</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
 <tr>
-<td><code>inferenceParams</code></td>
+<td><code>idMap</code></td>
 <td><code>object</code></td>
-<td>Inference parameters.</td>
-<td>No</td>
+<td>Mapping from vector ID to label.</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><b><code>removeImagesFromIndex</code></b></li>
+</ul>
+<p>Remove images (corresponding feature vectors) from the index.</p>
+<p><code>POST /shitu-index-remove</code></p>
+<ul>
+<li>The properties of the request body are as follows:</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
+<th>Required</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>ids</code></td>
+<td><code>array</code></td>
+<td>IDs of the vectors to be removed from the index.</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>The key corresponding to the index. Provided by the <code>buildIndex</code> operation.</td>
+<td>Yes</td>
 </tr>
 </tbody>
 </table>
-<p>The properties of <code>inferenceParams</code> are as follows:</p>
+<ul>
+<li>When the request is processed successfully, the <code>result</code> of the response body has the following properties:</li>
+</ul>
 <table>
 <thead>
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>idMap</code></td>
+<td><code>object</code></td>
+<td>Mapping from vector ID to label.</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><b><code>infer</code></b></li>
+</ul>
+<p>Perform image recognition.</p>
+<p><code>POST /shitu-infer</code></p>
+<ul>
+<li>The properties of the request body are as follows:</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
 <th>Required</th>
 </tr>
 </thead>
 <tbody>
 <tr>
-<td><code>topK</code></td>
-<td><code>integer</code></td>
-<td>Only the top <code>topK</code> categories with the highest scores will be retained in the results.</td>
+<td><code>image</code></td>
+<td><code>string</code></td>
+<td>The URL of an image file accessible by the service, or the Base64 encoding result of the image file content.</td>
+<td>Yes</td>
+</tr>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>The key corresponding to the index. Provided by the <code>buildIndex</code> operation.</td>
 <td>No</td>
 </tr>
 </tbody>
@@ -478,61 +656,72 @@ Below are the API references and multi-language service invocation examples:
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
 </tr>
 </thead>
 <tbody>
 <tr>
-<td><code>categories</code></td>
+<td><code>detectedObjects</code></td>
 <td><code>array</code></td>
-<td>Image category information.</td>
+<td>Information of the detected targets.</td>
 </tr>
 <tr>
 <td><code>image</code></td>
 <td><code>string</code></td>
-<td>The image classification result image. The image is in JPEG format and encoded using Base64.</td>
+<td>Recognition result image. The image is in JPEG format, encoded with Base64.</td>
 </tr>
 </tbody>
 </table>
-<p>Each element in <code>categories</code> is an <code>object</code> with the following properties:</p>
+<p>Each element in <code>detectedObjects</code> is an <code>object</code> with the following properties:</p>
 <table>
 <thead>
 <tr>
 <th>Name</th>
 <th>Type</th>
-<th>Description</th>
+<th>Meaning</th>
 </tr>
 </thead>
 <tbody>
 <tr>
-<td><code>id</code></td>
-<td><code>integer</code></td>
-<td>Category ID.</td>
+<td><code>bbox</code></td>
+<td><code>array</code></td>
+<td>Target location. The elements in the array are the x-coordinate of the upper-left corner, the y-coordinate of the upper-left corner, the x-coordinate of the lower-right corner, and the y-coordinate of the lower-right corner, respectively.</td>
+</tr>
+<tr>
+<td><code>recResults</code></td>
+<td><code>array</code></td>
+<td>Recognition results.</td>
+</tr>
+<tr>
+<td><code>score</code></td>
+<td><code>number</code></td>
+<td>Detection score.</td>
+</tr>
+</tbody>
+</table>
+<p>Each element in <code>recResults</code> is an <code>object</code> with the following properties:</p>
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Type</th>
+<th>Meaning</th>
 </tr>
+</thead>
+<tbody>
 <tr>
-<td><code>name</code></td>
+<td><code>label</code></td>
 <td><code>string</code></td>
-<td>Category name.</td>
+<td>Label.</td>
 </tr>
 <tr>
 <td><code>score</code></td>
 <td><code>number</code></td>
-<td>Category score.</td>
+<td>Recognition score.</td>
 </tr>
 </tbody>
 </table>
-<p>An example of <code>result</code> is as follows:</p>
-<pre><code class="language-json">{
-&quot;categories&quot;: [
-{
-&quot;id&quot;: 5,
-&quot;name&quot;: &quot;Rabbit&quot;,
-&quot;score&quot;: 0.93
-}
-],
-&quot;image&quot;: &quot;xxxxxx&quot;
-}
-</code></pre></details>
+</details>
 
 <details><summary>Multi-Language Service Invocation Examples</summary>
 
@@ -541,336 +730,72 @@ Below are the API references and multi-language service invocation examples:
 
 
 <pre><code class="language-python">import base64
+import pprint
+import sys
+
 import requests
 
-API_URL = &quot;http://localhost:8080/image-classification&quot;
-image_path = &quot;./demo.jpg&quot;
+API_BASE_URL = &quot;http://localhost:8080&quot;
+
+base_image_label_pairs = [
+    {&quot;image&quot;: &quot;./demo0.jpg&quot;, &quot;label&quot;: &quot;rabbit&quot;},
+    {&quot;image&quot;: &quot;./demo1.jpg&quot;, &quot;label&quot;: &quot;rabbit&quot;},
+    {&quot;image&quot;: &quot;./demo2.jpg&quot;, &quot;label&quot;: &quot;puppy&quot;},
+]
+image_label_pairs_to_add = [
+    {&quot;image&quot;: &quot;./demo3.jpg&quot;, &quot;label&quot;: &quot;puppy&quot;},
+]
+infer_image_path = &quot;./demo4.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
-with open(image_path, &quot;rb&quot;) as file:
+for pair in base_image_label_pairs:
+    with open(pair[&quot;image&quot;], &quot;rb&quot;) as file:
+        image_bytes = file.read()
+        image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
+    pair[&quot;image&quot;] = image_data
+
+payload = {&quot;imageLabelPairs&quot;: base_image_label_pairs}
+resp_index_build = requests.post(f&quot;{API_BASE_URL}/shitu-index-build&quot;, json=payload)
+if resp_index_build.status_code != 200:
+    print(f&quot;Request to shitu-index-build failed with status code {resp_index_build.status_code}.&quot;)
+    pprint.pp(resp_index_build.json())
+    sys.exit(1)
+result_index_build = resp_index_build.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_build['idMap'])}&quot;)
+
+for pair in image_label_pairs_to_add:
+    with open(pair[&quot;image&quot;], &quot;rb&quot;) as file:
+        image_bytes = file.read()
+        image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
+    pair[&quot;image&quot;] = image_data
+
+payload = {&quot;imageLabelPairs&quot;: image_label_pairs_to_add, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_index_add = requests.post(f&quot;{API_BASE_URL}/shitu-index-add&quot;, json=payload)
+if resp_index_add.status_code != 200:
+    print(f&quot;Request to shitu-index-add failed with status code {resp_index_add.status_code}.&quot;)
+    pprint.pp(resp_index_add.json())
+    sys.exit(1)
+result_index_add = resp_index_add.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_add['idMap'])}&quot;)
+
+with open(infer_image_path, &quot;rb&quot;) as file:
     image_bytes = file.read()
     image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
 
-payload = {&quot;image&quot;: image_data}
-
-response = requests.post(API_URL, json=payload)
+payload = {&quot;image&quot;: image_data, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_infer = requests.post(f&quot;{API_BASE_URL}/shitu-infer&quot;, json=payload)
+if resp_infer.status_code != 200:
+    print(f&quot;Request to shitu-infer failed with status code {resp_infer.status_code}.&quot;)
+    pprint.pp(resp_infer.json())
+    sys.exit(1)
+result_infer = resp_infer.json()[&quot;result&quot;]
 
-assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
 with open(output_image_path, &quot;wb&quot;) as file:
-    file.write(base64.b64decode(result[&quot;image&quot;]))
+    file.write(base64.b64decode(result_infer[&quot;image&quot;]))
 print(f&quot;Output image saved at {output_image_path}&quot;)
-print(&quot;\nCategories:&quot;)
-print(result[&quot;categories&quot;])
+print(&quot;\nDetected objects:&quot;)
+pprint.pp(result_infer[&quot;detectedObjects&quot;])
 </code></pre></details>
-<details><summary>C++</summary>
-
-<pre><code class="language-cpp">#include &lt;iostream&gt;
-#include &quot;cpp-httplib/httplib.h&quot; // https://github.com/Huiyicc/cpp-httplib
-#include &quot;nlohmann/json.hpp&quot; // https://github.com/nlohmann/json
-#include &quot;base64.hpp&quot; // https://github.com/tobiaslocker/base64
-
-int main() {
-    httplib::Client client(&quot;localhost:8080&quot;);
-    const std::string imagePath = &quot;./demo.jpg&quot;;
-    const std::string outputImagePath = &quot;./out.jpg&quot;;
-
-    httplib::Headers headers = {
-        {&quot;Content-Type&quot;, &quot;application/json&quot;}
-    };
-
-    std::ifstream file(imagePath, std::ios::binary | std::ios::ate);
-    std::streamsize size = file.tellg();
-    file.seekg(0, std::ios::beg);
-
-    std::vector&lt;char&gt; buffer(size);
-    if (!file.read(buffer.data(), size)) {
-        std::cerr &lt;&lt; &quot;Error reading file.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-    std::string bufferStr(reinterpret_cast&lt;const char*&gt;(buffer.data()), buffer.size());
-    std::string encodedImage = base64::to_base64(bufferStr);
-
-    nlohmann::json jsonObj;
-    jsonObj[&quot;image&quot;] = encodedImage;
-    std::string body = jsonObj.dump();
-
-    auto response = client.Post(&quot;/image-classification&quot;, headers, body, &quot;application/json&quot;);
-    if (response &amp;&amp; response-&gt;status == 200) {
-        nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
-        auto result = jsonResponse[&quot;result&quot;];
-
-        encodedImage = result[&quot;image&quot;];
-        std::string decodedString = base64::from_base64(encodedImage);
-        std::vector&lt;unsigned char&gt; decodedImage(decodedString.begin(), decodedString.end());
-        std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out);
-        if (outputImage.is_open()) {
-            outputImage.write(reinterpret_cast&lt;char*&gt;(decodedImage.data()), decodedImage.size());
-            outputImage.close();
-            std::cout &lt;&lt; &quot;Output image saved at &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        } else {
-            std::cerr &lt;&lt; &quot;Unable to open file for writing: &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        }
-
-        auto categories = result[&quot;categories&quot;];
-        std::cout &lt;&lt; &quot;\nCategories:&quot; &lt;&lt; std::endl;
-        for (const auto&amp; category : categories) {
-            std::cout &lt;&lt; category &lt;&lt; std::endl;
-        }
-    } else {
-        std::cout &lt;&lt; &quot;Failed to send HTTP request.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-
-    return 0;
-}
-</code></pre></details>
-
-<details><summary>Java</summary>
-
-<pre><code class="language-java">import okhttp3.*;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Base64;
-
-public class Main {
-    public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/image-classification&quot;;
-        String imagePath = &quot;./demo.jpg&quot;;
-        String outputImagePath = &quot;./out.jpg&quot;;
-
-        File file = new File(imagePath);
-        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
-        String imageData = Base64.getEncoder().encodeToString(fileContent);
-
-        ObjectMapper objectMapper = new ObjectMapper();
-        ObjectNode params = objectMapper.createObjectNode();
-        params.put(&quot;image&quot;, imageData);
-
-        OkHttpClient client = new OkHttpClient();
-        MediaType JSON = MediaType.Companion.get(&quot;application/json; charset=utf-8&quot;);
-        RequestBody body = RequestBody.Companion.create(params.toString(), JSON);
-        Request request = new Request.Builder()
-                .url(API_URL)
-                .post(body)
-                .build();
-
-        try (Response response = client.newCall(request).execute()) {
-            if (response.isSuccessful()) {
-                String responseBody = response.body().string();
-                JsonNode resultNode = objectMapper.readTree(responseBody);
-                JsonNode result = resultNode.get(&quot;result&quot;);
-                String base64Image = result.get(&quot;image&quot;).asText();
-                JsonNode categories = result.get(&quot;categories&quot;);
-
-                byte[] imageBytes = Base64.getDecoder().decode(base64Image);
-                try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
-                    fos.write(imageBytes);
-                }
-                System.out.println(&quot;Output image saved at &quot; + outputImagePath);
-                System.out.println(&quot;\nCategories: &quot; + categories.toString());
-            } else {
-                System.err.println(&quot;Request failed with code: &quot; + response.code());
-            }
-        }
-    }
-}
-</code></pre></details>
-
-<details><summary>Go</summary>
-
-<pre><code class="language-go">package main
-
-import (
-    &quot;bytes&quot;
-    &quot;encoding/base64&quot;
-    &quot;encoding/json&quot;
-    &quot;fmt&quot;
-    &quot;io/ioutil&quot;
-    &quot;net/http&quot;
-)
-
-func main() {
-    API_URL := &quot;http://localhost:8080/image-classification&quot;
-    imagePath := &quot;./demo.jpg&quot;
-    outputImagePath := &quot;./out.jpg&quot;
-
-    imageBytes, err := ioutil.ReadFile(imagePath)
-    if err != nil {
-        fmt.Println(&quot;Error reading image file:&quot;, err)
-        return
-    }
-    imageData := base64.StdEncoding.EncodeToString(imageBytes)
-
-    payload := map[string]string{&quot;image&quot;: imageData}
-    payloadBytes, err := json.Marshal(payload)
-    if err != nil {
-        fmt.Println(&quot;Error marshaling payload:&quot;, err)
-        return
-    }
-
-    client := &amp;http.Client{}
-    req, err := http.NewRequest(&quot;POST&quot;, API_URL, bytes.NewBuffer(payloadBytes))
-    if err != nil {
-        fmt.Println(&quot;Error creating request:&quot;, err)
-        return
-    }
-
-    res, err := client.Do(req)
-    if err != nil {
-        fmt.Println(&quot;Error sending request:&quot;, err)
-        return
-    }
-    defer res.Body.Close()
-
-    body, err := ioutil.ReadAll(res.Body)
-    if err != nil {
-        fmt.Println(&quot;Error reading response body:&quot;, err)
-        return
-    }
-    type Response struct {
-        Result struct {
-            Image      string   `json:&quot;image&quot;`
-            Categories []map[string]interface{} `json:&quot;categories&quot;`
-        } `json:&quot;result&quot;`
-    }
-    var respData Response
-    err = json.Unmarshal([]byte(string(body)), &amp;respData)
-    if err != nil {
-        fmt.Println(&quot;Error unmarshaling response body:&quot;, err)
-        return
-    }
-
-    outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image)
-    if err != nil {
-        fmt.Println(&quot;Error decoding base64 image data:&quot;, err)
-        return
-    }
-    err = ioutil.WriteFile(outputImagePath, outputImageData, 0644)
-    if err != nil {
-        fmt.Println(&quot;Error writing image to file:&quot;, err)
-        return
-    }
-    fmt.Printf(&quot;Image saved at %s.jpg\n&quot;, outputImagePath)
-    fmt.Println(&quot;\nCategories:&quot;)
-    for _, category := range respData.Result.Categories {
-        fmt.Println(category)
-    }
-}
-</code></pre></details>
-
-<details><summary>C#</summary>
-
-<pre><code class="language-csharp">using System;
-using System.IO;
-using System.Net.Http;
-using System.Net.Http.Headers;
-using System.Text;
-using System.Threading.Tasks;
-using Newtonsoft.Json.Linq;
-
-class Program
-{
-    static readonly string API_URL = &quot;http://localhost:8080/image-classification&quot;;
-    static readonly string imagePath = &quot;./demo.jpg&quot;;
-    static readonly string outputImagePath = &quot;./out.jpg&quot;;
-
-    static async Task Main(string[] args)
-    {
-        var httpClient = new HttpClient();
-
-        byte[] imageBytes = File.ReadAllBytes(imagePath);
-        string image_data = Convert.ToBase64String(imageBytes);
-
-        var payload = new JObject{ { &quot;image&quot;, image_data } };
-        var content = new StringContent(payload.ToString(), Encoding.UTF8, &quot;application/json&quot;);
-
-        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
-        response.EnsureSuccessStatusCode();
-
-        string responseBody = await response.Content.ReadAsStringAsync();
-        JObject jsonResponse = JObject.Parse(responseBody);
-
-        string base64Image = jsonResponse[&quot;result&quot;][&quot;image&quot;].ToString();
-        byte[] outputImageBytes = Convert.FromBase64String(base64Image);
-
-        File.WriteAllBytes(outputImagePath, outputImageBytes);
-        Console.WriteLine($&quot;Output image saved at {outputImagePath}&quot;);
-        Console.WriteLine(&quot;\nCategories:&quot;);
-        Console.WriteLine(jsonResponse[&quot;result&quot;][&quot;categories&quot;].ToString());
-    }
-}
-</code></pre></details>
-
-<details><summary>Node.js</summary>
-
-<pre><code class="language-js">const axios = require('axios');
-const fs = require('fs');
-
-const API_URL = 'http://localhost:8080/image-classification'
-const imagePath = './demo.jpg'
-const outputImagePath = &quot;./out.jpg&quot;;
-
-let config = {
-   method: 'POST',
-   maxBodyLength: Infinity,
-   url: API_URL,
-   data: JSON.stringify({
-    'image': encodeImageToBase64(imagePath)
-  })
-};
-
-function encodeImageToBase64(filePath) {
-  const bitmap = fs.readFileSync(filePath);
-  return Buffer.from(bitmap).toString('base64');
-}
-
-axios.request(config)
-.then((response) =&gt; {
-    const result = response.data[&quot;result&quot;];
-    const imageBuffer = Buffer.from(result[&quot;image&quot;], 'base64');
-    fs.writeFile(outputImagePath, imageBuffer, (err) =&gt; {
-      if (err) throw err;
-      console.log(`Output image saved at ${outputImagePath}`);
-    });
-    console.log(&quot;\nCategories:&quot;);
-    console.log(result[&quot;categories&quot;]);
-})
-.catch((error) =&gt; {
-  console.log(error);
-});
-</code></pre></details>
-<details><summary>PHP</summary>
-
-<pre><code class="language-php">&lt;?php
-
-$API_URL = &quot;http://localhost:8080/image-classification&quot;;
-$image_path = &quot;./demo.jpg&quot;;
-$output_image_path = &quot;./out.jpg&quot;;
-
-$image_data = base64_encode(file_get_contents($image_path));
-$payload = array(&quot;image&quot; =&gt; $image_data);
-
-$ch = curl_init($API_URL);
-curl_setopt($ch, CURLOPT_POST, true);
-curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
-curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-$response = curl_exec($ch);
-curl_close($ch);
-
-$result = json_decode($response, true)[&quot;result&quot;];
-file_put_contents($output_image_path, base64_decode($result[&quot;image&quot;]));
-echo &quot;Output image saved at &quot; . $output_image_path . &quot;\n&quot;;
-echo &quot;\nCategories:\n&quot;;
-print_r($result[&quot;categories&quot;]);
-?&gt;
-</code></pre></details>
-
 </details>
 <br/>
 
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
index 99c8d70328..961a83873d 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
@@ -397,9 +397,10 @@ data_root             # 数据集根目录，目录名称可以改变
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -448,12 +449,12 @@ data_root             # 数据集根目录，目录名称可以改变
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
-<li><b><code>infer</code></b></li>
+<li><b><code>buildIndex</code></b></li>
 </ul>
-<p>获取图像OCR结果。</p>
-<p><code>POST /ocr</code></p>
+<p>构建特征向量索引。</p>
+<p><code>POST /shitu-index-build</code></p>
 <ul>
 <li>请求体的属性如下：</li>
 </ul>
@@ -468,20 +469,191 @@ data_root             # 数据集根目录，目录名称可以改变
 </thead>
 <tbody>
 <tr>
+<td><code>imageLabelPairs</code></td>
+<td><code>array</code></td>
+<td>用于构建索引的图像-标签对。</td>
+<td>是</td>
+</tr>
+</tbody>
+</table>
+<p><code>imageLabelPairs</code>中的每个元素为一个<code>object</code>，具有如下属性：</p>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
 <td><code>image</code></td>
 <td><code>string</code></td>
 <td>服务可访问的图像文件的URL或图像文件内容的Base64编码结果。</td>
+</tr>
+<tr>
+<td><code>label</code></td>
+<td><code>string</code></td>
+<td>标签。</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>请求处理成功时，响应体的<code>result</code>具有如下属性：</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>索引对应的键，用于标识建立的索引。可用作其他操作的输入。</td>
+</tr>
+<tr>
+<td><code>idMap</code></td>
+<td><code>object</code></td>
+<td>向量ID到标签的映射。</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><b><code>addImagesToIndex</code></b></li>
+</ul>
+<p>将图像（对应的特征向量）加入索引。</p>
+<p><code>POST /shitu-index-add</code></p>
+<ul>
+<li>请求体的属性如下：</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+<th>是否必填</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>imageLabelPairs</code></td>
+<td><code>array</code></td>
+<td>用于更新索引的图像-标签对。</td>
 <td>是</td>
 </tr>
 <tr>
-<td><code>inferenceParams</code></td>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>索引对应的键。由<code>buildIndex</code>操作提供。</td>
+<td>是</td>
+</tr>
+</tbody>
+</table>
+<p><code>imageLabelPairs</code>中的每个元素为一个<code>object</code>，具有如下属性：</p>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>image</code></td>
+<td><code>string</code></td>
+<td>服务可访问的图像文件的URL或图像文件内容的Base64编码结果。</td>
+</tr>
+<tr>
+<td><code>label</code></td>
+<td><code>string</code></td>
+<td>标签。</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>请求处理成功时，响应体的<code>result</code>具有如下属性：</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>idMap</code></td>
 <td><code>object</code></td>
-<td>推理参数。</td>
-<td>否</td>
+<td>向量ID到标签的映射。</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li><b><code>removeImagesFromIndex</code></b></li>
+</ul>
+<p>从索引中移除图像（对应的特征向量）。</p>
+<p><code>POST /shitu-index-remove</code></p>
+<ul>
+<li>请求体的属性如下：</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+<th>是否必填</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>ids</code></td>
+<td><code>array</code></td>
+<td>需要从索引中移除的向量的ID。</td>
+<td>是</td>
+</tr>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>索引对应的键。由<code>buildIndex</code>操作提供。</td>
+<td>是</td>
+</tr>
+</tbody>
+</table>
+<ul>
+<li>请求处理成功时，响应体的<code>result</code>具有如下属性：</li>
+</ul>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>idMap</code></td>
+<td><code>object</code></td>
+<td>向量ID到标签的映射。</td>
 </tr>
 </tbody>
 </table>
-<p><code>inferenceParams</code>的属性如下：</p>
+<ul>
+<li><b><code>infer</code></b></li>
+</ul>
+<p>进行图像识别。</p>
+<p><code>POST /shitu-infer</code></p>
+<ul>
+<li>请求体的属性如下：</li>
+</ul>
 <table>
 <thead>
 <tr>
@@ -493,9 +665,15 @@ data_root             # 数据集根目录，目录名称可以改变
 </thead>
 <tbody>
 <tr>
-<td><code>maxLongSide</code></td>
-<td><code>integer</code></td>
-<td>推理时，若文本检测模型的输入图像较长边的长度大于<code>maxLongSide</code>，则将对图像进行缩放，使其较长边的长度等于<code>maxLongSide</code>。</td>
+<td><code>image</code></td>
+<td><code>string</code></td>
+<td>服务可访问的图像文件的URL或图像文件内容的Base64编码结果。</td>
+<td>是</td>
+</tr>
+<tr>
+<td><code>indexKey</code></td>
+<td><code>string</code></td>
+<td>索引对应的键。由<code>buildIndex</code>操作提供。</td>
 <td>否</td>
 </tr>
 </tbody>
@@ -513,18 +691,18 @@ data_root             # 数据集根目录，目录名称可以改变
 </thead>
 <tbody>
 <tr>
-<td><code>texts</code></td>
+<td><code>detectedObjects</code></td>
 <td><code>array</code></td>
-<td>文本位置、内容和得分。</td>
+<td>检测到的目标的信息。</td>
 </tr>
 <tr>
 <td><code>image</code></td>
 <td><code>string</code></td>
-<td>OCR结果图，其中标注检测到的文本位置。图像为JPEG格式，使用Base64编码。</td>
+<td>识别结果图。图像为JPEG格式，使用Base64编码。</td>
 </tr>
 </tbody>
 </table>
-<p><code>texts</code>中的每个元素为一个<code>object</code>，具有如下属性：</p>
+<p><code>detectedObjects</code>中的每个元素为一个<code>object</code>，具有如下属性：</p>
 <table>
 <thead>
 <tr>
@@ -535,73 +713,45 @@ data_root             # 数据集根目录，目录名称可以改变
 </thead>
 <tbody>
 <tr>
-<td><code>poly</code></td>
+<td><code>bbox</code></td>
+<td><code>array</code></td>
+<td>目标位置。数组中元素依次为边界框左上角x坐标、左上角y坐标、右下角x坐标以及右下角y坐标。</td>
+</tr>
+<tr>
+<td><code>recResults</code></td>
 <td><code>array</code></td>
-<td>文本位置。数组中元素依次为包围文本的多边形的顶点坐标。</td>
+<td>识别结果。</td>
 </tr>
 <tr>
-<td><code>text</code></td>
+<td><code>score</code></td>
+<td><code>number</code></td>
+<td>检测得分。</td>
+</tr>
+</tbody>
+</table>
+<p><code>recResults</code>中的每个元素为一个<code>object</code>，具有如下属性：</p>
+<table>
+<thead>
+<tr>
+<th>名称</th>
+<th>类型</th>
+<th>含义</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>label</code></td>
 <td><code>string</code></td>
-<td>文本内容。</td>
+<td>标签。</td>
 </tr>
 <tr>
 <td><code>score</code></td>
 <td><code>number</code></td>
-<td>文本识别得分。</td>
+<td>识别得分。</td>
 </tr>
 </tbody>
 </table>
-<p><code>result</code>示例如下：</p>
-<pre><code class="language-json">{
-&quot;texts&quot;: [
-{
-&quot;poly&quot;: [
-[
-444,
-244
-],
-[
-705,
-244
-],
-[
-705,
-311
-],
-[
-444,
-311
-]
-],
-&quot;text&quot;: &quot;北京南站&quot;,
-&quot;score&quot;: 0.9
-},
-{
-&quot;poly&quot;: [
-[
-992,
-248
-],
-[
-1263,
-251
-],
-[
-1263,
-318
-],
-[
-992,
-315
-]
-],
-&quot;text&quot;: &quot;天津站&quot;,
-&quot;score&quot;: 0.5
-}
-],
-&quot;image&quot;: &quot;xxxxxx&quot;
-}
-</code></pre></details>
+</details>
 
 <details><summary>多语言调用服务示例</summary>
 
@@ -610,358 +760,71 @@ data_root             # 数据集根目录，目录名称可以改变
 
 
 <pre><code class="language-python">import base64
+import pprint
+import sys
+
 import requests
 
-API_URL = &quot;http://localhost:8080/ocr&quot; # 服务URL
-image_path = &quot;./demo.jpg&quot;
+API_BASE_URL = &quot;http://localhost:8080&quot;
+
+base_image_label_pairs = [
+    {&quot;image&quot;: &quot;./demo0.jpg&quot;, &quot;label&quot;: &quot;兔子&quot;},
+    {&quot;image&quot;: &quot;./demo1.jpg&quot;, &quot;label&quot;: &quot;兔子&quot;},
+    {&quot;image&quot;: &quot;./demo2.jpg&quot;, &quot;label&quot;: &quot;小狗&quot;},
+]
+image_label_pairs_to_add = [
+    {&quot;image&quot;: &quot;./demo3.jpg&quot;, &quot;label&quot;: &quot;小狗&quot;},
+]
+infer_image_path = &quot;./demo4.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
-# 对本地图像进行Base64编码
-with open(image_path, &quot;rb&quot;) as file:
+for pair in base_image_label_pairs:
+    with open(pair[&quot;image&quot;], &quot;rb&quot;) as file:
+        image_bytes = file.read()
+        image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
+    pair[&quot;image&quot;] = image_data
+
+payload = {&quot;imageLabelPairs&quot;: base_image_label_pairs}
+resp_index_build = requests.post(f&quot;{API_BASE_URL}/shitu-index-build&quot;, json=payload)
+if resp_index_build.status_code != 200:
+    print(f&quot;Request to shitu-index-build failed with status code {resp_index_build.status_code}.&quot;)
+    pprint.pp(resp_index_build.json())
+    sys.exit(1)
+result_index_build = resp_index_build.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_build['idMap'])}&quot;)
+
+for pair in image_label_pairs_to_add:
+    with open(pair[&quot;image&quot;], &quot;rb&quot;) as file:
+        image_bytes = file.read()
+        image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
+    pair[&quot;image&quot;] = image_data
+
+payload = {&quot;imageLabelPairs&quot;: image_label_pairs_to_add, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_index_add = requests.post(f&quot;{API_BASE_URL}/shitu-index-add&quot;, json=payload)
+if resp_index_add.status_code != 200:
+    print(f&quot;Request to shitu-index-add failed with status code {resp_index_add.status_code}.&quot;)
+    pprint.pp(resp_index_add.json())
+    sys.exit(1)
+result_index_add = resp_index_add.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_add['idMap'])}&quot;)
+
+with open(infer_image_path, &quot;rb&quot;) as file:
     image_bytes = file.read()
     image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
 
-payload = {&quot;image&quot;: image_data}  # Base64编码的文件内容或者图像URL
-
-# 调用API
-response = requests.post(API_URL, json=payload)
+payload = {&quot;image&quot;: image_data, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_infer = requests.post(f&quot;{API_BASE_URL}/shitu-infer&quot;, json=payload)
+if resp_infer.status_code != 200:
+    print(f&quot;Request to shitu-infer failed with status code {resp_infer.status_code}.&quot;)
+    pprint.pp(resp_infer.json())
+    sys.exit(1)
+result_infer = resp_infer.json()[&quot;result&quot;]
 
-# 处理接口返回数据
-assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
 with open(output_image_path, &quot;wb&quot;) as file:
-    file.write(base64.b64decode(result[&quot;image&quot;]))
+    file.write(base64.b64decode(result_infer[&quot;image&quot;]))
 print(f&quot;Output image saved at {output_image_path}&quot;)
-print(&quot;\nDetected texts:&quot;)
-print(result[&quot;texts&quot;])
-</code></pre></details>
-
-<details><summary>C++</summary>
-
-<pre><code class="language-cpp">#include &lt;iostream&gt;
-#include &quot;cpp-httplib/httplib.h&quot; // https://github.com/Huiyicc/cpp-httplib
-#include &quot;nlohmann/json.hpp&quot; // https://github.com/nlohmann/json
-#include &quot;base64.hpp&quot; // https://github.com/tobiaslocker/base64
-
-int main() {
-    httplib::Client client(&quot;localhost:8080&quot;);
-    const std::string imagePath = &quot;./demo.jpg&quot;;
-    const std::string outputImagePath = &quot;./out.jpg&quot;;
-
-    httplib::Headers headers = {
-        {&quot;Content-Type&quot;, &quot;application/json&quot;}
-    };
-
-    // 对本地图像进行Base64编码
-    std::ifstream file(imagePath, std::ios::binary | std::ios::ate);
-    std::streamsize size = file.tellg();
-    file.seekg(0, std::ios::beg);
-
-    std::vector&lt;char&gt; buffer(size);
-    if (!file.read(buffer.data(), size)) {
-        std::cerr &lt;&lt; &quot;Error reading file.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-    std::string bufferStr(reinterpret_cast&lt;const char*&gt;(buffer.data()), buffer.size());
-    std::string encodedImage = base64::to_base64(bufferStr);
-
-    nlohmann::json jsonObj;
-    jsonObj[&quot;image&quot;] = encodedImage;
-    std::string body = jsonObj.dump();
-
-    // 调用API
-    auto response = client.Post(&quot;/ocr&quot;, headers, body, &quot;application/json&quot;);
-    // 处理接口返回数据
-    if (response &amp;&amp; response-&gt;status == 200) {
-        nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
-        auto result = jsonResponse[&quot;result&quot;];
-
-        encodedImage = result[&quot;image&quot;];
-        std::string decodedString = base64::from_base64(encodedImage);
-        std::vector&lt;unsigned char&gt; decodedImage(decodedString.begin(), decodedString.end());
-        std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out);
-        if (outputImage.is_open()) {
-            outputImage.write(reinterpret_cast&lt;char*&gt;(decodedImage.data()), decodedImage.size());
-            outputImage.close();
-            std::cout &lt;&lt; &quot;Output image saved at &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        } else {
-            std::cerr &lt;&lt; &quot;Unable to open file for writing: &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        }
-
-        auto texts = result[&quot;texts&quot;];
-        std::cout &lt;&lt; &quot;\nDetected texts:&quot; &lt;&lt; std::endl;
-        for (const auto&amp; text : texts) {
-            std::cout &lt;&lt; text &lt;&lt; std::endl;
-        }
-    } else {
-        std::cout &lt;&lt; &quot;Failed to send HTTP request.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-
-    return 0;
-}
-</code></pre></details>
-
-<details><summary>Java</summary>
-
-<pre><code class="language-java">import okhttp3.*;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Base64;
-
-public class Main {
-    public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/ocr&quot;; // 服务URL
-        String imagePath = &quot;./demo.jpg&quot;; // 本地图像
-        String outputImagePath = &quot;./out.jpg&quot;; // 输出图像
-
-        // 对本地图像进行Base64编码
-        File file = new File(imagePath);
-        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
-        String imageData = Base64.getEncoder().encodeToString(fileContent);
-
-        ObjectMapper objectMapper = new ObjectMapper();
-        ObjectNode params = objectMapper.createObjectNode();
-        params.put(&quot;image&quot;, imageData); // Base64编码的文件内容或者图像URL
-
-        // 创建 OkHttpClient 实例
-        OkHttpClient client = new OkHttpClient();
-        MediaType JSON = MediaType.Companion.get(&quot;application/json; charset=utf-8&quot;);
-        RequestBody body = RequestBody.Companion.create(params.toString(), JSON);
-        Request request = new Request.Builder()
-                .url(API_URL)
-                .post(body)
-                .build();
-
-        // 调用API并处理接口返回数据
-        try (Response response = client.newCall(request).execute()) {
-            if (response.isSuccessful()) {
-                String responseBody = response.body().string();
-                JsonNode resultNode = objectMapper.readTree(responseBody);
-                JsonNode result = resultNode.get(&quot;result&quot;);
-                String base64Image = result.get(&quot;image&quot;).asText();
-                JsonNode texts = result.get(&quot;texts&quot;);
-
-                byte[] imageBytes = Base64.getDecoder().decode(base64Image);
-                try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
-                    fos.write(imageBytes);
-                }
-                System.out.println(&quot;Output image saved at &quot; + outputImagePath);
-                System.out.println(&quot;\nDetected texts: &quot; + texts.toString());
-            } else {
-                System.err.println(&quot;Request failed with code: &quot; + response.code());
-            }
-        }
-    }
-}
-</code></pre></details>
-
-<details><summary>Go</summary>
-
-<pre><code class="language-go">package main
-
-import (
-    &quot;bytes&quot;
-    &quot;encoding/base64&quot;
-    &quot;encoding/json&quot;
-    &quot;fmt&quot;
-    &quot;io/ioutil&quot;
-    &quot;net/http&quot;
-)
-
-func main() {
-    API_URL := &quot;http://localhost:8080/ocr&quot;
-    imagePath := &quot;./demo.jpg&quot;
-    outputImagePath := &quot;./out.jpg&quot;
-
-    // 对本地图像进行Base64编码
-    imageBytes, err := ioutil.ReadFile(imagePath)
-    if err != nil {
-        fmt.Println(&quot;Error reading image file:&quot;, err)
-        return
-    }
-    imageData := base64.StdEncoding.EncodeToString(imageBytes)
-
-    payload := map[string]string{&quot;image&quot;: imageData} // Base64编码的文件内容或者图像URL
-    payloadBytes, err := json.Marshal(payload)
-    if err != nil {
-        fmt.Println(&quot;Error marshaling payload:&quot;, err)
-        return
-    }
-
-    // 调用API
-    client := &amp;http.Client{}
-    req, err := http.NewRequest(&quot;POST&quot;, API_URL, bytes.NewBuffer(payloadBytes))
-    if err != nil {
-        fmt.Println(&quot;Error creating request:&quot;, err)
-        return
-    }
-
-    res, err := client.Do(req)
-    if err != nil {
-        fmt.Println(&quot;Error sending request:&quot;, err)
-        return
-    }
-    defer res.Body.Close()
-
-    // 处理接口返回数据
-    body, err := ioutil.ReadAll(res.Body)
-    if err != nil {
-        fmt.Println(&quot;Error reading response body:&quot;, err)
-        return
-    }
-    type Response struct {
-        Result struct {
-            Image      string   `json:&quot;image&quot;`
-            Texts []map[string]interface{} `json:&quot;texts&quot;`
-        } `json:&quot;result&quot;`
-    }
-    var respData Response
-    err = json.Unmarshal([]byte(string(body)), &amp;respData)
-    if err != nil {
-        fmt.Println(&quot;Error unmarshaling response body:&quot;, err)
-        return
-    }
-
-    outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image)
-    if err != nil {
-        fmt.Println(&quot;Error decoding base64 image data:&quot;, err)
-        return
-    }
-    err = ioutil.WriteFile(outputImagePath, outputImageData, 0644)
-    if err != nil {
-        fmt.Println(&quot;Error writing image to file:&quot;, err)
-        return
-    }
-    fmt.Printf(&quot;Image saved at %s.jpg\n&quot;, outputImagePath)
-    fmt.Println(&quot;\nDetected texts:&quot;)
-    for _, text := range respData.Result.Texts {
-        fmt.Println(text)
-    }
-}
-</code></pre></details>
-
-<details><summary>C#</summary>
-
-<pre><code class="language-csharp">using System;
-using System.IO;
-using System.Net.Http;
-using System.Net.Http.Headers;
-using System.Text;
-using System.Threading.Tasks;
-using Newtonsoft.Json.Linq;
-
-class Program
-{
-    static readonly string API_URL = &quot;http://localhost:8080/ocr&quot;;
-    static readonly string imagePath = &quot;./demo.jpg&quot;;
-    static readonly string outputImagePath = &quot;./out.jpg&quot;;
-
-    static async Task Main(string[] args)
-    {
-        var httpClient = new HttpClient();
-
-        // 对本地图像进行Base64编码
-        byte[] imageBytes = File.ReadAllBytes(imagePath);
-        string image_data = Convert.ToBase64String(imageBytes);
-
-        var payload = new JObject{ { &quot;image&quot;, image_data } }; // Base64编码的文件内容或者图像URL
-        var content = new StringContent(payload.ToString(), Encoding.UTF8, &quot;application/json&quot;);
-
-        // 调用API
-        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
-        response.EnsureSuccessStatusCode();
-
-        // 处理接口返回数据
-        string responseBody = await response.Content.ReadAsStringAsync();
-        JObject jsonResponse = JObject.Parse(responseBody);
-
-        string base64Image = jsonResponse[&quot;result&quot;][&quot;image&quot;].ToString();
-        byte[] outputImageBytes = Convert.FromBase64String(base64Image);
-
-        File.WriteAllBytes(outputImagePath, outputImageBytes);
-        Console.WriteLine($&quot;Output image saved at {outputImagePath}&quot;);
-        Console.WriteLine(&quot;\nDetected texts:&quot;);
-        Console.WriteLine(jsonResponse[&quot;result&quot;][&quot;texts&quot;].ToString());
-    }
-}
-</code></pre></details>
-
-<details><summary>Node.js</summary>
-
-<pre><code class="language-js">const axios = require('axios');
-const fs = require('fs');
-
-const API_URL = 'http://localhost:8080/ocr'
-const imagePath = './demo.jpg'
-const outputImagePath = &quot;./out.jpg&quot;;
-
-let config = {
-   method: 'POST',
-   maxBodyLength: Infinity,
-   url: API_URL,
-   data: JSON.stringify({
-    'image': encodeImageToBase64(imagePath)  // Base64编码的文件内容或者图像URL
-  })
-};
-
-// 对本地图像进行Base64编码
-function encodeImageToBase64(filePath) {
-  const bitmap = fs.readFileSync(filePath);
-  return Buffer.from(bitmap).toString('base64');
-}
-
-// 调用API
-axios.request(config)
-.then((response) =&gt; {
-    // 处理接口返回数据
-    const result = response.data[&quot;result&quot;];
-    const imageBuffer = Buffer.from(result[&quot;image&quot;], 'base64');
-    fs.writeFile(outputImagePath, imageBuffer, (err) =&gt; {
-      if (err) throw err;
-      console.log(`Output image saved at ${outputImagePath}`);
-    });
-    console.log(&quot;\nDetected texts:&quot;);
-    console.log(result[&quot;texts&quot;]);
-})
-.catch((error) =&gt; {
-  console.log(error);
-});
-</code></pre></details>
-
-<details><summary>PHP</summary>
-
-<pre><code class="language-php">&lt;?php
-
-$API_URL = &quot;http://localhost:8080/ocr&quot;; // 服务URL
-$image_path = &quot;./demo.jpg&quot;;
-$output_image_path = &quot;./out.jpg&quot;;
-
-// 对本地图像进行Base64编码
-$image_data = base64_encode(file_get_contents($image_path));
-$payload = array(&quot;image&quot; =&gt; $image_data); // Base64编码的文件内容或者图像URL
-
-// 调用API
-$ch = curl_init($API_URL);
-curl_setopt($ch, CURLOPT_POST, true);
-curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
-curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-$response = curl_exec($ch);
-curl_close($ch);
-
-// 处理接口返回数据
-$result = json_decode($response, true)[&quot;result&quot;];
-file_put_contents($output_image_path, base64_decode($result[&quot;image&quot;]));
-echo &quot;Output image saved at &quot; . $output_image_path . &quot;\n&quot;;
-echo &quot;\nDetected texts:\n&quot;;
-print_r($result[&quot;texts&quot;]);
-
-?&gt;
+print(&quot;\nDetected objects:&quot;)
+pprint.pp(result_infer[&quot;detectedObjects&quot;])
 </code></pre></details>
 </details>
 <br/>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.en.md
index 7513831482..79bfda81d4 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.en.md
@@ -217,9 +217,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -268,7 +269,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md
index 234a435c18..d4d248d3db 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md
@@ -216,9 +216,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -267,7 +268,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md
index 29a67640f0..9349787118 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md
@@ -803,9 +803,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -854,7 +855,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>Main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md
index d87400ae3a..80d354e9b5 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md
@@ -802,9 +802,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -853,7 +854,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.en.md
index f975af8e44..6560cf2c7c 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.en.md
@@ -243,9 +243,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -294,7 +295,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>Main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.md
index d77eeea049..96a9c8cd78 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.md
@@ -246,9 +246,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -297,7 +298,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md
index 9b64abeae2..6ad46972d4 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md
@@ -332,9 +332,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -383,7 +384,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md b/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md
index 7af3b1c707..96518fa990 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md
@@ -327,9 +327,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -378,7 +379,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md
index ec4fb5faa2..e8e52dd55d 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md
@@ -492,9 +492,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -543,7 +544,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>Main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md
index b6c64a8f24..763b3b81e6 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md
@@ -493,9 +493,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -544,7 +545,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md
index 6fb22cfc3a..3bae963e2c 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md
@@ -250,9 +250,10 @@ Below are the API reference and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -301,7 +302,7 @@ Below are the API reference and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md
index 3e46317514..918c3cf682 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md
@@ -250,9 +250,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -301,7 +302,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md
index da1a4bfd71..9b7a4e0804 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md
@@ -364,9 +364,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -415,7 +416,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md b/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md
index 7fc0cd72e8..5305487667 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md
@@ -361,9 +361,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -412,7 +413,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
index ad8d4cbb28..ba5db67ce7 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
@@ -240,9 +240,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -291,7 +292,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
index 0f45a68ab0..05ee161f76 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
@@ -237,9 +237,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -288,7 +289,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md
index a71dd36107..87fa9a1826 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md
@@ -248,9 +248,10 @@ Below are the API reference and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -299,7 +300,7 @@ Below are the API reference and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md
index 6b08cfa798..60250684c7 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md
@@ -247,9 +247,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -298,7 +299,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
index c97a080672..bec85e481c 100644
--- a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
+++ b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
@@ -617,9 +617,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -668,7 +669,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>analyzeImages</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md
index d3d2b8e94e..24384c4921 100644
--- a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md
+++ b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md
@@ -586,9 +586,10 @@ chat_result.print()
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -637,7 +638,7 @@ chat_result.print()
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>analyzeImages</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.en.md
index 0d431adaa0..8dab252a2d 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.en.md
@@ -309,9 +309,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -360,7 +361,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md b/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md
index 5379ef7579..f8942179df 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md
@@ -311,9 +311,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -362,7 +363,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
index 7119685673..9f7e031b1d 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md
@@ -290,9 +290,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -341,7 +342,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
index 512cdf3d04..73f30d4a9d 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md
@@ -291,9 +291,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -342,7 +343,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md
index 2588f78009..84bb8d1904 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md
@@ -529,9 +529,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body attributes are as follows:</li>
 </ul>
 <table>
@@ -580,7 +581,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md
index cd08565101..2e8c8c3198 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md
@@ -532,9 +532,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -583,7 +584,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
index c6196c068a..5e6a54835e 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md
@@ -439,9 +439,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -490,7 +491,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
index 4dcda8fc09..9b12cd12f3 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md
@@ -449,9 +449,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -500,7 +501,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.en.md b/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.en.md
index 977316d1f2..a0b881a2ed 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.en.md
@@ -347,9 +347,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -398,7 +399,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md b/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md
index 56b47ce845..8715e7a351 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md
@@ -431,9 +431,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -482,7 +483,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.en.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.en.md
index 54175e0eaa..fe36eb33f7 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.en.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.en.md
@@ -264,9 +264,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the properties of the response body are as follows:</li>
 </ul>
 <table>
@@ -315,7 +316,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.md
index 33a0563751..b7c91bad02 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.md
@@ -268,9 +268,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -319,7 +320,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.en.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.en.md
index 409a8bffe4..40ff892ebb 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.en.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.en.md
@@ -229,9 +229,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -280,7 +281,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service:</p>
+<p>Main operations provided by the service:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.md
index 6ee65c8654..a1eecdf28d 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification.md
@@ -226,9 +226,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -277,7 +278,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.en.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.en.md
index aac2accc45..bb850d7b24 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.en.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.en.md
@@ -274,9 +274,10 @@ Below are the API references and multi-language service invocation examples:
 
 <details><summary>API Reference</summary>
 
-<p>For all operations provided by the service:</p>
+<p>For the main operations provided by the service:</p>
 <ul>
-<li>Both the response body and the request body for POST requests are JSON data (JSON objects).</li>
+<li>The HTTP request method is POST.</li>
+<li>The request body and the response body are both JSON data (JSON objects).</li>
 <li>When the request is processed successfully, the response status code is <code>200</code>, and the response body properties are as follows:</li>
 </ul>
 <table>
@@ -325,7 +326,7 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Operations provided by the service are as follows:</p>
+<p>The main operations provided by the service are as follows:</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.md b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.md
index 3e14c09f7a..1e1d8af696 100644
--- a/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.md
+++ b/docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.md
@@ -274,9 +274,10 @@ for res in output:
 
 <details><summary>API参考</summary>
 
-<p>对于服务提供的所有操作：</p>
+<p>对于服务提供的主要操作：</p>
 <ul>
-<li>响应体以及POST请求的请求体均为JSON数据（JSON对象）。</li>
+<li>HTTP请求方法为POST。</li>
+<li>请求体和响应体均为JSON数据（JSON对象）。</li>
 <li>当请求处理成功时，响应状态码为<code>200</code>，响应体的属性如下：</li>
 </ul>
 <table>
@@ -325,7 +326,7 @@ for res in output:
 </tr>
 </tbody>
 </table>
-<p>服务提供的操作如下：</p>
+<p>服务提供的主要操作如下：</p>
 <ul>
 <li><b><code>infer</code></b></li>
 </ul>
diff --git a/paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py b/paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py
index c207eb8ff2..4b9d558f98 100644
--- a/paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py
+++ b/paddlex/inference/pipelines/ppchatocrv3/ppchatocrv3.py
@@ -240,7 +240,7 @@ def get_visual_result(
             img_info_list = list(self.img_reader(inputs))[0]
         elif isinstance(inputs, list):
             assert not any(
-                s.endswith(".pdf") for s in inputs
+                isinstance(s, str) and s.endswith(".pdf") for s in inputs
             ), "List containing pdf is not supported; only a list of images or a single PDF is supported."
             img_info_list = [x[0] for x in list(self.img_reader(inputs))]
 
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py b/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
index 998748c55f..9083e946b3 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
@@ -24,6 +24,7 @@
 from ...formula_recognition import FormulaRecognitionPipeline
 from ...layout_parsing import LayoutParsingPipeline
 from ...ocr import OCRPipeline
+from ...pp_shitu_v2 import ShiTuV2Pipeline
 from ...ppchatocrv3 import PPChatOCRPipeline
 from ...seal_recognition import SealOCRPipeline
 from ...single_model_pipeline import (
@@ -58,6 +59,7 @@
 from .vehicle_attribute_recognition import (
     create_pipeline_app as create_vehicle_attribute_recognition_app,
 )
+from .pp_shitu_v2 import create_pipeline_app as create_pp_shitu_v2_app
 from .ppchatocrv3 import create_pipeline_app as create_ppchatocrv3_app
 from .seal_recognition import create_pipeline_app as create_seal_recognition_app
 from .semantic_segmentation import (
@@ -180,6 +182,12 @@ def create_pipeline_app(
                 "Expected `pipeline` to be an instance of `VehicleAttributeRecPipeline`."
             )
         return create_vehicle_attribute_recognition_app(pipeline, app_config)
+    elif pipeline_name == "PP-ShiTuV2":
+        if not isinstance(pipeline, ShiTuV2Pipeline):
+            raise TypeError(
+                "Expected `pipeline` to be an instance of `ShiTuV2Pipeline`."
+            )
+        return create_pp_shitu_v2_app(pipeline, app_config)
     else:
         if BasePipeline.get(pipeline_name):
             raise ValueError(
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py b/paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py
index c9751ef2fb..2dd568b462 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py
@@ -60,8 +60,8 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
             pred = result["pred"][0].tolist()
             size = [len(pred), len(pred[0])]
             label_map = [item for sublist in pred for item in sublist]
-            output_image_base64 = serving_utils.image_to_base64(
-                result.img.convert("RGB")
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img.convert("RGB"))
             )
 
             return ResultResponse(
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
index acaeb7bee3..ae139025f7 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
@@ -88,7 +88,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         latex=latex,
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py b/paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py
index fe01e7df3e..dd1a89fca6 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/image_classification.py
@@ -83,7 +83,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                 zip(result["class_ids"], cat_names, result["scores"]), None, top_k
             ):
                 categories.append(Category(id=id_, name=name, score=score))
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py b/paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py
index c90ee714e4..d434a94129 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/instance_segmentation.py
@@ -94,7 +94,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         mask=mask,
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py b/paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py
index aa1744cc49..841663a0e2 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py
@@ -15,7 +15,6 @@
 import os
 from typing import Final, List, Literal, Optional, Tuple
 
-import cv2
 import numpy as np
 from fastapi import FastAPI, HTTPException
 from numpy.typing import ArrayLike
@@ -24,7 +23,7 @@
 
 from .....utils import logging
 from ...layout_parsing import LayoutParsingPipeline
-from .. import file_storage
+from ..storage import SupportsGetURL, Storage, create_storage
 from .. import utils as serving_utils
 from ..app import AppConfig, create_app
 from ..models import Response, ResultResponse
@@ -72,16 +71,17 @@ def _postprocess_image(
     img: ArrayLike,
     request_id: str,
     filename: str,
-    file_storage_config: file_storage.FileStorageConfig,
+    file_storage: Optional[Storage],
 ) -> str:
     key = f"{request_id}/{filename}"
     ext = os.path.splitext(filename)[1]
     img = np.asarray(img)
-    _, encoded_img = cv2.imencode(ext, img)
-    encoded_img = encoded_img.tobytes()
-    return file_storage.postprocess_file(
-        encoded_img, config=file_storage_config, key=key
-    )
+    img_bytes = serving_utils.image_array_to_bytes(img, ext=ext)
+    if file_storage is not None:
+        file_storage.set(key, img_bytes)
+        if isinstance(file_storage, SupportsGetURL):
+            return file_storage.get_url(key)
+    return serving_utils.base64_encode(img_bytes)
 
 
 def create_pipeline_app(
@@ -91,12 +91,10 @@ def create_pipeline_app(
         pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
     )
 
-    if "file_storage_config" in ctx.extra:
-        ctx.extra["file_storage_config"] = file_storage.parse_file_storage_config(
-            ctx.extra["file_storage_config"]
-        )
+    if ctx.config.extra and "file_storage" in ctx.config.extra:
+        ctx.extra["file_storage"] = create_storage(ctx.config.extra["file_storage"])
     else:
-        ctx.extra["file_storage_config"] = file_storage.InMemoryStorageConfig()
+        ctx.extra["file_storage"] = None
     ctx.extra.setdefault("max_img_size", _DEFAULT_MAX_IMG_SIZE)
     ctx.extra.setdefault("max_num_imgs", _DEFAULT_MAX_NUM_IMGS)
 
@@ -172,7 +170,7 @@ async def _infer(
                             subitem[label]["img"],
                             request_id=request_id,
                             filename=f"image_{i}_{j}.jpg",
-                            file_storage_config=ctx.extra["file_storage_config"],
+                            file_storage=ctx.extra["file_storage"],
                         )
                         text = subitem[label]["image_text"]
                     else:
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py b/paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py
index 5f357c244c..59cf4c9f29 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py
@@ -72,7 +72,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                 result["class_ids"], cat_names, result["scores"]
             ):
                 categories.append(Category(id=id_, name=name, score=score))
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py b/paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py
index e758b17c73..68c039015d 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/object_detection.py
@@ -72,7 +72,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         score=obj["score"],
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py b/paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py
index 7aa194abaa..d8f83d5171 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/ocr.py
@@ -80,7 +80,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                 result["dt_polys"], result["rec_text"], result["rec_score"]
             ):
                 texts.append(Text(poly=poly, text=text, score=score))
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py
index 1b4a2eae60..8f4c42fa2a 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/pedestrian_attribute_recognition.py
@@ -84,7 +84,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         score=obj["det_score"],
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py b/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
new file mode 100644
index 0000000000..b17f7d7ec5
--- /dev/null
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
@@ -0,0 +1,313 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import faiss
+import pickle
+from typing import Dict, List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
+
+from .....utils import logging
+from ....components.retrieval.faiss import IndexData
+from ...pp_shitu_v2 import ShiTuV2Pipeline
+from ..storage import create_storage
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+
+class ImageLabelPair(BaseModel):  # an image paired with its label
+    image: str  # resolved via serving_utils.get_raw_bytes by the index endpoints
+    label: str  # passed with the image to build_index / append_index
+
+
+class BuildIndexRequest(BaseModel):  # request body of POST /shitu-index-build
+    imageLabelPairs: List[ImageLabelPair]  # images and labels the index is built from
+
+
+class BuildIndexResult(BaseModel):  # result payload of POST /shitu-index-build
+    indexKey: str  # storage key of the new index; the other endpoints accept it
+    idMap: Dict[int, str]  # copied from index_data.id_map; presumably id -> label
+
+
+class AddImagesToIndexRequest(BaseModel):  # request body of POST /shitu-index-add
+    imageLabelPairs: List[ImageLabelPair]  # images and labels passed to append_index
+    indexKey: str  # key under which the index is read from and written back to storage
+
+
+class AddImagesToIndexResult(BaseModel):  # result payload of POST /shitu-index-add
+    idMap: Dict[int, str]  # id_map of the index returned by append_index
+
+
+class RemoveImagesFromIndexRequest(BaseModel):  # body of POST /shitu-index-remove
+    ids: List[int]  # forwarded to remove_index; presumably keys of idMap (confirm)
+    indexKey: str  # key under which the index is read from and written back to storage
+
+
+class RemoveImagesFromIndexResult(BaseModel):  # result of POST /shitu-index-remove
+    idMap: Dict[int, str]  # id_map of the index returned by remove_index
+
+
+class InferRequest(BaseModel):  # request body of POST /shitu-infer
+    image: str  # resolved via serving_utils.get_raw_bytes
+    indexKey: Optional[str] = None  # when None, predict() is called with index=None
+
+
+BoundingBox: TypeAlias = Annotated[List[float], Field(min_length=4, max_length=4)]  # exactly 4 floats
+
+
+class RecResult(BaseModel):  # one (label, score) pair recognized for a detected object
+    label: str  # element of obj["labels"]
+    score: float  # matching element of obj["rec_scores"]
+
+
+class DetectedObject(BaseModel):  # one entry of result["boxes"] in response form
+    bbox: BoundingBox  # obj["coordinate"]; the coordinate convention is not visible here
+    recResults: List[RecResult]  # stays empty when obj["rec_scores"] is None
+    score: float  # obj["det_score"]
+
+
+class InferResult(BaseModel):  # result payload of POST /shitu-infer
+    detectedObjects: List[DetectedObject]  # one item per entry of result["boxes"]
+    image: str  # Base64 of serving_utils.image_to_bytes(result.img)
+
+
+# XXX: Hand-rolled (de)serialization is fragile. `pickle.loads` is also unsafe on
+# untrusted bytes, so the index storage must only hold data written by this module.
+def _serialize_index_data(index_data: IndexData) -> bytes:
+    # Pickle the (index_bytes, index_info) pair; _deserialize_index_data reverses it.
+    return pickle.dumps((index_data.index_bytes, index_data.index_info))
+
+
+def _deserialize_index_data(index_data_bytes: bytes) -> IndexData:
+    # Inverse of _serialize_index_data: unpickle the pair, then rebuild the index.
+    raw = pickle.loads(index_data_bytes)
+    return IndexData(faiss.deserialize_index(raw[0]), raw[1])
+
+
+def create_pipeline_app(pipeline: ShiTuV2Pipeline, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+    # Index data goes to the configured `index_storage`, else to a "memory" storage.
+    if ctx.config.extra and "index_storage" in ctx.config.extra:
+        ctx.extra["index_storage"] = create_storage(ctx.config.extra["index_storage"])
+    else:
+        ctx.extra["index_storage"] = create_storage({"type": "memory"})
+    # Routes: build an index, add images to it, remove images from it, and infer.
+    @app.post(
+        "/shitu-index-build",
+        operation_id="buildIndex",
+        responses={422: {"model": Response}},
+    )
+    async def _build_index(
+        request: BuildIndexRequest,
+    ) -> ResultResponse[BuildIndexResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        request_id = serving_utils.generate_request_id()
+        # NOTE: The request ID is reused below as the key of the newly built index.
+        try:
+            images = [pair.image for pair in request.imageLabelPairs]
+            file_bytes_list = await asyncio.gather(
+                *(serving_utils.get_raw_bytes(img, aiohttp_session) for img in images)
+            )
+            images = [
+                serving_utils.image_bytes_to_array(item) for item in file_bytes_list
+            ]
+            labels = [pair.label for pair in request.imageLabelPairs]
+
+            index_data = await pipeline.call(
+                pipeline.pipeline.build_index, images, labels
+            )
+            # Persist the serialized index so that later requests can load it by key.
+            index_storage = ctx.extra["index_storage"]
+            index_key = request_id
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, index_key, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=BuildIndexResult(indexKey=index_key, idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/shitu-index-add",
+        operation_id="addImagesToIndex",  # operation IDs must be unique per route
+        responses={422: {"model": Response}},
+    )
+    async def _add_images_to_index(
+        request: AddImagesToIndexRequest,
+    ) -> ResultResponse[AddImagesToIndexResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        try:
+            images = [pair.image for pair in request.imageLabelPairs]
+            file_bytes_list = await asyncio.gather(
+                *(serving_utils.get_raw_bytes(img, aiohttp_session) for img in images)
+            )
+            images = [
+                serving_utils.image_bytes_to_array(item) for item in file_bytes_list
+            ]
+            labels = [pair.label for pair in request.imageLabelPairs]
+            index_storage = ctx.extra["index_storage"]
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = await serving_utils.call_async(
+                _deserialize_index_data, index_data_bytes
+            )
+
+            index_data = await pipeline.call(
+                pipeline.pipeline.append_index, images, labels, index_data
+            )
+            # Write the updated index back under the same key.
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, request.indexKey, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=AddImagesToIndexResult(idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/shitu-index-remove",
+        operation_id="removeImagesFromIndex",  # operation IDs must be unique per route
+        responses={422: {"model": Response}},
+    )
+    async def _remove_images_from_index(
+        request: RemoveImagesFromIndexRequest,
+    ) -> ResultResponse[RemoveImagesFromIndexResult]:
+        pipeline = ctx.pipeline
+
+        try:
+            index_storage = ctx.extra["index_storage"]
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = await serving_utils.call_async(
+                _deserialize_index_data, index_data_bytes
+            )
+
+            index_data = await pipeline.call(
+                pipeline.pipeline.remove_index, request.ids, index_data
+            )
+            # Write the updated index back under the same key.
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, request.indexKey, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=RemoveImagesFromIndexResult(idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/shitu-infer",
+        operation_id="infer",
+        responses={422: {"model": Response}},
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        try:
+            image_bytes = await serving_utils.get_raw_bytes(
+                request.image, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(image_bytes)
+            # An index is only loaded from storage when the client supplies a key.
+            if request.indexKey is not None:
+                index_storage = ctx.extra["index_storage"]
+                index_data_bytes = await serving_utils.call_async(
+                    index_storage.get, request.indexKey
+                )
+                index_data = await serving_utils.call_async(
+                    _deserialize_index_data, index_data_bytes
+                )
+            else:
+                index_data = None
+
+            result = list(
+                await pipeline.call(pipeline.pipeline.predict, image, index=index_data)
+            )[0]
+            # Convert the raw boxes of the first result into the response schema.
+            objects: List[DetectedObject] = []
+            for obj in result["boxes"]:
+                rec_results: List[RecResult] = []
+                if obj["rec_scores"] is not None:
+                    for label, score in zip(obj["labels"], obj["rec_scores"]):
+                        rec_results.append(
+                            RecResult(
+                                label=label,
+                                score=score,
+                            )
+                        )
+                objects.append(
+                    DetectedObject(
+                        bbox=obj["coordinate"],
+                        recResults=rec_results,
+                        score=obj["det_score"],
+                    )
+                )
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(detectedObjects=objects, image=output_image_base64),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py b/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py
index 87f2241ba4..747652478d 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py
@@ -16,7 +16,6 @@
 import os
 from typing import Awaitable, Final, List, Literal, Optional, Tuple, Union
 
-import cv2
 import numpy as np
 from fastapi import FastAPI, HTTPException
 from numpy.typing import ArrayLike
@@ -26,7 +25,7 @@
 from .....utils import logging
 from .... import results
 from ...ppchatocrv3 import PPChatOCRPipeline
-from .. import file_storage
+from ..storage import SupportsGetURL, Storage, create_storage
 from .. import utils as serving_utils
 from ..app import AppConfig, create_app
 from ..models import Response, ResultResponse
@@ -169,16 +168,17 @@ def _postprocess_image(
     img: ArrayLike,
     request_id: str,
     filename: str,
-    file_storage_config: file_storage.FileStorageConfig,
+    file_storage: Optional[Storage],
 ) -> str:
     key = f"{request_id}/{filename}"
     ext = os.path.splitext(filename)[1]
     img = np.asarray(img)
-    _, encoded_img = cv2.imencode(ext, img)
-    encoded_img = encoded_img.tobytes()
-    return file_storage.postprocess_file(
-        encoded_img, config=file_storage_config, key=key
-    )
+    img_bytes = serving_utils.image_array_to_bytes(img, ext=ext)
+    if file_storage is not None:
+        file_storage.set(key, img_bytes)
+        if isinstance(file_storage, SupportsGetURL):
+            return file_storage.get_url(key)
+    return serving_utils.base64_encode(img_bytes)
 
 
 def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> FastAPI:
@@ -186,12 +186,10 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
         pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
     )
 
-    if "file_storage_config" in ctx.extra:
-        ctx.extra["file_storage_config"] = file_storage.parse_file_storage_config(
-            ctx.extra["file_storage_config"]
-        )
+    if ctx.config.extra and "file_storage" in ctx.config.extra:
+        ctx.extra["file_storage"] = create_storage(ctx.config.extra["file_storage"])
     else:
-        ctx.extra["file_storage_config"] = file_storage.InMemoryStorageConfig()
+        ctx.extra["file_storage"] = None
     ctx.extra.setdefault("max_img_size", _DEFAULT_MAX_IMG_SIZE)
     ctx.extra.setdefault("max_num_imgs", _DEFAULT_MAX_NUM_IMGS)
 
@@ -259,7 +257,7 @@ async def _analyze_images(
                     img,
                     request_id=request_id,
                     filename=f"input_image_{i}.jpg",
-                    file_storage_config=ctx.extra["file_storage_config"],
+                    file_storage=ctx.extra["file_storage"],
                 )
                 pp_img_futures.append(future)
                 future = serving_utils.call_async(
@@ -267,7 +265,7 @@ async def _analyze_images(
                     item["ocr_result"].img,
                     request_id=request_id,
                     filename=f"ocr_image_{i}.jpg",
-                    file_storage_config=ctx.extra["file_storage_config"],
+                    file_storage=ctx.extra["file_storage"],
                 )
                 pp_img_futures.append(future)
                 future = serving_utils.call_async(
@@ -275,7 +273,7 @@ async def _analyze_images(
                     item["layout_result"].img,
                     request_id=request_id,
                     filename=f"layout_image_{i}.jpg",
-                    file_storage_config=ctx.extra["file_storage_config"],
+                    file_storage=ctx.extra["file_storage"],
                 )
                 pp_img_futures.append(future)
                 texts: List[Text] = []
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
index 9a746a7955..7d9ec20fcc 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
@@ -89,8 +89,8 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                 ):
                     texts.append(Text(poly=poly, text=text, score=score))
                 seal_impressions.append(SealImpression(texts=texts))
-            layout_image_base64 = serving_utils.image_to_base64(
-                result["layout_result"].img
+            layout_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result["layout_result"].img)
             )
 
             # TODO: OCR image
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py b/paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py
index 5a412475e2..7870eecbb7 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/semantic_segmentation.py
@@ -62,8 +62,8 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
             pred = result["pred"][0].tolist()
             size = [len(pred), len(pred[0])]
             label_map = [item for sublist in pred for item in sublist]
-            output_image_base64 = serving_utils.image_to_base64(
-                result.img.convert("RGB")
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img.convert("RGB"))
             )
 
             return ResultResponse(
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py b/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
index b6c5c5288c..fe67240312 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
@@ -72,7 +72,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         score=obj["score"],
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
index 32c5c9008f..36e221efff 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/table_recognition.py
@@ -85,10 +85,12 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         html=item["html"],
                     )
                 )
-            layout_image_base64 = serving_utils.image_to_base64(
-                result["layout_result"].img
+            layout_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result["layout_result"].img)
+            )
+            ocr_iamge_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result["ocr_result"].img)
             )
-            ocr_iamge_base64 = serving_utils.image_to_base64(result["ocr_result"].img)
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py b/paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py
index bc36c55f66..cd5710d71f 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/ts_ad.py
@@ -50,7 +50,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
 
             result = (await pipeline.infer(df))[0]
 
-            output_csv = serving_utils.data_frame_to_base64(result["anomaly"])
+            output_csv = serving_utils.base64_encode(
+                serving_utils.data_frame_to_bytes(result["anomaly"])
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py b/paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py
index d840184516..a7cb80c368 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/ts_fc.py
@@ -50,7 +50,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
 
             result = (await pipeline.infer(df))[0]
 
-            output_csv = serving_utils.data_frame_to_base64(result["forecast"])
+            output_csv = serving_utils.base64_encode(
+                serving_utils.data_frame_to_bytes(result["forecast"])
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py
index db59313edb..0c9f485982 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/vehicle_attribute_recognition.py
@@ -84,7 +84,9 @@ async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
                         score=obj["det_score"],
                     )
                 )
-            output_image_base64 = serving_utils.image_to_base64(result.img)
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
diff --git a/paddlex/inference/pipelines/serving/app.py b/paddlex/inference/pipelines/serving/app.py
index 96b3eb105f..bbe44ddec6 100644
--- a/paddlex/inference/pipelines/serving/app.py
+++ b/paddlex/inference/pipelines/serving/app.py
@@ -131,6 +131,7 @@ async def _app_lifespan(app: fastapi.FastAPI) -> AsyncGenerator[None, None]:
 
     app = fastapi.FastAPI(lifespan=_app_lifespan)
     ctx = AppContext[_PipelineT](config=app_config)
+    app.state.context = ctx
 
     @app.get("/health", operation_id="checkHealth")
     async def _check_health() -> Response:
diff --git a/paddlex/inference/pipelines/serving/file_storage.py b/paddlex/inference/pipelines/serving/file_storage.py
deleted file mode 100644
index 9104afb45e..0000000000
--- a/paddlex/inference/pipelines/serving/file_storage.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import base64
-import uuid
-from typing import Any, Dict, Literal, Optional, Union
-
-from baidubce.auth.bce_credentials import BceCredentials
-from baidubce.bce_client_configuration import BceClientConfiguration
-from baidubce.services.bos.bos_client import BosClient
-from pydantic import BaseModel, Discriminator, SecretStr, TypeAdapter
-from typing_extensions import Annotated, assert_never
-
-
-class InMemoryStorageConfig(BaseModel):
-    type: Literal["memory"] = "memory"
-
-
-class BOSConfig(BaseModel):
-    endpoint: str
-    ak: SecretStr
-    sk: SecretStr
-    bucket_name: str
-    key_prefix: Optional[str] = None
-    connection_timeout_in_mills: Optional[int] = None
-
-    type: Literal["bos"] = "bos"
-
-
-FileStorageConfig = Union[InMemoryStorageConfig, BOSConfig]
-
-
-def parse_file_storage_config(dic: Dict[str, Any]) -> FileStorageConfig:
-    # XXX: mypy deduces a wrong type
-    return TypeAdapter(
-        Annotated[FileStorageConfig, Discriminator("type")]
-    ).validate_python(
-        dic
-    )  # type: ignore[return-value]
-
-
-def postprocess_file(
-    file: bytes, config: FileStorageConfig, key: Optional[str] = None
-) -> str:
-    if config.type == "memory":
-        return base64.b64encode(file).decode("ascii")
-    elif config.type == "bos":
-        # TODO: Currently BOS clients are created on the fly since they are not
-        # thread-safe. Should we use a background thread with a queue or use a
-        # dedicated thread?
-        bos_cfg = BceClientConfiguration(
-            credentials=BceCredentials(
-                config.ak.get_secret_value(), config.sk.get_secret_value()
-            ),
-            endpoint=config.endpoint,
-            connection_timeout_in_mills=config.connection_timeout_in_mills,
-        )
-        client = BosClient(bos_cfg)
-        if key is None:
-            key = str(uuid.uuid4())
-        if config.key_prefix:
-            key = f"{config.key_prefix}{key}"
-        client.put_object_from_string(bucket=config.bucket_name, key=key, data=file)
-        url = client.generate_pre_signed_url(
-            config.bucket_name, key, expiration_in_seconds=-1
-        ).decode("ascii")
-        return url
-    else:
-        assert_never(config.type)
diff --git a/paddlex/inference/pipelines/serving/storage.py b/paddlex/inference/pipelines/serving/storage.py
new file mode 100644
index 0000000000..544e3da8f5
--- /dev/null
+++ b/paddlex/inference/pipelines/serving/storage.py
@@ -0,0 +1,161 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+from os import PathLike
+from pathlib import Path
+from typing import Any, Dict, Literal, Optional, Union, Protocol, runtime_checkable
+
+from baidubce.auth.bce_credentials import BceCredentials
+from baidubce.bce_client_configuration import BceClientConfiguration
+from baidubce.services.bos.bos_client import BosClient
+from pydantic import BaseModel, Discriminator, SecretStr, TypeAdapter
+from typing_extensions import Annotated, assert_never
+
+
+class InMemoryStorageConfig(BaseModel):
+    type: Literal["memory"] = "memory"
+
+
+class FileSystemStorageConfig(BaseModel):
+    directory: Union[str, PathLike]
+
+    type: Literal["file_system"] = "file_system"
+
+
+class BOSConfig(BaseModel):
+    endpoint: str
+    ak: SecretStr
+    sk: SecretStr
+    bucket_name: str
+    key_prefix: Optional[str] = None
+    connection_timeout_in_mills: Optional[int] = None
+
+    type: Literal["bos"] = "bos"
+
+
+FileStorageConfig = Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig]
+
+
+@runtime_checkable
+class SupportsGetURL(Protocol):
+    def get_url(self, key: str) -> str: ...
+
+
+class Storage(metaclass=abc.ABCMeta):
+    @abc.abstractmethod
+    def get(self, key: str) -> bytes:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def set(self, key: str, value: bytes) -> None:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def delete(self, key: str) -> None:
+        raise NotImplementedError
+
+
+class InMemoryStorage(Storage):
+    def __init__(self, config: InMemoryStorageConfig) -> None:
+        super().__init__()
+        self._data: Dict[str, bytes] = {}
+
+    def get(self, key: str) -> bytes:
+        return self._data[key]
+
+    def set(self, key: str, value: bytes) -> None:
+        self._data[key] = value
+
+    def delete(self, key: str) -> None:
+        del self._data[key]
+
+
+class FileSystemStorage(Storage):
+    """Store each value as a file under `config.directory`, named by its key."""
+
+    def __init__(self, config: FileSystemStorageConfig) -> None:
+        super().__init__()
+        self._directory = Path(config.directory)
+        self._directory.mkdir(parents=True, exist_ok=True)
+
+    def get(self, key: str) -> bytes:
+        return self._get_file_path(key).read_bytes()
+
+    def set(self, key: str, value: bytes) -> None:
+        path = self._get_file_path(key)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_bytes(value)
+
+    def delete(self, key: str) -> None:
+        self._get_file_path(key).unlink(missing_ok=True)
+
+    def _get_file_path(self, key: str) -> Path:
+        # NOTE(review): `key` is joined unchecked, so ".." segments or an
+        # absolute key escape the root directory; validate untrusted keys.
+        return self._directory / key
+
+
+class BOS(Storage):
+    def __init__(self, config: BOSConfig) -> None:
+        super().__init__()
+        bos_cfg = BceClientConfiguration(
+            credentials=BceCredentials(
+                config.ak.get_secret_value(), config.sk.get_secret_value()
+            ),
+            endpoint=config.endpoint,
+            connection_timeout_in_mills=config.connection_timeout_in_mills,
+        )
+        # NOTE(review): one client is shared by all calls. BOS clients are reportedly
+        # not thread-safe; confirm that concurrent use of this instance is safe.
+        self._client = BosClient(bos_cfg)
+        self._bucket_name = config.bucket_name
+        self._key_prefix = config.key_prefix
+
+    def get(self, key: str) -> bytes:
+        key = self._get_full_key(key)
+        return self._client.get_object_as_string(bucket_name=self._bucket_name, key=key)
+
+    def set(self, key: str, value: bytes) -> None:
+        key = self._get_full_key(key)
+        self._client.put_object_from_string(
+            bucket=self._bucket_name, key=key, data=value
+        )
+
+    def delete(self, key: str) -> None:
+        key = self._get_full_key(key)
+        self._client.delete_object(bucket_name=self._bucket_name, key=key)
+
+    def get_url(self, key: str) -> str:
+        key = self._get_full_key(key)
+        return self._client.generate_pre_signed_url(
+            self._bucket_name, key, expiration_in_seconds=-1
+        ).decode("ascii")
+
+    def _get_full_key(self, key: str) -> str:
+        return f"{self._key_prefix}/{key}" if self._key_prefix else key
+
+
+def create_storage(dic: Dict[str, Any], /) -> Storage:
+    config = TypeAdapter(
+        Annotated[FileStorageConfig, Discriminator("type")]
+    ).validate_python(dic)
+    if config.type == "memory":
+        return InMemoryStorage(config)
+    elif config.type == "file_system":
+        return FileSystemStorage(config)
+    elif config.type == "bos":
+        return BOS(config)
+    else:
+        assert_never(config)
diff --git a/paddlex/inference/pipelines/serving/utils.py b/paddlex/inference/pipelines/serving/utils.py
index 32b2bdd7a1..f960f3e63a 100644
--- a/paddlex/inference/pipelines/serving/utils.py
+++ b/paddlex/inference/pipelines/serving/utils.py
@@ -101,11 +101,20 @@ def image_bytes_to_array(data: bytes) -> np.ndarray:
     return cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
 
 
-def image_to_base64(image: Image.Image) -> str:
+def image_bytes_to_image(data: bytes) -> Image.Image:
+    return Image.open(io.BytesIO(data))
+
+
+def image_to_bytes(image: Image.Image, format: str = "JPEG") -> bytes:
     with io.BytesIO() as f:
-        image.save(f, format="JPEG")
-        image_base64 = base64.b64encode(f.getvalue()).decode("ascii")
-    return image_base64
+        image.save(f, format=format)
+        img_bytes = f.getvalue()
+    return img_bytes
+
+
+def image_array_to_bytes(image: np.ndarray, ext: str = ".jpg") -> bytes:
+    image = cv2.imencode(ext, image)[1]  # element 1 is the encoded buffer
+    return image.tobytes()
 
 
 def csv_bytes_to_data_frame(data: bytes) -> pd.DataFrame:
@@ -114,8 +123,12 @@ def csv_bytes_to_data_frame(data: bytes) -> pd.DataFrame:
     return df
 
 
-def data_frame_to_base64(df: str) -> str:
-    return base64.b64encode(df.to_csv().encode("utf-8")).decode("ascii")
+def data_frame_to_bytes(df: pd.DataFrame) -> bytes:
+    return df.to_csv().encode("utf-8")
+
+
+def base64_encode(data: bytes) -> str:
+    return base64.b64encode(data).decode("ascii")
 
 
 def read_pdf(

From 27adcdeee87ddc0d2b3b287ebc81fa04864bee69 Mon Sep 17 00:00:00 2001
From: Bobholamovic <bob1998425@hotmail.com>
Date: Fri, 15 Nov 2024 14:54:17 +0800
Subject: [PATCH 2/5] Fix index_type and metric_type

---
 .../general_image_recognition.en.md            | 18 ++++++++++++++----
 .../cv_pipelines/general_image_recognition.md  | 10 ++++++++++
 .../serving/_pipeline_apps/pp_shitu_v2.py      |  8 +++++++-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
index 3093c219a1..d2aed56ebe 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.en.md
@@ -738,13 +738,14 @@ import requests
 API_BASE_URL = &quot;http://0.0.0.0:8080&quot;
 
 base_image_label_pairs = [
-    {&quot;image&quot;: &quot;./demo0.jpg&quot;, &quot;label&quot;: &quot;rabbit&quot;},
-    {&quot;image&quot;: &quot;./demo1.jpg&quot;, &quot;label&quot;: &quot;rabbit&quot;},
-    {&quot;image&quot;: &quot;./demo2.jpg&quot;, &quot;label&quot;: &quot;puppy&quot;},
+    {&quot;image&quot;: &quot;./demo0.jpg&quot;, &quot;label&quot;: &quot;兔子&quot;},
+    {&quot;image&quot;: &quot;./demo1.jpg&quot;, &quot;label&quot;: &quot;兔子&quot;},
+    {&quot;image&quot;: &quot;./demo2.jpg&quot;, &quot;label&quot;: &quot;小狗&quot;},
 ]
 image_label_pairs_to_add = [
-    {&quot;image&quot;: &quot;./demo3.jpg&quot;, &quot;label&quot;: &quot;puppy&quot;},
+    {&quot;image&quot;: &quot;./demo3.jpg&quot;, &quot;label&quot;: &quot;小狗&quot;},
 ]
+ids_to_remove = [1]
 infer_image_path = &quot;./demo4.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
@@ -778,6 +779,15 @@ if resp_index_add.status_code != 200:
 result_index_add = resp_index_add.json()[&quot;result&quot;]
 print(f&quot;Number of images indexed: {len(result_index_add['idMap'])}&quot;)
 
+payload = {&quot;ids&quot;: ids_to_remove, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_index_remove = requests.post(f&quot;{API_BASE_URL}/shitu-index-remove&quot;, json=payload)
+if resp_index_remove.status_code != 200:
+    print(f&quot;Request to shitu-index-remove failed with status code {resp_index_remove.status_code}.&quot;)
+    pprint.pp(resp_index_remove.json())
+    sys.exit(1)
+result_index_remove = resp_index_remove.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_remove['idMap'])}&quot;)
+
 with open(infer_image_path, &quot;rb&quot;) as file:
     image_bytes = file.read()
     image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
index 961a83873d..b27ae78244 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/general_image_recognition.md
@@ -775,6 +775,7 @@ base_image_label_pairs = [
 image_label_pairs_to_add = [
     {&quot;image&quot;: &quot;./demo3.jpg&quot;, &quot;label&quot;: &quot;小狗&quot;},
 ]
+ids_to_remove = [1]
 infer_image_path = &quot;./demo4.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
@@ -808,6 +809,15 @@ if resp_index_add.status_code != 200:
 result_index_add = resp_index_add.json()[&quot;result&quot;]
 print(f&quot;Number of images indexed: {len(result_index_add['idMap'])}&quot;)
 
+payload = {&quot;ids&quot;: ids_to_remove, &quot;indexKey&quot;: result_index_build[&quot;indexKey&quot;]}
+resp_index_remove = requests.post(f&quot;{API_BASE_URL}/shitu-index-remove&quot;, json=payload)
+if resp_index_remove.status_code != 200:
+    print(f&quot;Request to shitu-index-remove failed with status code {resp_index_remove.status_code}.&quot;)
+    pprint.pp(resp_index_remove.json())
+    sys.exit(1)
+result_index_remove = resp_index_remove.json()[&quot;result&quot;]
+print(f&quot;Number of images indexed: {len(result_index_remove['idMap'])}&quot;)
+
 with open(infer_image_path, &quot;rb&quot;) as file:
     image_bytes = file.read()
     image_data = base64.b64encode(image_bytes).decode(&quot;ascii&quot;)
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py b/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
index b17f7d7ec5..84bae79207 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/pp_shitu_v2.py
@@ -132,8 +132,14 @@ async def _build_index(
             ]
             labels = [pair.label for pair in request.imageLabelPairs]
 
+            # TODO: Support specifying `index_type` and `metric_type` in the
+            # request
             index_data = await pipeline.call(
-                pipeline.pipeline.build_index, images, labels
+                pipeline.pipeline.build_index,
+                images,
+                labels,
+                index_type="Flat",
+                metric_type="IP",
             )
 
             index_storage = ctx.extra["index_storage"]

From e235a2b336d4f659e42a50226085513b25d2a9a3 Mon Sep 17 00:00:00 2001
From: Bobholamovic <bob1998425@hotmail.com>
Date: Fri, 15 Nov 2024 16:47:19 +0800
Subject: [PATCH 3/5] Add face recognition app

---
 .../serving/_pipeline_apps/__init__.py        |   8 +
 .../_pipeline_apps/face_recognition.py        | 317 ++++++++++++++++++
 2 files changed, 325 insertions(+)
 create mode 100644 paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py

diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py b/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
index 9083e946b3..2ef035becd 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py
@@ -21,6 +21,7 @@
     VehicleAttributeRecPipeline,
 )
 from ...base import BasePipeline
+from ...face_recognition import FaceRecPipeline
 from ...formula_recognition import FormulaRecognitionPipeline
 from ...layout_parsing import LayoutParsingPipeline
 from ...ocr import OCRPipeline
@@ -42,6 +43,7 @@
 from ...table_recognition import TableRecPipeline
 from ..app import create_app_config
 from .anomaly_detection import create_pipeline_app as create_anomaly_detection_app
+from .face_recognition import create_pipeline_app as create_face_recognition_app
 from .formula_recognition import create_pipeline_app as create_formula_recognition_app
 from .layout_parsing import create_pipeline_app as create_layout_parsing_app
 from .image_classification import create_pipeline_app as create_image_classification_app
@@ -188,6 +190,12 @@ def create_pipeline_app(
                 "Expected `pipeline` to be an instance of `ShiTuV2Pipeline`."
             )
         return create_pp_shitu_v2_app(pipeline, app_config)
+    elif pipeline_name == "face_recognition":
+        if not isinstance(pipeline, FaceRecPipeline):
+            raise TypeError(
+                "Expected `pipeline` to be an instance of `FaceRecPipeline`."
+            )
+        return create_face_recognition_app(pipeline, app_config)
     else:
         if BasePipeline.get(pipeline_name):
             raise ValueError(
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py b/paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py
new file mode 100644
index 0000000000..b7a47aae30
--- /dev/null
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/face_recognition.py
@@ -0,0 +1,317 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import faiss
+import pickle
+from typing import Dict, List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
+
+from .....utils import logging
+from ....components.retrieval.faiss import IndexData
+from ...face_recognition import FaceRecPipeline
+from ..storage import create_storage
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+
+class ImageLabelPair(BaseModel):
+    image: str
+    label: str
+
+
+class BuildIndexRequest(BaseModel):
+    imageLabelPairs: List[ImageLabelPair]
+
+
+class BuildIndexResult(BaseModel):
+    indexKey: str
+    idMap: Dict[int, str]
+
+
+class AddImagesToIndexRequest(BaseModel):
+    imageLabelPairs: List[ImageLabelPair]
+    indexKey: str
+
+
+class AddImagesToIndexResult(BaseModel):
+    idMap: Dict[int, str]
+
+
+class RemoveImagesFromIndexRequest(BaseModel):
+    ids: List[int]
+    indexKey: str
+
+
+class RemoveImagesFromIndexResult(BaseModel):
+    idMap: Dict[int, str]
+
+
+class InferRequest(BaseModel):
+    image: str
+    indexKey: Optional[str] = None
+
+
+BoundingBox: TypeAlias = Annotated[List[float], Field(min_length=4, max_length=4)]
+
+
+class RecResult(BaseModel):
+    label: str
+    score: float
+
+
+class Face(BaseModel):
+    bbox: BoundingBox
+    recResults: List[RecResult]
+    score: float
+
+
+class InferResult(BaseModel):
+    faces: List[Face]
+    image: str
+
+
+def _serialize_index_data(index_data: IndexData) -> bytes:
+    """Pickle the `(index_bytes, index_info)` pair of `index_data`."""
+    return pickle.dumps((index_data.index_bytes, index_data.index_info))
+
+
+def _deserialize_index_data(index_data_bytes: bytes) -> IndexData:
+    # NOTE: `pickle.loads` can execute arbitrary code; the storage must be trusted.
+    tup = pickle.loads(index_data_bytes)
+    return IndexData(faiss.deserialize_index(tup[0]), tup[1])
+
+
+def create_pipeline_app(pipeline: FaceRecPipeline, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    if ctx.config.extra and "index_storage" in ctx.config.extra:
+        ctx.extra["index_storage"] = create_storage(ctx.config.extra["index_storage"])
+    else:
+        ctx.extra["index_storage"] = create_storage({"type": "memory"})
+
+    @app.post(
+        "/face-recognition-index-build",
+        operation_id="buildIndex",
+        responses={422: {"model": Response}},
+    )
+    async def _build_index(
+        request: BuildIndexRequest,
+    ) -> ResultResponse[BuildIndexResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        request_id = serving_utils.generate_request_id()
+
+        try:
+            images = [pair.image for pair in request.imageLabelPairs]
+            file_bytes_list = await asyncio.gather(
+                *(serving_utils.get_raw_bytes(img, aiohttp_session) for img in images)
+            )
+            images = [
+                serving_utils.image_bytes_to_array(item) for item in file_bytes_list
+            ]
+            labels = [pair.label for pair in request.imageLabelPairs]
+
+            # TODO: Support specifying `index_type` and `metric_type` in the
+            # request
+            index_data = await pipeline.call(
+                pipeline.pipeline.build_index,
+                images,
+                labels,
+                index_type="Flat",
+                metric_type="IP",
+            )
+
+            index_storage = ctx.extra["index_storage"]
+            index_key = request_id
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, index_key, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=BuildIndexResult(indexKey=index_key, idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/face-recognition-index-add",
+        operation_id="addImagesToIndex",
+        responses={422: {"model": Response}},
+    )
+    async def _add_images_to_index(
+        request: AddImagesToIndexRequest,
+    ) -> ResultResponse[AddImagesToIndexResult]:
+        """Append images to the index stored under `indexKey`; return its ID map."""
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        try:
+            images = [pair.image for pair in request.imageLabelPairs]
+            file_bytes_list = await asyncio.gather(
+                *(serving_utils.get_raw_bytes(img, aiohttp_session) for img in images)
+            )
+            images = [serving_utils.image_bytes_to_array(b) for b in file_bytes_list]
+            labels = [pair.label for pair in request.imageLabelPairs]
+            index_storage = ctx.extra["index_storage"]
+            # NOTE(review): unlocked read-modify-write; concurrent updates may be lost.
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = await serving_utils.call_async(
+                _deserialize_index_data, index_data_bytes
+            )
+
+            index_data = await pipeline.call(
+                pipeline.pipeline.append_index, images, labels, index_data
+            )
+
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, request.indexKey, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=AddImagesToIndexResult(idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/face-recognition-index-remove",
+        operation_id="removeImagesFromIndex",
+        responses={422: {"model": Response}},
+    )
+    async def _remove_images_from_index(
+        request: RemoveImagesFromIndexRequest,
+    ) -> ResultResponse[RemoveImagesFromIndexResult]:
+        """Remove the given IDs from the index under `indexKey`; return its ID map."""
+        pipeline = ctx.pipeline
+
+        try:
+            index_storage = ctx.extra["index_storage"]
+            # NOTE(review): this read-modify-write is not locked, so concurrent
+            # updates to the same index key may overwrite each other.
+            raw = await serving_utils.call_async(index_storage.get, request.indexKey)
+            index_data = await serving_utils.call_async(_deserialize_index_data, raw)
+
+            index_data = await pipeline.call(
+                pipeline.pipeline.remove_index, request.ids, index_data
+            )
+
+            # Persist the updated index under the same key.
+            index_data_bytes = await serving_utils.call_async(
+                _serialize_index_data, index_data
+            )
+            await serving_utils.call_async(
+                index_storage.set, request.indexKey, index_data_bytes
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=RemoveImagesFromIndexResult(idMap=index_data.id_map),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    @app.post(
+        "/face-recognition-infer",
+        operation_id="infer",
+        responses={422: {"model": Response}},
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        try:
+            image_bytes = await serving_utils.get_raw_bytes(
+                request.image, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(image_bytes)
+
+            if request.indexKey is not None:
+                index_storage = ctx.extra["index_storage"]
+                index_data_bytes = await serving_utils.call_async(
+                    index_storage.get, request.indexKey
+                )
+                index_data = await serving_utils.call_async(
+                    _deserialize_index_data, index_data_bytes
+                )
+            else:
+                index_data = None
+
+            result = list(
+                await pipeline.call(pipeline.pipeline.predict, image, index=index_data)
+            )[0]
+
+            faces: List[Face] = []
+            for face in result["boxes"]:
+                rec_results: List[RecResult] = []
+                if face["rec_scores"] is not None:
+                    for label, score in zip(face["labels"], face["rec_scores"]):
+                        rec_results.append(
+                            RecResult(
+                                label=label,
+                                score=score,
+                            )
+                        )
+                faces.append(
+                    Face(
+                        bbox=face["coordinate"],
+                        recResults=rec_results,
+                        score=face["det_score"],
+                    )
+                )
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img)
+            )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(faces=faces, image=output_image_base64),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app

From 886f5e414badde333e2017e48e2c89f9dba66b50 Mon Sep 17 00:00:00 2001
From: Bobholamovic <bob1998425@hotmail.com>
Date: Fri, 15 Nov 2024 16:47:56 +0800
Subject: [PATCH 4/5] Change small object detection endpoint

---
 .../cv_pipelines/small_object_detection.en.md    | 16 ++++++++--------
 .../cv_pipelines/small_object_detection.md       | 16 ++++++++--------
 .../_pipeline_apps/small_object_detection.py     |  4 +++-
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
index ba5db67ce7..0e17a3c965 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
@@ -297,7 +297,7 @@ Below are the API references and multi-language service invocation examples:
 <li><b><code>infer</code></b></li>
 </ul>
 <p>Performs object detection on an image.</p>
-<p><code>POST /object-detection</code></p>
+<p><code>POST /small-object-detection</code></p>
 <ul>
 <li>The request body properties are as follows:</li>
 </ul>
@@ -407,7 +407,7 @@ Below are the API references and multi-language service invocation examples:
 <pre><code class="language-python">import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/object-detection&quot;
+API_URL = &quot;http://localhost:8080/small-object-detection&quot;
 image_path = &quot;./demo.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
@@ -460,7 +460,7 @@ int main() {
     jsonObj[&quot;image&quot;] = encodedImage;
     std::string body = jsonObj.dump();
 
-    auto response = client.Post(&quot;/object-detection&quot;, headers, body, &quot;application/json&quot;);
+    auto response = client.Post(&quot;/small-object-detection&quot;, headers, body, &quot;application/json&quot;);
     if (response &amp;&amp; response-&gt;status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
         auto result = jsonResponse[&quot;result&quot;];
@@ -505,7 +505,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/object-detection&quot;;
+        String API_URL = &quot;http://localhost:8080/small-object-detection&quot;;
         String imagePath = &quot;./demo.jpg&quot;;
         String outputImagePath = &quot;./out.jpg&quot;;
 
@@ -561,7 +561,7 @@ import (
 )
 
 func main() {
-    API_URL := &quot;http://localhost:8080/object-detection&quot;
+    API_URL := &quot;http://localhost:8080/small-object-detection&quot;
     imagePath := &quot;./demo.jpg&quot;
     outputImagePath := &quot;./out.jpg&quot;
 
@@ -641,7 +641,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = &quot;http://localhost:8080/object-detection&quot;;
+    static readonly string API_URL = &quot;http://localhost:8080/small-object-detection&quot;;
     static readonly string imagePath = &quot;./demo.jpg&quot;;
     static readonly string outputImagePath = &quot;./out.jpg&quot;;
 
@@ -677,7 +677,7 @@ class Program
 <pre><code class="language-js">const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/object-detection'
+const API_URL = 'http://localhost:8080/small-object-detection'
 const imagePath = './demo.jpg'
 const outputImagePath = &quot;./out.jpg&quot;;
 
@@ -715,7 +715,7 @@ axios.request(config)
 
 <pre><code class="language-php">&lt;?php
 
-$API_URL = &quot;http://localhost:8080/object-detection&quot;;
+$API_URL = &quot;http://localhost:8080/small-object-detection&quot;;
 $image_path = &quot;./demo.jpg&quot;;
 $output_image_path = &quot;./out.jpg&quot;;
 
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
index 05ee161f76..41daf78d2e 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
@@ -294,7 +294,7 @@ for res in output:
 <li><b><code>infer</code></b></li>
 </ul>
 <p>对图像进行目标检测。</p>
-<p><code>POST /object-detection</code></p>
+<p><code>POST /small-object-detection</code></p>
 <ul>
 <li>请求体的属性如下：</li>
 </ul>
@@ -404,7 +404,7 @@ for res in output:
 <pre><code class="language-python">import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/object-detection&quot; # 服务URL
+API_URL = &quot;http://localhost:8080/small-object-detection&quot; # 服务URL
 image_path = &quot;./demo.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
@@ -462,7 +462,7 @@ int main() {
     std::string body = jsonObj.dump();
 
     // 调用API
-    auto response = client.Post(&quot;/object-detection&quot;, headers, body, &quot;application/json&quot;);
+    auto response = client.Post(&quot;/small-object-detection&quot;, headers, body, &quot;application/json&quot;);
     // 处理接口返回数据
     if (response &amp;&amp; response-&gt;status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
@@ -508,7 +508,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/object-detection&quot;; // 服务URL
+        String API_URL = &quot;http://localhost:8080/small-object-detection&quot;; // 服务URL
         String imagePath = &quot;./demo.jpg&quot;; // 本地图像
         String outputImagePath = &quot;./out.jpg&quot;; // 输出图像
 
@@ -567,7 +567,7 @@ import (
 )
 
 func main() {
-    API_URL := &quot;http://localhost:8080/object-detection&quot;
+    API_URL := &quot;http://localhost:8080/small-object-detection&quot;
     imagePath := &quot;./demo.jpg&quot;
     outputImagePath := &quot;./out.jpg&quot;
 
@@ -650,7 +650,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = &quot;http://localhost:8080/object-detection&quot;;
+    static readonly string API_URL = &quot;http://localhost:8080/small-object-detection&quot;;
     static readonly string imagePath = &quot;./demo.jpg&quot;;
     static readonly string outputImagePath = &quot;./out.jpg&quot;;
 
@@ -689,7 +689,7 @@ class Program
 <pre><code class="language-js">const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/object-detection'
+const API_URL = 'http://localhost:8080/small-object-detection'
 const imagePath = './demo.jpg'
 const outputImagePath = &quot;./out.jpg&quot;;
 
@@ -730,7 +730,7 @@ axios.request(config)
 
 <pre><code class="language-php">&lt;?php
 
-$API_URL = &quot;http://localhost:8080/object-detection&quot;; // 服务URL
+$API_URL = &quot;http://localhost:8080/small-object-detection&quot;; // 服务URL
 $image_path = &quot;./demo.jpg&quot;;
 $output_image_path = &quot;./out.jpg&quot;;
 
diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py b/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
index fe67240312..5287c567ff 100644
--- a/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
+++ b/paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py
@@ -49,7 +49,9 @@ def create_pipeline_app(pipeline: SmallObjDet, app_config: AppConfig) -> FastAPI
     )
 
     @app.post(
-        "/object-detection", operation_id="infer", responses={422: {"model": Response}}
+        "/small-object-detection",
+        operation_id="infer",
+        responses={422: {"model": Response}},
     )
     async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
         pipeline = ctx.pipeline

From d351079b3bd104e17e6cc4906b1926f326055d07 Mon Sep 17 00:00:00 2001
From: Bobholamovic <bob1998425@hotmail.com>
Date: Fri, 15 Nov 2024 17:01:38 +0800
Subject: [PATCH 5/5] Fix face recognition bugs

---
 paddlex/inference/pipelines/face_recognition.py | 2 ++
 paddlex/pipelines/face_recognition.yaml         | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/paddlex/inference/pipelines/face_recognition.py b/paddlex/inference/pipelines/face_recognition.py
index a925dbb006..e7a55f7854 100644
--- a/paddlex/inference/pipelines/face_recognition.py
+++ b/paddlex/inference/pipelines/face_recognition.py
@@ -21,6 +21,8 @@ class FaceRecPipeline(ShiTuV2Pipeline):
     entities = "face_recognition"
 
     def get_rec_result(self, det_res, indexer):
+        if len(det_res["boxes"]) == 0:
+            return {"label": [], "score": []}
         subs_of_img = list(self._crop_by_boxes(det_res))
         img_list = [img["img"] for img in subs_of_img]
         all_rec_res = list(self.rec_model(img_list))
diff --git a/paddlex/pipelines/face_recognition.yaml b/paddlex/pipelines/face_recognition.yaml
index ce1cccb27b..5da78fecc0 100644
--- a/paddlex/pipelines/face_recognition.yaml
+++ b/paddlex/pipelines/face_recognition.yaml
@@ -8,6 +8,6 @@ Pipeline:
   det_batch_size: 1
   rec_batch_size: 1
   device: gpu
-  index_dir: None
+  index: None
   score_thres: 0.4
   return_k: 5