PaddlePaddle · luotao1 · Jun 13, 2023 · May 22, 2022 · Aug 6, 2022 · Aug 24, 2022
diff --git a/ci_scripts/CAPItools/README.md b/ci_scripts/CAPItools/README.md
@@ -0,0 +1,34 @@
+# CAPI tools
+CAPI tools 用于一键生成 C++ 的 rst 文档。
+
+## 调用方式
+```shell
+python main.py [source dir] [target dir]
+```
+
+其中：
+- source dir 是安装后的 Paddle C++ API 声明路径，例如 `venv/Lib/site-packages/paddle/include/paddle`。
+- target dir 是目标文件保存路径。
+
+最终生成结果如下所示：
+```text
+target dir
+|- cn
+    |- index.rst
+    |- Paddle
+        |- fluid
+        |- phi
+        |- ...
+|- en
+    |- index.rst
+    |- Paddle
+        |- fluid
+        |- phi
+        |- ...
+```
+
+## 获取最新 PaddlePaddle
+`python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html`
+
+## 特别说明
+有少量报错为正常现象，将在后续修正。
diff --git a/ci_scripts/CAPItools/main.py b/ci_scripts/CAPItools/main.py
@@ -0,0 +1,140 @@
+# python main.py [source dir] [target dir]
+# python main.py ../paddle .
+
+
+import CppHeaderParser
+import json
+import os
+import traceback
+import sys
+
+from utils_helper import func_helper, class_helper, generate_overview
+from utils import get_PADDLE_API_class, get_PADDLE_API_func
+
+# TODO 通过已安装的 paddle 来查找 include
+# import paddle
+# import inspect
+#
+# # 获取已安装paddle的路径
+# print(os.path.dirname(inspect.getsourcefile(paddle)))
+
+
+# TODO 需要单独处理一下这种
+"""
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+/**
+ * Get the current CUDA stream for the passed CUDA device.
+ */
+PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place);
+#endif
+"""
+
+
+# Parse a C++ header with CppHeaderParser and return its decoded JSON dump.
+# Not safe for multi-threaded use; do not call it from several threads.
+def analysis_file(path):
+    parsed_header = CppHeaderParser.CppHeader(path, encoding='utf8')
+    header_json = parsed_header.toJSON()
+    return json.loads(header_json)
+
+
+# Generate the rst files
+def generate_docs(
+    all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn"
+):
+    """Write one rst file per function and per class under ``save_dir``.
+
+    Every item needs the "filename" and "name" keys; its file is
+    ``save_dir/LANGUAGE/<filename minus "../" and ".h">/<name>.rst``.
+    ``cpp2py_api_list`` is passed on to ``func_helper``. A failure on one
+    item is printed and the remaining items are still processed.
+    """
+
+    def write_rst(item, name, build_helper):
+        path = item["filename"].replace("../", "").replace(".h", "")
+        dir_path = os.path.join(save_dir, LANGUAGE, path)
+        rst_dir = os.path.join(dir_path, name + ".rst")
+        try:
+            os.makedirs(dir_path, exist_ok=True)
+            build_helper(item).create_file(rst_dir, LANGUAGE)
+        except Exception:
+            # Best effort per item; unlike a bare "except:" this lets
+            # KeyboardInterrupt and SystemExit stop the run.
+            print(traceback.format_exc())
+            print('FAULT GENERATE:' + rst_dir)
+
+    for item in all_funcs:
+        # "/" in a name (e.g. operator/) would be read as a path separator.
+        func_name = item["name"].replace("/", "")
+        write_rst(
+            item, func_name, lambda it: func_helper(it, cpp2py_api_list)
+        )
+
+    for item in all_class:
+        # Strip the PADDLE_API macro text from the class name.
+        write_rst(item, item["name"].replace("PADDLE_API", ""), class_helper)
+
+
+# C++ names that correspond to Python APIs
+def cpp2py(data: dict):
+    """Return each entry of ``data["using"]`` with "paddle::" removed."""
+    return [
+        using_name.replace("paddle::", "")
+        for using_name in data["using"]
+    ]
+
+
+if __name__ == "__main__":
+    # Validate explicitly: an assert is stripped under "python -O".
+    if len(sys.argv) != 3:
+        sys.exit("usage: python main.py [source dir] [target dir]")
+    root_dir = sys.argv[1]
+    save_dir = sys.argv[2]
+    all_funcs = []
+    all_class = []
+    cpp2py_api_list = []
+    overview_list = []
+    for home, dirs, files in os.walk(root_dir):
+        for file_name in files:
+            file_path = os.path.join(home, file_name)
+            # This header holds the C++ <-> Python API correspondence.
+            if file_name == "tensor_compat.h":
+                cpp2py_api_list = cpp2py(analysis_file(file_path))
+
+            # Skip files that do not contain PADDLE_API.
+            with open(file_path, encoding='utf8') as f:
+                if 'PADDLE_API ' not in f.read():
+                    continue
+
+            print("Parsing: ", file_path)
+            data = analysis_file(file_path)
+            # Extract the PADDLE_API functions and classes.
+            current_func = get_PADDLE_API_func(data)
+            current_class = get_PADDLE_API_class(data)
+
+            # Record them for the rst files and the overview.
+            all_funcs.extend(current_func)
+            all_class.extend(current_class)
+            overview_list.append(
+                {
+                    'h_file': file_path,
+                    'class': current_class,
+                    'function': current_func,
+                }
+            )
+
+    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn")
+    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en")
+
+    # TODO: delete the try-except after every thing is prepare
+    try:
+        generate_overview(overview_list, save_dir, "cn")
+        generate_overview(overview_list, save_dir, "en")
+    except Exception:
+        # Still best effort, but show the cause instead of hiding it.
+        print(traceback.format_exc())
+        print('index error')
+
+    print("PADDLE_API func count: ", len(all_funcs))
+    print("PADDLE_API class count: ", len(all_class))
+    print("cpp2py api count: ", len(cpp2py_api_list))
diff --git a/ci_scripts/CAPItools/requirements.txt b/ci_scripts/CAPItools/requirements.txt
@@ -0,0 +1,2 @@
+robotpy-cppheaderparser==5.1.0
+# paddle
diff --git a/ci_scripts/CAPItools/utils.py b/ci_scripts/CAPItools/utils.py
@@ -0,0 +1,84 @@
+# Pick the functions declared with PADDLE_API
+def get_PADDLE_API_func(data: dict):
+    """Return the items of ``data["functions"]`` whose "debug" value
+    contains "PADDLE_API", keeping their order."""
+    return [
+        func for func in data["functions"] if 'PADDLE_API' in func['debug']
+    ]
+
+
+# Pick the classes declared with PADDLE_API
+def get_PADDLE_API_class(data: dict):
+    """Return the values of ``data["classes"]`` whose key contains
+    "PADDLE_API" and whose "declaration_method" is not "struct"."""
+    # TODO: structs are skipped, no PADDLE_API struct exists so far.
+    # TODO: the key has the macro glued on (PADDLE_APIDeviceContextPool).
+    return [
+        class_info
+        for class_name, class_info in data["classes"].items()
+        if class_info["declaration_method"] != "struct"
+        and "PADDLE_API" in class_name
+    ]
+
+
+# Collect the parameters of a parsed function
+def get_parameters(parameters):
+    """Map each parameter name to its rendered type and description.
+
+    Args:
+        parameters: iterable of dicts with the keys "type", "reference",
+            "pointer", "constant" and "name"; "desc" is optional.
+
+    Returns:
+        dict: ``{name: {'type': ..., 'intro': ...}}``. Entries named "&"
+        (no parameter name available) are left out.
+    """
+    parameter_dict = {}
+    for param in parameters:
+        # Strip " &" / " *" from the type text, then re-append the
+        # markers directly after it according to the flags.
+        rendered_type = param['type'].replace(" &", "").replace(" *", "")
+        if param["reference"] == 1:
+            rendered_type += "&"
+        if param["pointer"] == 1:
+            rendered_type += "*"
+        is_const = param["constant"] == 1
+        if is_const and not rendered_type.startswith('const'):
+            rendered_type = "const " + rendered_type
+        # Every double space is removed from the description.
+        intro = param.get('desc', '').replace('  ', '')
+        if param['name'] != '&':
+            parameter_dict[param['name']] = {
+                'type': rendered_type,
+                'intro': intro,
+            }
+    return parameter_dict
+
+
+def parse_doxygen(doxygen):
+    """Extract the @brief, @return, @param and @note parts of a comment.
+
+    Args:
+        doxygen (str): comment text. Text before the first "@" and parts
+            with any other tag are ignored.
+
+    Returns:
+        dict: the strings 'intro', 'returns' and 'note' plus 'param_intro',
+        a ``{name: description}`` dict; a tag that is absent stays empty.
+    """
+    doxygen_dict = {'intro': '', 'returns': '', 'param_intro': {}, 'note': ''}
+    # A tag is only recognised when a space follows its keyword.
+    text_tags = {'brief ': 'intro', 'return ': 'returns', 'note ': 'note'}
+    for doxygen_part in doxygen.split('@')[1:]:
+        if doxygen_part.startswith('param '):
+            # Split on whitespace instead of find(' '): its -1 result
+            # filed a description-less "@param x" under the name "".
+            words = doxygen_part[len('param ') :].split(None, 1)
+            if words:
+                description = words[1] if len(words) > 1 else ''
+                doxygen_dict['param_intro'][words[0]] = description
+            continue
+        for prefix, key in text_tags.items():
+            if doxygen_part.startswith(prefix):
+                doxygen_dict[key] = doxygen_part[len(prefix) :]
+    return doxygen_dict