Skip to content

Commit 9d695ac

Browse files
authored
Add Cpp Doc Generate tools (#5900)
* CAPItools * add note
1 parent 9dca23d commit 9d695ac

File tree

5 files changed

+877
-0
lines changed

5 files changed

+877
-0
lines changed

ci_scripts/CAPItools/README.md

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# CAPI tools
2+
CAPI tools 用于一键生成 C++ 的 rst 文档。
3+
4+
## 调用方式
5+
```python
6+
python main.py <source dir> <target dir>
7+
```
8+
9+
若不设置 `source dir` 和 `target dir`,则默认先查找已安装的 `paddlepaddle` 包环境。
10+
11+
其中:
12+
- source dir 是安装后的 Paddle C++ API 声明路径。 例如`venv/Lib/site-packages/paddle/include/paddle`
13+
- target dir 目标文件保存路径。
14+
15+
最终生成结果如下所示:
16+
```python
17+
target dir
18+
| -cn
19+
|- index.rst
20+
|- Paddle
21+
|- fluid
22+
|- phi
23+
|- ...
24+
| -en
25+
|- index.rst
26+
|- Paddle
27+
|- fluid
28+
|- phi
29+
|- ...
30+
```
31+
32+
## 获取最新 PaddlePaddle
33+
python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html
34+
35+
## 代码结构
36+
37+
### `main.py`文件主要用于处理和筛选包文件, 并调用`utils_helper.py`中的函数进行文件生成
38+
```python
39+
def analysis_file() # 用于解析文件内容(多线程不安全)
40+
41+
def generate_docs() # 用于创建目录并传值给 utils_helper.py 中的函数进行文件生成
42+
43+
def cpp2py() # 用于筛选出 cpp api 和 py api 相对应的函数名称
44+
```
45+
46+
### `utils_helper.py`文件主要存放函数生成、解析, 以及文件写入的工作
47+
```python
48+
49+
class func_helper(object) # 用于生成和解析方法
50+
decode() # 用于解析输入输出参数、函数名称、返回值、函数注释信息
51+
class class_helper(object) # 用于生成和解析类
52+
decode() # 同 func_helper()
53+
54+
def generate_overview() # 用于生成 overview.rst 文件
55+
```

ci_scripts/CAPItools/main.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# python main.py [source dir] [target dir]
2+
# python main.py ../paddle .
3+
4+
5+
import CppHeaderParser
6+
import json
7+
import os
8+
import traceback
9+
import sys
10+
import re
11+
12+
from utils_helper import func_helper, class_helper, generate_overview
13+
from utils import get_PADDLE_API_class, get_PADDLE_API_func
14+
15+
16+
# 解析所有的函数, 类, 枚举, 返回一个字典
17+
# 多线程使用并不安全, 请不要使用多线程
18+
def analysis_file(path):
    """Parse one C++ file and return the parser's result as a plain dict.

    The file at ``path`` is parsed with ``CppHeaderParser.CppHeader`` using
    UTF-8, and the parser's JSON dump is decoded back into Python objects.

    NOTE: the original author marks this as not thread-safe; do not call it
    from multiple threads.
    """
    header = CppHeaderParser.CppHeader(path, encoding='utf8')
    return json.loads(header.toJSON())
22+
23+
24+
# 生成文件
25+
# 根据给定的list内容,生成对应的文档信息
26+
def generate_docs(
    all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn"
):
    """Write one ``.rst`` file per collected function and class entry.

    Args:
        all_funcs: iterable of parsed function entries; each must provide
            the ``"filename"`` and ``"name"`` keys.
        all_class: iterable of parsed class entries with the same two keys.
        cpp2py_api_list: passed through unchanged to ``func_helper``.
        save_dir: root directory of the generated documentation.
        LANGUAGE: sub-directory of ``save_dir`` to write into; it is also
            forwarded to ``create_and_write_file``.

    A failure while generating a single entry is reported (traceback plus
    the target path) and does not stop the remaining entries.
    """
    for item in all_funcs:
        path = item["filename"].replace("../", "").replace(".h", "")
        # NOTE(review): if ``filename`` is an absolute path, os.path.join
        # discards save_dir/LANGUAGE -- confirm callers pass relative paths.
        dir_path = os.path.join(save_dir, LANGUAGE, path)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        # A "/" inside the name would be read as a path separator (Linux),
        # so it is removed before the name is used as a file name.
        func_name = item["name"].replace("/", "")

        # Pure operator declarations (nothing word-like after "operator")
        # get no rst file; this also avoids file names such as
        # "operator*.rst", which are a problem on Windows.  Per the original
        # note they are still expected to show up in the overview list.
        if func_name.startswith('operator'):
            checkwords = func_name.replace('operator', '', 1)
            if re.search(r"\w", checkwords) is None:
                continue
        rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst")
        try:
            helper = func_helper(item, cpp2py_api_list)
            helper.create_and_write_file(rst_dir, LANGUAGE)
        except Exception:
            # Best effort: report and go on, but let KeyboardInterrupt and
            # SystemExit propagate (a bare ``except:`` would swallow them).
            print(traceback.format_exc())
            print('FAULT GENERATE:' + rst_dir)

    for item in all_class:
        path = item["filename"].replace("../", "").replace(".h", "")
        dir_path = os.path.join(save_dir, LANGUAGE, path)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        # The parsed class name carries the PADDLE_API marker; drop it.
        func_name = item["name"].replace("PADDLE_API", "")
        rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst")
        try:
            helper = class_helper(item)
            helper.create_and_write_file(rst_dir, LANGUAGE)
        except Exception:
            print(traceback.format_exc())
            print('FAULT GENERATE:' + rst_dir)
66+
67+
68+
# cpp 对应 python api
69+
# 用于存储 api 的名称, 用于后续生成对应python api文档链接
70+
def cpp2py(data: dict):
    """Return the names under ``data["using"]`` without the ``paddle::`` text.

    The result is a new list, in iteration order of ``data["using"]``, with
    every occurrence of ``"paddle::"`` removed from each entry.  The original
    comment says these names are later used to link to the matching Python
    API documentation.
    """
    return [name.replace("paddle::", "") for name in data["using"]]
76+
77+
78+
# 运行主函数,主要流程如下
79+
# 1. 确定生成的目录
80+
# 2. 提取待生成文档的PADDLE_API list
81+
# 3. 生成文档
82+
if __name__ == "__main__":
    root_dir = ''
    save_dir = '.'  # default: save into the current directory
    if len(sys.argv) == 3:
        root_dir = sys.argv[1]
        save_dir = sys.argv[2]

    if root_dir == '':
        # No source dir given: use the directory of the installed paddle
        # package, falling back to a relative path if that lookup fails.
        try:
            import paddle
            import inspect

            root_dir = os.path.dirname(inspect.getsourcefile(paddle))
        except Exception:
            # for simple run
            root_dir = '../paddle'
            save_dir = '.'  # default: save into the current directory

    all_funcs = []
    all_class = []
    cpp2py_api_list = []
    overview_list = []
    for home, _dirs, files in os.walk(root_dir):
        for file_name in files:
            # Skip files that do not need processing.
            if file_name.split(".")[-1] not in ["cc", "cu", "h"]:
                continue

            file_path = os.path.join(home, file_name)
            # Per the original note, this is currently the only file whose
            # C++ API names correspond to Python API names.
            if file_name == "tensor_compat.h":
                cpp2py_data = analysis_file(file_path)
                cpp2py_api_list = cpp2py(cpp2py_data)

            # Skip files that do not contain PADDLE_API.
            with open(file_path, encoding='utf-8') as f:
                if 'PADDLE_API ' not in f.read():
                    continue

            print("Parsing: ", file_path)
            data = analysis_file(file_path)

            # Extract the exported functions and classes.
            current_func = get_PADDLE_API_func(data)
            current_class = get_PADDLE_API_class(data)

            # Record them, both globally and per file for the overview.
            all_funcs.extend(current_func)
            all_class.extend(current_class)
            overview_list.append(
                {
                    'h_file': file_path,
                    'class': current_class,
                    'function': current_func,
                }
            )

    # Generate the documents.
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn")
    generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en")

    # Generate the overview.
    generate_overview(overview_list, save_dir, "cn")
    generate_overview(overview_list, save_dir, "en")

    # Statistics.
    print("PADDLE_API func count: ", len(all_funcs))
    print("PADDLE_API class count: ", len(all_class))
    print("cpp2py api count: ", len(cpp2py_api_list))

ci_scripts/CAPItools/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
robotpy-cppheaderparser==5.1.0
2+
# paddle

ci_scripts/CAPItools/utils.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# 获取存在 PADDLE_API func 数组的名称
2+
# CppHeaderParser 解析后以字典形式保存数据,'debug' 字段中保存了原始信息
3+
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的函数
4+
def get_PADDLE_API_func(data: dict):
    """Return the entries of ``data["functions"]`` marked with PADDLE_API.

    An entry is kept when the text ``PADDLE_API`` occurs in its ``'debug'``
    field; per the original comment that field holds the raw declaration and
    the marker means the function is exposed externally.  Order is preserved.
    """
    return [func for func in data["functions"] if 'PADDLE_API' in func['debug']]
10+
11+
12+
# 获取存在 PADDLE_API class 数组的名称
13+
# CppHeaderParser 解析后以字典形式保存数据
14+
# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的类
15+
def get_PADDLE_API_class(data: dict):
    """Return the entries of ``data["classes"]`` marked with PADDLE_API.

    Entries whose ``"declaration_method"`` is ``"struct"`` are skipped (the
    original note says no PADDLE_API struct exists at the moment).  A class
    is kept when ``PADDLE_API`` occurs in its key: the parser glues the
    marker onto the class name, e.g. ``PADDLE_APIDeviceContextPool``.
    """
    return [
        info
        for classname, info in data["classes"].items()
        if info["declaration_method"] != "struct" and "PADDLE_API" in classname
    ]
26+
27+
28+
# 获取方法中的参数parameters
29+
# 根据解析的参数字典,添加对应的参数名、参数类型、说明
30+
# 有时候会将“&”解析为参数名,需要特殊处理
31+
def get_parameters(parameters):
    """Build a ``{name: {'type': ..., 'intro': ...}}`` dict from parsed params.

    Args:
        parameters: iterable of parameter dicts providing ``'name'``,
            ``'type'``, ``'reference'``, ``'pointer'`` and ``'constant'``,
            and optionally ``'desc'``.

    Returns:
        A dict keyed by parameter name.  ``'type'`` is the declared type
        with `` &`` / `` *`` removed, then ``&`` repeated ``reference``
        times, one ``*`` when ``pointer == 1``, and a leading ``const ``
        when ``constant == 1`` and it is not already there.  ``'intro'`` is
        the ``'desc'`` text after the replacement below.
    """
    parameter_dict = {}
    for param in parameters:
        # The parser sometimes reports "&" as the parameter name (an unnamed
        # parameter); such entries are left out of the result.
        if param['name'] == '&':
            continue

        # Normalise the "*" and "&" cases: strip them from the raw type, then
        # re-attach them without the separating space.
        type_text = param['type'].replace(" &", "").replace(" *", "")
        type_text += "&" * param["reference"]
        if param["pointer"] == 1:
            type_text += "*"
        if param["constant"] == 1 and not type_text.startswith('const'):
            type_text = "const " + type_text

        # NOTE(review): as written this removes every single space from the
        # description -- confirm whether only runs of spaces were meant.
        desc = param.get('desc', '').replace(' ', '')

        parameter_dict[param['name']] = {
            'type': type_text,
            'intro': desc,
        }
    return parameter_dict
62+
63+
64+
# 将注释内容解析为说明字典
65+
# 解析前: @brief Construct a Tensor from a buffer pointed to by `data` @note `from_blob` doesn’t copy or move data, Modifying the constructed tensor is equivalent to modifying the original data. @param data The pointer to the memory buffer. @param shape The dims of the tensor. @param dtype The data type of the tensor, should correspond to data type of`data`. See PD_FOR_EACH_DATA_TYPE in `phi/common/data_type.h` @param layout The data layout of the tensor. @param place The place where the tensor is located.If `place` is default value, it will be inferred from `data`,However, the feature is only supported on CPU or GPU.If `place` is not default value, make sure that `place` is equalto the place of `data` @param deleter A function or function object that will be called to free thememory buffer. @return A Tensor object constructed from the buffer
66+
# 以@作为分隔符,索引关键字包括'brief'、'note'、'return'、'param'
67+
# 解析后分别将对应关键字后的内容放入字典对应关键字后
68+
def parse_doxygen(doxygen):
    """Split a flattened doxygen comment into a dict of its tagged parts.

    The text is cut at every ``@``.  Parts starting with ``brief ``,
    ``return ``, ``note `` or ``param `` are stored with the keyword removed;
    all other parts are ignored.

    Args:
        doxygen: the comment text, e.g.
            ``"@brief Adds. @param x The input. @return The sum."``

    Returns:
        A dict with the keys ``'intro'`` (from ``@brief``), ``'returns'``
        (from ``@return``), ``'note'`` (from ``@note``) and ``'param_intro'``
        (``{param_name: description}`` from each ``@param``).  Missing tags
        leave the empty default; a repeated tag keeps its last occurrence.
    """
    doxygen_dict = {
        'intro': '',
        'returns': '',
        'param_intro': {},
        'note': '',
    }

    if '@' not in doxygen:
        return doxygen_dict

    # Anything before the first "@" is not a tagged part; drop it.
    tagged = doxygen[doxygen.find('@'):]
    for part in tagged.split('@'):
        if part.startswith('brief '):
            doxygen_dict['intro'] = part[len('brief '):]
        elif part.startswith('return '):
            doxygen_dict['returns'] = part[len('return '):]
        elif part.startswith('param '):
            # partition() copes with "@param name" that has no description:
            # the name stays intact and the description is empty.  Slicing
            # with find(' ') == -1 would truncate the name instead.
            param_name, _, param_desc = part[len('param '):].partition(' ')
            doxygen_dict['param_intro'][param_name] = param_desc
        elif part.startswith('note '):
            doxygen_dict['note'] = part[len('note '):]

    return doxygen_dict

0 commit comments

Comments
 (0)