From efed103982d264d7fc9c334f96d12aee009da92d Mon Sep 17 00:00:00 2001 From: Liyulingyue <852433440@qq.com> Date: Tue, 30 May 2023 06:15:00 +0800 Subject: [PATCH 1/2] CAPItools --- ci_scripts/CAPItools/README.md | 34 ++ ci_scripts/CAPItools/main.py | 140 +++++++ ci_scripts/CAPItools/requirements.txt | 2 + ci_scripts/CAPItools/utils.py | 84 ++++ ci_scripts/CAPItools/utils_helper.py | 571 ++++++++++++++++++++++++++ 5 files changed, 831 insertions(+) create mode 100644 ci_scripts/CAPItools/README.md create mode 100644 ci_scripts/CAPItools/main.py create mode 100644 ci_scripts/CAPItools/requirements.txt create mode 100644 ci_scripts/CAPItools/utils.py create mode 100644 ci_scripts/CAPItools/utils_helper.py diff --git a/ci_scripts/CAPItools/README.md b/ci_scripts/CAPItools/README.md new file mode 100644 index 00000000000..fc5e81608d7 --- /dev/null +++ b/ci_scripts/CAPItools/README.md @@ -0,0 +1,34 @@ +# CAPI tools +CAPI tools 用于一键生成 C++ 的 rst 文档。 + +## 调用方式 +```python +python main.py [source dir] [target dir] +``` + +其中: +- source dir 是安装后的 Paddle C++ API 声明路径。 例如`venv/Lib/site-packages/paddle/include/paddle`。 +- target dir 目标文件保存路径。 + +最终生成结果如下所示: +```python +target dir +| -cn + |- index.rst + |- Paddle + |- fluid + |- phi + |- ... +| -en + |- index.rst + |- Paddle + |- fluid + |- phi + |- ... +``` + +## 获取最新 PaddlePaddle +pip install python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html + +## 特别说明 +有少量报错为正常显现,将在后续修正 diff --git a/ci_scripts/CAPItools/main.py b/ci_scripts/CAPItools/main.py new file mode 100644 index 00000000000..484ef28ef4f --- /dev/null +++ b/ci_scripts/CAPItools/main.py @@ -0,0 +1,140 @@ +# python main.py [source dir] [target dir] +# python main.py ../paddle . + + +import CppHeaderParser +import json +import os +import traceback +import sys + +from utils_helper import func_helper, class_helper, generate_overview +from utils import get_PADDLE_API_class, get_PADDLE_API_func + +# TODO 通过已安装的 paddle 来查找 include +# import paddle +# import inspect +# +# # 获取已安装paddle的路径 +# print(os.path.dirname(inspect.getsourcefile(paddle))) + + +# TODO 需要单独处理一下这种 +""" +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +/** + * Get the current CUDA stream for the passed CUDA device. + */ +PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place); +#endif +""" + + +# 获取namespace +# 多线程使用并不安全, 请不要使用多线程 +def analysis_file(path): + header = CppHeaderParser.CppHeader(path, encoding='utf8') + data = json.loads(header.toJSON()) + return data + + +# 生成文件 +def generate_docs( + all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn" +): + for item in all_funcs: + path = item["filename"].replace("../", "").replace(".h", "") + dir_path = os.path.join(save_dir, LANGUAGE, path) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + # 这个反斜杠需要单独处理, 在 linux 下 + func_name = item["name"].replace("/", "") + rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst") + # avoid a filename such as operate*.rst, only windows + try: + helper = func_helper(item, cpp2py_api_list) + helper.create_file(rst_dir, LANGUAGE) + except: + print(traceback.format_exc()) + print('FAULT GENERATE:' + rst_dir) + + for item in all_class: + path = item["filename"].replace("../", "").replace(".h", "") + dir_path = os.path.join(save_dir, LANGUAGE, path) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + func_name = item["name"].replace("PADDLE_API", "") + rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst") + try: + helper = class_helper(item) + helper.create_file(rst_dir, LANGUAGE) + except: + print(traceback.format_exc()) + print('FAULT GENERATE:' + rst_dir) + + +# cpp 对应 python api +def cpp2py(data: dict): + cpp2py_api_list = [] + for i in data["using"]: + cpp2py_api_list.append(i.replace("paddle::", "")) + + return cpp2py_api_list + + +if __name__ == "__main__": + assert len(sys.argv) == 3 + + root_dir = sys.argv[1] + save_dir = sys.argv[2] + + all_funcs = [] + all_class = [] + cpp2py_api_list = [] + overview_list = [] + for home, dirs, files in os.walk(root_dir): + for file_name in files: + file_path = os.path.join(home, file_name) + # 处理 cpp 和 py api对应的文件 + if file_name == "tensor_compat.h": + cpp2py_data = analysis_file(file_path) + cpp2py_api_list = cpp2py(cpp2py_data).copy() + + # 跳过文件中未包含PADDLE_API + with open(file_path, encoding='utf8') as f: + if 'PADDLE_API ' not in f.read(): + continue + + print("Parsing: ", file_path) + data = analysis_file(file_path) + + # 信息抽取 + current_func = get_PADDLE_API_func(data) + current_class = get_PADDLE_API_class(data) + + # 信息记录 + all_funcs.extend(current_func) + all_class.extend(current_class) + overview_list.append( + { + 'h_file': file_path, + 'class': current_class, + 'function': current_func, + } + ) + + generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn") + generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en") + + # TODO: delete the try-except after every thing is prepare + try: + generate_overview(overview_list, save_dir, "cn") + generate_overview(overview_list, save_dir, "en") + except: + print('index error') + + print("PADDLE_API func count: ", len(all_funcs)) + print("PADDLE_API class count: ", len(all_class)) + print("cpp2py api count: ", len(cpp2py_api_list)) diff --git a/ci_scripts/CAPItools/requirements.txt b/ci_scripts/CAPItools/requirements.txt new file mode 100644 index 00000000000..13a2a3bf3d7 --- /dev/null +++ b/ci_scripts/CAPItools/requirements.txt @@ -0,0 +1,2 @@ +robotpy-cppheaderparser==5.1.0 +# paddle \ No newline at end of file diff --git a/ci_scripts/CAPItools/utils.py b/ci_scripts/CAPItools/utils.py new file mode 100644 index 00000000000..ddbf2756ad7 --- /dev/null +++ b/ci_scripts/CAPItools/utils.py @@ -0,0 +1,84 @@ +# 获取存在 PADDLE_API func 数组的名称 +def get_PADDLE_API_func(data: dict): + result = [] + for i in data["functions"]: + if 'PADDLE_API' in i['debug']: + result.append(i) + return result + + +# 获取存在 PADDLE_API class 数组的名称 +def get_PADDLE_API_class(data: dict): + result = [] + for classname in data["classes"]: + # TODO 目前没有 PADDLE_API 是 struct 的 + if data["classes"][classname]["declaration_method"] == "struct": + continue + + # TODO 这里需要处理一下, 因为类名和 PADDLE_API 会粘在一起, 例: PADDLE_APIDeviceContextPool + if "PADDLE_API" in classname: + result.append(data["classes"][classname]) + return result + + +# 获取方法中的参数parameters +def get_parameters(parameters): + # parameter_api = "" # 这里解析是给api使用的 (暂时不用) + parameter_dict = {} + for i in parameters: + parameter_type_tmp = i['type'].replace(" &", "").replace(" *", "") + # * 和 & 情况 + # parameter_api += parameter_type_tmp + if i["reference"] == 1: + # parameter_api += "&" + parameter_type_tmp += "&" + if i["pointer"] == 1: + # parameter_api += "*" + parameter_type_tmp += "*" + if i["constant"] == 1 and not parameter_type_tmp.startswith('const'): + parameter_type_tmp = "const " + parameter_type_tmp + # parameter_api += f" {i['name']}, " + desc = i.get('desc', '').replace(' ', '') + + # special progress for none parameter name case + if i['name'] == '&': + continue + else: + parameter_dict[i['name']] = { + 'type': parameter_type_tmp, + 'intro': desc, + } + # parameter += f"\t- **{i['name']}** ({parameter_type_tmp}) - {desc}\n" + # 去掉末尾的逗号 + # parameter_api = parameter_api[:-2] + # return parameter, parameter_api + return parameter_dict + + +def parse_doxygen(doxygen): + doxygen_dict = { + 'intro': '', + 'returns': '', + 'param_intro': {}, + 'note': '', + } + + if '@' in doxygen: + doxygen = doxygen[doxygen.find('@') :] + for doxygen_part in doxygen.split('@'): + if doxygen_part.startswith('brief '): + doxygen_dict['intro'] = doxygen_part.replace('brief ', '', 1) + elif doxygen_part.startswith('return '): + doxygen_dict['returns'] = doxygen_part.replace('return ', '', 1) + elif doxygen_part.startswith('param '): + param_intro = doxygen_part.replace('param ', '', 1) + param_name = param_intro[: param_intro.find(' ')] + doxygen_dict['param_intro'][param_name] = param_intro[ + param_intro.find(' ') + 1 : + ] + elif doxygen_part.startswith('note '): + doxygen_dict['note'] = doxygen_part.replace('note ', '', 1) + else: + pass + + return doxygen_dict diff --git a/ci_scripts/CAPItools/utils_helper.py b/ci_scripts/CAPItools/utils_helper.py new file mode 100644 index 00000000000..4107a190d2a --- /dev/null +++ b/ci_scripts/CAPItools/utils_helper.py @@ -0,0 +1,571 @@ +import os + +from utils import get_parameters, parse_doxygen + + +class func_helper(object): + def __init__(self, function_dict, cpp2py_api_list): + super(func_helper, self).__init__() + self.function_dict = function_dict + self.cpp2py_api_list = cpp2py_api_list + self.decode() + + def decode(self): + # TODO 这里要看一下 operator== 这种情况能不能正常解析 + self.func_name = self.function_dict["name"] + # 解析api + self.api = self.function_dict["debug"].replace("PADDLE_API ", "") + self.namespace = self.function_dict["namespace"].replace("::", "_") + doxygen = ( + self.function_dict.get("doxygen", "") + .replace("/**", "") + .replace("*/", "") + .replace("\n*", "") + .replace(" ", "") + ) + self.introduction = doxygen + + self.note = "" + + # TODO 如果使用已安装的 paddle 包需要调整 + self.file_path = self.function_dict["filename"].replace("../", "") + + if len(self.function_dict["parameters"]) != 0: + self.parameter_dict = get_parameters( + self.function_dict["parameters"] + ) + else: + self.parameter_dict = {} + + self.returns = self.function_dict["returns"].replace("PADDLE_API ", "") + + # analysis doxygen + doxygen_dict = parse_doxygen(doxygen) + if doxygen_dict['intro'] != "": + self.introduction = doxygen_dict['intro'] + if doxygen_dict['note'] != "": + self.note = doxygen_dict['note'] + if doxygen_dict['returns'] != "": + self.returns = doxygen_dict['returns'] + if doxygen_dict['param_intro'] != {}: + for param_name in doxygen_dict['param_intro'].keys(): + self.parameter_dict[param_name]['intro'] = doxygen_dict[ + 'param_intro' + ][param_name] + + def create_file(self, save_dir, language): + if language == 'cn': + self.create_file_cn(save_dir, language) + elif language == 'en': + self.create_file_en(save_dir, language) + else: + print('Error language! ') + + def create_file_cn(self, save_dir, language): + with open(save_dir, 'w', encoding='utf8') as f: + head_text = ( + f'.. _{language}_api_{self.namespace}{self.func_name}:\n' f'\n' + ) + f.write(head_text) + + name_and_intro_text = ( + f'{self.func_name}\n' + f'-------------------------------\n' + f'\n' + f'..cpp: function::{self.api}\n' + f'{self.introduction}\n' + f'\n' + ) + f.write(name_and_intro_text) + + if self.func_name in self.cpp2py_api_list: + cpp2py_text = ( + f'本 API 与 Python API 对齐,详细用法可参考链接:' + f'[paddle.{self.func_name}]' + f'(https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/{self.func_name}_{language}.html)\n' + f'\n' + ) + f.write(cpp2py_text) + + if self.note != "": + note_text = f'..note::\n' f'\t{self.note}\n' f'\n' + f.write(note_text) + + define_path_text = ( + f'定义目录\n' f':::::::::::::::::::::\n' f'{self.file_path}\n' f'\n' + ) + f.write(define_path_text) + + if len(self.parameter_dict) != 0: + parameters_text = f'参数\n' f':::::::::::::::::::::' + f.write(parameters_text + '\n') + for param in self.parameter_dict.keys(): + param_text = f"\t- **{param}**" + if self.parameter_dict[param]['type'] != "": + param_text += f" ({self.parameter_dict[param]['type']})" + if self.parameter_dict[param]['intro'] != "": + param_text += ( + f" - {self.parameter_dict[param]['intro']}" + ) + param_text += "\n" + f.write(param_text) + f.write('\n') + + return_text = ( + f'返回\n' f':::::::::::::::::::::\n' f'{self.returns}' f'\n' + ) + if 'void' not in self.returns: + f.write(return_text) + + def create_file_en(self, save_dir, language): + with open(save_dir, 'w', encoding='utf8') as f: + head_text = ( + f'.. _{language}_api_{self.namespace}{self.func_name}:\n' f'\n' + ) + f.write(head_text) + + name_and_intro_text = ( + f'{self.func_name}\n' + f'-------------------------------\n' + f'\n' + f'..cpp: function::{self.api}\n' + f'{self.introduction}\n' + f'\n' + ) + f.write(name_and_intro_text) + + if self.func_name in self.cpp2py_api_list: + cpp2py_text = ( + f'This API is aligned with Python API, more details are shown in ' + f'[paddle.{self.func_name}]' + f'(https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/{self.func_name}_{language}.html)\n' + f'\n' + ) + f.write(cpp2py_text) + + if self.note != "": + note_text = f'..note::\n' f'\t{self.note}\n' f'\n' + f.write(note_text) + + define_path_text = ( + f'Path\n' f':::::::::::::::::::::\n' f'{self.file_path}\n' f'\n' + ) + f.write(define_path_text) + + if len(self.parameter_dict) != 0: + parameters_text = f'Parameters\n' f':::::::::::::::::::::' + f.write(parameters_text + '\n') + for param in self.parameter_dict.keys(): + param_text = f"\t- **{param}**" + if self.parameter_dict[param]['type'] != "": + param_text += f" ({self.parameter_dict[param]['type']})" + if self.parameter_dict[param]['intro'] != "": + param_text += ( + f" - {self.parameter_dict[param]['intro']}" + ) + param_text += "\n" + f.write(param_text) + f.write('\n') + + return_text = ( + f'Returns\n' f':::::::::::::::::::::\n' f'{self.returns}' f'\n' + ) + if 'void' not in self.returns: + f.write(return_text) + + +class class_helper(object): + def __init__(self, class_dict): + super(class_helper, self).__init__() + self.class_dict = class_dict + self.decode() + + def decode(self): + self.branch = "develop" # TODO 这里可以看看从包里面获取 + self.class_name = self.class_dict["name"].replace("PADDLE_API", "") + # TODO 如果使用已安装的 paddle 包需要调整 + self.file_path = self.class_dict["filename"].replace("../", "") + doxygen = ( + self.class_dict.get("doxygen", "") + .replace("/**", "") + .replace("*/", "") + .replace("\n*", "") + .replace(" ", "") + ) + self.introduction = doxygen + self.note = "" + # analysis doxygen + doxygen_dict = parse_doxygen(doxygen) + if doxygen_dict['intro'] != "": + self.introduction = doxygen_dict['intro'] + if doxygen_dict['note'] != "": + self.note = doxygen_dict['note'] + + # 初始化函数 + # 避免空函数解析 + self.init_func = self.class_name + + self.functions_infor = [] + # TODO: 未来可能在private也有函数 + self.class_function_number = len(self.class_dict["methods"]["public"]) + for i in range(self.class_function_number): + ith_function = self.class_dict["methods"]["public"][i] + if self.class_name in ith_function["name"] and len( + ith_function["debug"] + ) > len(self.init_func): + self.init_func = ith_function["debug"] + + function_name = ith_function['debug'] + # 获取描述 + funcs_doxygen = ( + ith_function.get("doxygen", "") + .replace("/**", "") + .replace("*/", "") + .replace("\n*", "") + .replace(" ", "") + ) + funcs_intro = funcs_doxygen + funcs_note = "" + + # 解析参数 + if len(ith_function["parameters"]) != 0: + parameter_dict = get_parameters(ith_function["parameters"]) + else: + parameter_dict = {} + # 获取返回值 + # returns = ith_function["returns"].replace("PADDLE_API ", "") + returns = ith_function["rtnType"] + # TODO Template 没有仅对class起作用,可能需要同步添加到API中 + template = "" + if ith_function['template'] != False: + template = ith_function['template'] + + # analysis doxygen + doxygen_dict = parse_doxygen(funcs_doxygen) + if doxygen_dict['intro'] != "": + funcs_intro = doxygen_dict['intro'] + if doxygen_dict['note'] != "": + funcs_note = doxygen_dict['note'] + if doxygen_dict['returns'] != "": + returns = doxygen_dict['returns'] + if doxygen_dict['param_intro'] != {}: + for param_name in doxygen_dict['param_intro'].keys(): + # TODO: 可能param_name 不同步,需要注意 + if param_name in parameter_dict.keys(): + parameter_dict[param_name]['intro'] = doxygen_dict[ + 'param_intro' + ][param_name] + + self.functions_infor.append( + { + 'name': function_name, + 'doxygen': funcs_intro, + 'note': funcs_note, + 'parameter': parameter_dict, + 'returns': returns, + 'template': template, + } + ) + + # if '@' in self.doxygen: + # print('CLASS: ' + self.file_path + ' - ' + self.class_name) + + def create_file(self, save_dir, language): + if language == 'cn': + self.create_file_cn(save_dir, language) + elif language == 'en': + self.create_file_en(save_dir, language) + else: + print('Error language! ') + + def create_file_cn(self, save_dir, language): + with open(save_dir, 'w', encoding='utf8') as f: + head_text = f'.. _{language}_api_{self.class_name}:\n' f'\n' + f.write(head_text) + + name_and_intro_text = ( + f'{self.class_name}[源代码](https://github.com/PaddlePaddle/Paddle/blob/{self.branch}/{self.file_path})\n' + f'-------------------------------\n' + f'\n' + f'.. cpp:class:: {self.init_func}\n' + f'{self.introduction}\n' + f'\n' + ) + f.write(name_and_intro_text) + + if self.note != "": + note_text = f'..note::\n' f'\t{self.note}\n' f'\n' + f.write(note_text) + + define_path_text = ( + f'定义目录\n' f':::::::::::::::::::::\n' f'{self.file_path}\n' f'\n' + ) + f.write(define_path_text) + + if self.class_function_number != 0: + class_function_head_text = ( + f'方法\n' f':::::::::::::::::::::\n' f'\n' + ) + f.write(class_function_head_text) + + for fun_infor in self.functions_infor: + if fun_infor['template'] == "": + fun_name_and_intro_text = "" + else: + fun_name_and_intro_text = f'{fun_infor["template"]}\n' + fun_name_and_intro_text += ( + f"{fun_infor['name']}\n" + f"\'\'\'\'\'\'\'\'\'\'\'\n" + f"{fun_infor['doxygen']}\n" + f"\n" + ) + f.write(fun_name_and_intro_text) + + if fun_infor['note'] != "": + fun_note_text = ( + f'..note::\n' f'\t{fun_infor["note"]}\n' f'\n' + ) + f.write(fun_note_text) + + if len(fun_infor['parameter']) != 0: + parameters_text = ( + f"**参数**\n" f"\'\'\'\'\'\'\'\'\'\'\'\n" + ) + f.write(parameters_text) + for param in fun_infor['parameter'].keys(): + param_text = f"\t- **{param}**" + if fun_infor['parameter'][param]['type'] != "": + param_text += f" ({fun_infor['parameter'][param]['type']})" + if fun_infor['parameter'][param]['intro'] != "": + param_text += f" - {fun_infor['parameter'][param]['intro']}" + param_text += "\n" + f.write(param_text) + f.write('\n') + + if ( + fun_infor['returns'] != '' + and 'void' not in fun_infor['returns'] + ): + fun_return_text = ( + f"**返回**\n" + f"\'\'\'\'\'\'\'\'\'\'\'\n" + f"{fun_infor['returns']}\n" + f"\n" + ) + f.write(fun_return_text) + + def create_file_en(self, save_dir, language): + with open(save_dir, 'w', encoding='utf8') as f: + head_text = f'.. _{language}_api_{self.class_name}:\n' f'\n' + f.write(head_text) + + name_and_intro_text = ( + f'{self.class_name}[source](https://github.com/PaddlePaddle/Paddle/blob/{self.branch}/{self.file_path})\n' + f'-------------------------------\n' + f'\n' + f'.. cpp:class:: {self.init_func}\n' + f'{self.introduction}\n' + f'\n' + ) + f.write(name_and_intro_text) + + if self.note != "": + note_text = f'..note::\n' f'\t{self.note}\n' f'\n' + f.write(note_text) + + define_path_text = ( + f'Path\n' f':::::::::::::::::::::\n' f'{self.file_path}\n' f'\n' + ) + f.write(define_path_text) + + if self.class_function_number != 0: + class_function_head_text = ( + f'Methods\n' f':::::::::::::::::::::\n' f'\n' + ) + f.write(class_function_head_text) + + for fun_infor in self.functions_infor: + if fun_infor['template'] == "": + fun_name_and_intro_text = "" + else: + fun_name_and_intro_text = f'{fun_infor["template"]}\n' + fun_name_and_intro_text += ( + f"{fun_infor['name']}\n" + f"\'\'\'\'\'\'\'\'\'\'\'\n" + f"{fun_infor['doxygen']}\n" + f"\n" + ) + f.write(fun_name_and_intro_text) + + if fun_infor['note'] != "": + fun_note_text = ( + f'..note::\n' f'\t{fun_infor["note"]}\n' f'\n' + ) + f.write(fun_note_text) + + if len(fun_infor['parameter']) != 0: + parameters_text = ( + f"**Parameters**\n" f"\'\'\'\'\'\'\'\'\'\'\'\n" + ) + f.write(parameters_text) + for param in fun_infor['parameter'].keys(): + param_text = f"\t- **{param}**" + if fun_infor['parameter'][param]['type'] != "": + param_text += f" ({fun_infor['parameter'][param]['type']})" + if fun_infor['parameter'][param]['intro'] != "": + param_text += f" - {fun_infor['parameter'][param]['intro']}" + param_text += "\n" + f.write(param_text) + f.write('\n') + + if ( + fun_infor['returns'] != '' + and 'void' not in fun_infor['returns'] + ): + fun_return_text = ( + f"**Returns**\n" + f"\'\'\'\'\'\'\'\'\'\'\'\n" + f"{fun_infor['returns']}\n" + f"\n" + ) + f.write(fun_return_text) + + +def generate_overview(overview_list, save_dir, language): + if language == 'cn': + generate_overview_cn(overview_list, save_dir, language) + elif language == 'en': + generate_overview_en(overview_list, save_dir, language) + else: + print('Error language! ') + + +def generate_overview_cn(overview_list, root_dir, LANGUAGE): + dir_path = os.path.join(root_dir, LANGUAGE) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + rst_dir = os.path.join(dir_path, 'index.rst') + with open(rst_dir, 'w', encoding='utf8') as f: + head_text = ( + f'# C++ 文档\n' + f'欢迎使用飞桨框架(PaddlePaddle),PaddlePaddle 是一个易用、高效、灵活、可扩展的深度学习框架,致力于让深度学习技术的创新与应用更简单。\n' + f'在本版本中,飞桨框架对 C++ 接口做了许多优化,您可以参考下表来了解飞桨框架最新版的 C++ 目录结构与说明。此外,您可参考 PaddlePaddle 的 GitHub 了解详情。\n' + f'本文档的应用场景为 C++ 训练,并主要在自定义算子开发时使用。本文档内容持续迭代中,在下个版本可能会有不兼容的升级,如果不介意随下一版本升级的话,可以使用,追求稳定的话则不建议使用。\n' + f'\n' + ) + f.write(head_text) + + f.write('## 头文件\n') + namespace_dict = {} # 用于对齐namespace + + for h_dict in overview_list: + basename = os.path.basename(h_dict["h_file"]) + h_head_text = f'### [{basename}]({h_dict["h_file"]})\n' + f.write(h_head_text) + + # TODO: add url link + if len(h_dict["class"]) > 0: + # write class + h_class_text = f'#### classes\n' + f.write(h_class_text) + for class_name in h_dict["class"]: + class_namespace = class_name["namespace"] + "::" + # 在这里初始化字典为一个数组 + if class_namespace not in namespace_dict.keys(): + namespace_dict[class_namespace] = [] + namespace_dict[class_name["namespace"] + "::"].append( + class_name['name'].replace("PADDLE_API", "") + ) + f.write( + '- ' + + class_name['name'].replace("PADDLE_API", "") + + '\n' + ) + + if len(h_dict["function"]) > 0: + # write functions + h_function_text = f'#### functions\n' + f.write(h_function_text) + for function_name in h_dict["function"]: + if function_name["namespace"] not in namespace_dict.keys(): + namespace_dict[function_name["namespace"]] = [] + namespace_dict[function_name["namespace"]].append( + function_name['name'] + ) + f.write('- ' + function_name['name'] + '\n') + + f.write('\n') + + namespace_text = '## 命名空间\n' + for namespace in namespace_dict.keys(): + namespace_text += f'### {namespace}\n' + for name in namespace_dict[namespace]: + namespace_text += f'- {name}\n' + namespace_text += '\n' + f.write(namespace_text) + + +def generate_overview_en(overview_list, root_dir, LANGUAGE): + dir_path = os.path.join(root_dir, LANGUAGE) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + + rst_dir = os.path.join(dir_path, 'index.rst') + with open(rst_dir, 'w', encoding='utf8') as f: + head_text = ( + f'# C++ API Reference\n' + f'PaddlePaddle (PArallel Distributed Deep LEarning) is an efficient, flexible, and extensible deep learning framework, commits to making the innovation and application of deep learning technology easier.\n' + f'In this version, PaddlePaddle has made many optimizations to the C++ APIs. You can refer to the following table to understand the C++ API directory structure and description of the latest version of PaddlePaddle. In addition, you can refer to PaddlePaddle’s GitHub for details.\n' + f'The application scenario of this document is C++training and is mainly used in the development of custom operators. The content of this document is continuously iterating, and there may be incompatible upgrades in the next version. If you don’t mind upgrading with the next version, you can use it. Otherwise, it is not recommended to use it.\n' + f'\n' + ) + f.write(head_text) + + f.write('## include\n') + namespace_dict = {} + + for h_dict in overview_list: + basename = os.path.basename(h_dict["h_file"]) + h_head_text = f'### [{basename}]({h_dict["h_file"]})\n' + f.write(h_head_text) + + # TODO: add url link + if len(h_dict["class"]) > 0: + # write class + h_class_text = f'#### classes\n' + f.write(h_class_text) + for class_name in h_dict["class"]: + class_namespace = class_name["namespace"] + "::" + if class_namespace not in namespace_dict.keys(): + namespace_dict[class_namespace] = [] + namespace_dict[class_name["namespace"] + "::"].append( + class_name['name'].replace("PADDLE_API", "") + ) + f.write( + '- ' + + class_name['name'].replace("PADDLE_API", "") + + '\n' + ) + + if len(h_dict["function"]) > 0: + # write functions + h_function_text = f'#### functions\n' + f.write(h_function_text) + for function_name in h_dict["function"]: + if function_name["namespace"] not in namespace_dict.keys(): + namespace_dict[function_name["namespace"]] = [] + namespace_dict[function_name["namespace"]].append( + function_name['name'] + ) + f.write('- ' + function_name['name'] + '\n') + + f.write('\n') + + namespace_text = '## namespace\n' + for namespace in namespace_dict.keys(): + namespace_text += f'### {namespace}\n' + for name in namespace_dict[namespace]: + namespace_text += f'- {name}\n' + namespace_text += '\n' + f.write(namespace_text) From d59e6099d9b0aee61f466c6385c1017736941045 Mon Sep 17 00:00:00 2001 From: Liyulingyue <852433440@qq.com> Date: Fri, 9 Jun 2023 06:41:57 +0800 Subject: [PATCH 2/2] add note --- ci_scripts/CAPItools/README.md | 27 ++++++++-- ci_scripts/CAPItools/main.py | 74 +++++++++++++++------------ ci_scripts/CAPItools/utils.py | 20 ++++++-- ci_scripts/CAPItools/utils_helper.py | 75 +++++++++++++++------------- 4 files changed, 121 insertions(+), 75 deletions(-) diff --git a/ci_scripts/CAPItools/README.md b/ci_scripts/CAPItools/README.md index fc5e81608d7..8742fccc07a 100644 --- a/ci_scripts/CAPItools/README.md +++ b/ci_scripts/CAPItools/README.md @@ -3,9 +3,11 @@ CAPI tools 用于一键生成 C++ 的 rst 文档。 ## 调用方式 ```python -python main.py [source dir] [target dir] +python main.py ``` +若不设置`source dir`和`target dir`,则默认先查找已安装的`paddlepaddle`包环境。 + 其中: - source dir 是安装后的 Paddle C++ API 声明路径。 例如`venv/Lib/site-packages/paddle/include/paddle`。 - target dir 目标文件保存路径。 @@ -30,5 +32,24 @@ target dir ## 获取最新 PaddlePaddle pip install python -m pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/windows/cpu-mkl-avx/develop.html -## 特别说明 -有少量报错为正常显现,将在后续修正 +## 代码结构 + +### `main.py`文件主要用于处理和筛选包文件, 并调用`utils_helper.py`中的函数进行文件生成 +```python +def analysis_file() # 用于解析文件内容(多线程不安全) + +def generate_docs() # 用于创建目录并传值给 utils_helper.py 中的函数进行文件生成 + +def cpp2py() # 用于筛选出 cpp api 和 py api 相对应的函数名称 +``` + +### `utils_helper.py`文件主要存放函数生成、解析, 以及文件写入的工作 +```python + +class func_helper(object) # 用于生成和解析方法 + decode() # 用于解析输出输出参数、函数名称、返回值、函数注释信息 +class class_helper(object) # 用于生成和解析类 + decode() # 同 func_helper() + +def generate_overview() # 用于生成 overview.rst 文件 +``` diff --git a/ci_scripts/CAPItools/main.py b/ci_scripts/CAPItools/main.py index 484ef28ef4f..8c442d43495 100644 --- a/ci_scripts/CAPItools/main.py +++ b/ci_scripts/CAPItools/main.py @@ -7,30 +7,13 @@ import os import traceback import sys +import re from utils_helper import func_helper, class_helper, generate_overview from utils import get_PADDLE_API_class, get_PADDLE_API_func -# TODO 通过已安装的 paddle 来查找 include -# import paddle -# import inspect -# -# # 获取已安装paddle的路径 -# print(os.path.dirname(inspect.getsourcefile(paddle))) - -# TODO 需要单独处理一下这种 -""" -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -/** - * Get the current CUDA stream for the passed CUDA device. - */ -PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place); -#endif -""" - - -# 获取namespace +# 解析所有的函数, 类, 枚举, 返回一个字典 # 多线程使用并不安全, 请不要使用多线程 def analysis_file(path): header = CppHeaderParser.CppHeader(path, encoding='utf8') @@ -39,6 +22,7 @@ def analysis_file(path): # 生成文件 +# 根据给定的list内容,生成对应的文档信息 def generate_docs( all_funcs, all_class, cpp2py_api_list, save_dir, LANGUAGE="cn" ): @@ -50,11 +34,17 @@ def generate_docs( # 这个反斜杠需要单独处理, 在 linux 下 func_name = item["name"].replace("/", "") + + # Note: 操作符仅不生成rst,实际上在Overview列表依然会呈现以提示存在此操作符 + if func_name.startswith('operator'): + checkwords = func_name.replace('operator', '', 1) + if re.search(r"\w", checkwords) == None: + continue # 跳过操作符声明 rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst") # avoid a filename such as operate*.rst, only windows try: helper = func_helper(item, cpp2py_api_list) - helper.create_file(rst_dir, LANGUAGE) + helper.create_and_write_file(rst_dir, LANGUAGE) except: print(traceback.format_exc()) print('FAULT GENERATE:' + rst_dir) @@ -69,13 +59,14 @@ def generate_docs( rst_dir = os.path.join(save_dir, LANGUAGE, path, func_name + ".rst") try: helper = class_helper(item) - helper.create_file(rst_dir, LANGUAGE) + helper.create_and_write_file(rst_dir, LANGUAGE) except: print(traceback.format_exc()) print('FAULT GENERATE:' + rst_dir) # cpp 对应 python api +# 用于存储 api 的名称, 用于后续生成对应python api文档链接 def cpp2py(data: dict): cpp2py_api_list = [] for i in data["using"]: @@ -84,11 +75,27 @@ def cpp2py(data: dict): return cpp2py_api_list +# 运行主函数,主要流程如下 +# 1. 确定生成的目录 +# 2. 提取待生成文档的PADDLE_API list +# 3. 生成文档 if __name__ == "__main__": - assert len(sys.argv) == 3 + root_dir = '' + save_dir = '.' # 默认保存在当前目录 + if len(sys.argv) == 3: + root_dir = sys.argv[1] + save_dir = sys.argv[2] - root_dir = sys.argv[1] - save_dir = sys.argv[2] + if root_dir == '': + try: + import paddle + import inspect + + root_dir = os.path.dirname(inspect.getsourcefile(paddle)) + except: + # for simple run + root_dir = '../paddle' + save_dir = '.' # 默认保存在当前目录 all_funcs = [] all_class = [] @@ -96,14 +103,18 @@ def cpp2py(data: dict): overview_list = [] for home, dirs, files in os.walk(root_dir): for file_name in files: + # 跳过不需要处理的文件 + if file_name.split(".")[-1] not in ["cc", "cu", "h"]: + continue + file_path = os.path.join(home, file_name) - # 处理 cpp 和 py api对应的文件 + # 处理 cpp 和 py api对应的文件, 目前只有这个文件内的 cpp api和 python api是对应的 if file_name == "tensor_compat.h": cpp2py_data = analysis_file(file_path) cpp2py_api_list = cpp2py(cpp2py_data).copy() # 跳过文件中未包含PADDLE_API - with open(file_path, encoding='utf8') as f: + with open(file_path, encoding='utf-8') as f: if 'PADDLE_API ' not in f.read(): continue @@ -125,16 +136,15 @@ def cpp2py(data: dict): } ) + # 生成文档 generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "cn") generate_docs(all_funcs, all_class, cpp2py_api_list, save_dir, "en") - # TODO: delete the try-except after every thing is prepare - try: - generate_overview(overview_list, save_dir, "cn") - generate_overview(overview_list, save_dir, "en") - except: - print('index error') + # 生成 overview + generate_overview(overview_list, save_dir, "cn") + generate_overview(overview_list, save_dir, "en") + # 统计信息 print("PADDLE_API func count: ", len(all_funcs)) print("PADDLE_API class count: ", len(all_class)) print("cpp2py api count: ", len(cpp2py_api_list)) diff --git a/ci_scripts/CAPItools/utils.py b/ci_scripts/CAPItools/utils.py index ddbf2756ad7..02624e0a6a5 100644 --- a/ci_scripts/CAPItools/utils.py +++ b/ci_scripts/CAPItools/utils.py @@ -1,4 +1,6 @@ # 获取存在 PADDLE_API func 数组的名称 +# CppHeaderParser 解析后以字典形式保存数据,'debug' 字段中保存了原始信息 +# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的函数 def get_PADDLE_API_func(data: dict): result = [] for i in data["functions"]: @@ -8,20 +10,24 @@ def get_PADDLE_API_func(data: dict): # 获取存在 PADDLE_API class 数组的名称 +# CppHeaderParser 解析后以字典形式保存数据 +# 如果 PADDLE_API 在字段中,则表明该 API 是外部暴露的类 def get_PADDLE_API_class(data: dict): result = [] for classname in data["classes"]: - # TODO 目前没有 PADDLE_API 是 struct 的 + # Note 目前没有 PADDLE_API 是 struct 的 if data["classes"][classname]["declaration_method"] == "struct": continue - # TODO 这里需要处理一下, 因为类名和 PADDLE_API 会粘在一起, 例: PADDLE_APIDeviceContextPool + # Note 这里需要处理一下, 因为类名和 PADDLE_API 会粘在一起, 例: PADDLE_APIDeviceContextPool if "PADDLE_API" in classname: result.append(data["classes"][classname]) return result # 获取方法中的参数parameters +# 根据解析的参数字典,添加对应的参数名、参数类型、说明 +# 有时候会将“&”解析为参数名,需要特殊处理 def get_parameters(parameters): # parameter_api = "" # 这里解析是给api使用的 (暂时不用) parameter_dict = {} @@ -29,9 +35,9 @@ def get_parameters(parameters): parameter_type_tmp = i['type'].replace(" &", "").replace(" *", "") # * 和 & 情况 # parameter_api += parameter_type_tmp - if i["reference"] == 1: - # parameter_api += "&" - parameter_type_tmp += "&" + + # 添加引用 + parameter_type_tmp += "&" * i["reference"] if i["pointer"] == 1: # parameter_api += "*" parameter_type_tmp += "*" @@ -55,6 +61,10 @@ def get_parameters(parameters): return parameter_dict +# 将注释内容解析为说明字典 +# 解析前: @brief Construct a Tensor from a buffer pointed to by `data` @note `from_blob` doesn’t copy or move data, Modifying the constructed tensor is equivalent to modifying the original data. @param data The pointer to the memory buffer. @param shape The dims of the tensor. @param dtype The data type of the tensor, should correspond to data type of`data`. See PD_FOR_EACH_DATA_TYPE in `phi/common/data_type.h` @param layout The data layout of the tensor. @param place The place where the tensor is located.If `place` is default value, it will be inferred from `data`,However, the feature is only supported on CPU or GPU.If `place` is not default value, make sure that `place` is equalto the place of `data` @param deleter A function or function object that will be called to free thememory buffer. @return A Tensor object constructed from the buffer +# 以@作为分隔符,索引关键字包括'brief'、'note'、'return'、'param' +# 解析后分别将对应关键字后的内容放入字典对应关键字后 def parse_doxygen(doxygen): doxygen_dict = { 'intro': '', diff --git a/ci_scripts/CAPItools/utils_helper.py b/ci_scripts/CAPItools/utils_helper.py index 4107a190d2a..15f339dd4d9 100644 --- a/ci_scripts/CAPItools/utils_helper.py +++ b/ci_scripts/CAPItools/utils_helper.py @@ -2,7 +2,10 @@ from utils import get_parameters, parse_doxygen - +# 用于生成API文档的辅助类 +# __init__ 初始化函数,调用decode +# decode 用于解析CppHeaderParser的解析信息 +# create_and_write_file 根据指定的语言类型,在指定目录生成对应的文档 class func_helper(object): def __init__(self, function_dict, cpp2py_api_list): super(func_helper, self).__init__() @@ -11,9 +14,8 @@ def __init__(self, function_dict, cpp2py_api_list): self.decode() def decode(self): - # TODO 这里要看一下 operator== 这种情况能不能正常解析 + # 解析 api 信息 self.func_name = self.function_dict["name"] - # 解析api self.api = self.function_dict["debug"].replace("PADDLE_API ", "") self.namespace = self.function_dict["namespace"].replace("::", "_") doxygen = ( @@ -27,7 +29,6 @@ def decode(self): self.note = "" - # TODO 如果使用已安装的 paddle 包需要调整 self.file_path = self.function_dict["filename"].replace("../", "") if len(self.function_dict["parameters"]) != 0: @@ -53,15 +54,15 @@ def decode(self): 'param_intro' ][param_name] - def create_file(self, save_dir, language): + def create_and_write_file(self, save_dir, language): if language == 'cn': - self.create_file_cn(save_dir, language) + self.create_and_write_file_cn(save_dir, language) elif language == 'en': - self.create_file_en(save_dir, language) + self.create_and_write_file_en(save_dir, language) else: print('Error language! ') - def create_file_cn(self, save_dir, language): + def create_and_write_file_cn(self, save_dir, language): with open(save_dir, 'w', encoding='utf8') as f: head_text = ( f'.. _{language}_api_{self.namespace}{self.func_name}:\n' f'\n' @@ -117,7 +118,7 @@ def create_file_cn(self, save_dir, language): if 'void' not in self.returns: f.write(return_text) - def create_file_en(self, save_dir, language): + def create_and_write_file_en(self, save_dir, language): with open(save_dir, 'w', encoding='utf8') as f: head_text = ( f'.. _{language}_api_{self.namespace}{self.func_name}:\n' f'\n' @@ -174,6 +175,10 @@ def create_file_en(self, save_dir, language): f.write(return_text) +# 用于生成Class文档的辅助类 +# __init__ 初始化函数,调用decode +# decode 用于解析CppHeaderParser的解析信息 +# create_and_write_file 根据指定的语言类型,在指定目录生成对应的文档 class class_helper(object): def __init__(self, class_dict): super(class_helper, self).__init__() @@ -181,9 +186,8 @@ def __init__(self, class_dict): self.decode() def decode(self): - self.branch = "develop" # TODO 这里可以看看从包里面获取 + self.branch = "develop" # Note 这里可以看看从包里面获取 self.class_name = self.class_dict["name"].replace("PADDLE_API", "") - # TODO 如果使用已安装的 paddle 包需要调整 self.file_path = self.class_dict["filename"].replace("../", "") doxygen = ( self.class_dict.get("doxygen", "") @@ -206,14 +210,11 @@ def decode(self): self.init_func = self.class_name self.functions_infor = [] - # TODO: 未来可能在private也有函数 + # Note: 未来可能在private也有函数 + # Note: 函数内构造函数可能解析有问题,需要后期查验 self.class_function_number = len(self.class_dict["methods"]["public"]) for i in range(self.class_function_number): ith_function = self.class_dict["methods"]["public"][i] - if self.class_name in ith_function["name"] and len( - ith_function["debug"] - ) > len(self.init_func): - self.init_func = ith_function["debug"] function_name = ith_function['debug'] # 获取描述 @@ -235,7 +236,7 @@ def decode(self): # 获取返回值 # returns = ith_function["returns"].replace("PADDLE_API ", "") returns = ith_function["rtnType"] - # TODO Template 没有仅对class起作用,可能需要同步添加到API中 + # Note Template 没有仅对class起作用,可能需要同步添加到API中 template = "" if ith_function['template'] != False: template = ith_function['template'] @@ -250,7 +251,7 @@ def decode(self): returns = doxygen_dict['returns'] if doxygen_dict['param_intro'] != {}: for param_name in doxygen_dict['param_intro'].keys(): - # TODO: 可能param_name 不同步,需要注意 + # Note: 可能param_name 不同步,需要注意 if param_name in parameter_dict.keys(): parameter_dict[param_name]['intro'] = doxygen_dict[ 'param_intro' @@ -270,15 +271,15 @@ def decode(self): # if '@' in self.doxygen: # print('CLASS: ' + self.file_path + ' - ' + self.class_name) - def create_file(self, save_dir, language): + def create_and_write_file(self, save_dir, language): if language == 'cn': - self.create_file_cn(save_dir, language) + self.create_and_write_file_cn(save_dir, language) elif language == 'en': - self.create_file_en(save_dir, language) + self.create_and_write_file_en(save_dir, language) else: print('Error language! ') - def create_file_cn(self, save_dir, language): + def create_and_write_file_cn(self, save_dir, language): with open(save_dir, 'w', encoding='utf8') as f: head_text = f'.. _{language}_api_{self.class_name}:\n' f'\n' f.write(head_text) @@ -354,7 +355,7 @@ def create_file_cn(self, save_dir, language): ) f.write(fun_return_text) - def create_file_en(self, save_dir, language): + def create_and_write_file_en(self, save_dir, language): with open(save_dir, 'w', encoding='utf8') as f: head_text = f'.. _{language}_api_{self.class_name}:\n' f'\n' f.write(head_text) @@ -431,6 +432,8 @@ def create_file_en(self, save_dir, language): f.write(fun_return_text) +# 用于生成Overview页面 +# 根据指定的语言类型,在指定目录生成总览文档 def generate_overview(overview_list, save_dir, language): if language == 'cn': generate_overview_cn(overview_list, save_dir, language) @@ -456,7 +459,7 @@ def generate_overview_cn(overview_list, root_dir, LANGUAGE): ) f.write(head_text) - f.write('## 头文件\n') + f.write('## 头文件索引\n') namespace_dict = {} # 用于对齐namespace for h_dict in overview_list: @@ -464,7 +467,7 @@ def generate_overview_cn(overview_list, root_dir, LANGUAGE): h_head_text = f'### [{basename}]({h_dict["h_file"]})\n' f.write(h_head_text) - # TODO: add url link + # Note: add url link if len(h_dict["class"]) > 0: # write class h_class_text = f'#### classes\n' @@ -497,7 +500,8 @@ def generate_overview_cn(overview_list, root_dir, LANGUAGE): f.write('\n') - namespace_text = '## 命名空间\n' + # 根据 namespace 进行分级写入 + namespace_text = '## 命名空间索引\n' for namespace in namespace_dict.keys(): namespace_text += f'### {namespace}\n' for name in namespace_dict[namespace]: @@ -506,6 +510,7 @@ def generate_overview_cn(overview_list, root_dir, LANGUAGE): f.write(namespace_text) +# 与 generate_overview_cn 实现原理一致 def generate_overview_en(overview_list, root_dir, LANGUAGE): dir_path = os.path.join(root_dir, LANGUAGE) if not os.path.exists(dir_path): @@ -522,7 +527,7 @@ def generate_overview_en(overview_list, root_dir, LANGUAGE): ) f.write(head_text) - f.write('## include\n') + f.write('## Index by header file\n') namespace_dict = {} for h_dict in overview_list: @@ -530,7 +535,7 @@ def generate_overview_en(overview_list, root_dir, LANGUAGE): h_head_text = f'### [{basename}]({h_dict["h_file"]})\n' f.write(h_head_text) - # TODO: add url link + # Note: add url link if len(h_dict["class"]) > 0: # write class h_class_text = f'#### classes\n' @@ -562,10 +567,10 @@ def generate_overview_en(overview_list, root_dir, LANGUAGE): f.write('\n') - namespace_text = '## namespace\n' - for namespace in namespace_dict.keys(): - namespace_text += f'### {namespace}\n' - for name in namespace_dict[namespace]: - namespace_text += f'- {name}\n' - namespace_text += '\n' - f.write(namespace_text) + namespace_text = '## Index by namespace\n' + for namespace in namespace_dict.keys(): + namespace_text += f'### {namespace}\n' + for name in namespace_dict[namespace]: + namespace_text += f'- {name}\n' + namespace_text += '\n' + f.write(namespace_text)