Skip to content

Instantly share code, notes, and snippets.

@Anthony-Hoo
Created June 13, 2024 09:53
Show Gist options
  • Save Anthony-Hoo/2420cf1c45bfb5ba033f4985e3dad3f5 to your computer and use it in GitHub Desktop.
Save Anthony-Hoo/2420cf1c45bfb5ba033f4985e3dad3f5 to your computer and use it in GitHub Desktop.
Add docstrings to Python files using an OpenAI-compatible API
import ast
import requests
class FunctionExtractor(ast.NodeVisitor):
    """AST visitor that records every ``ast.FunctionDef`` it encounters.

    After ``visit(tree)`` the ``functions`` list holds one tuple per function,
    in visiting order: ``(node, source, callee_names)`` where ``source`` is the
    ``ast.unparse`` rendering of the function and ``callee_names`` lists the
    names of everything the function calls through a bare identifier.
    """

    def __init__(self):
        # Not populated by this class; kept as a public attribute.
        self.function_calls = []
        self.functions = []

    def visit_FunctionDef(self, node):
        """Record ``node`` and then descend so nested functions are seen too."""
        source = ast.unparse(node)

        # Only calls of the form ``name(...)`` are collected; calls through an
        # attribute or any other expression (``obj.method()``) are skipped.
        callee_names = []
        for child in ast.walk(node):
            if not isinstance(child, ast.Call):
                continue
            if isinstance(child.func, ast.Name):
                callee_names.append(child.func.id)

        self.functions.append((node, source, callee_names))
        self.generic_visit(node)
def read_source_file(file_path):
    """Return the full text of ``file_path``, decoded as UTF-8."""
    with open(file_path, encoding='utf-8') as source:
        contents = source.read()
    return contents
def extract_functions(source_code):
    """Parse ``source_code`` and return the functions found in it.

    Returns the ``functions`` list built by ``FunctionExtractor``: one
    ``(node, source, callee_names)`` tuple per function definition.
    """
    collector = FunctionExtractor()
    collector.visit(ast.parse(source_code))
    return collector.functions
def call_custom_api(function_name, function_code, context_code, api_url,
                    api_key="<YOUR_API_KEY>", model="qwen2:7b", timeout=600):
    """Ask an OpenAI-compatible chat-completions endpoint to write a docstring.

    Args:
        function_name: Name of the function to document (used in the prompt).
        function_code: Source code of the function to document.
        context_code: Source code of the functions it calls, given to the
            model as context only.
        api_url: Full URL of the chat-completions endpoint.
        api_key: Bearer token sent in the ``Authorization`` header.
        model: Model identifier placed in the request payload.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.post``. ``None`` waits indefinitely.

    Returns:
        The text of the first choice's message in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError, IndexError: If the response JSON lacks
            ``choices[0].message.content``.
    """
    prompt = (f"请为以下 {function_name} 函数编写中文docstring,对函数的功能进行描述,简要说明执行步骤,并附带上输入参数和返回值的基本情况。注意:注释中不要输出函数代码,不要编写调用示例,仅对目前执行的函数编写docstring,不要对上下文函数编写注释:\n\n"
              f"需要编写docstring的函数代码:\n{function_code}\n\n"
              f"该函数调用的上下文代码:\n{context_code}")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 8192,
        "temperature": 0.5
    }
    # Without a timeout a stalled server would block the whole run forever.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']
def insert_docstring(node, docstring):
    """Make ``docstring`` the docstring of the function ``node``, in place.

    If the first statement of ``node.body`` is already a string-constant
    expression (an existing docstring) it is replaced; otherwise the new
    docstring is inserted as the first statement. Replacing avoids leaving the
    old docstring behind as a stray string statement.

    Args:
        node: Function definition node whose ``body`` list is modified.
        docstring: Text of the docstring to install.
    """
    docstring_node = ast.Expr(value=ast.Constant(value=docstring, kind=None))
    body = node.body
    has_docstring = (
        bool(body)
        and isinstance(body[0], ast.Expr)
        and isinstance(body[0].value, ast.Constant)
        and isinstance(body[0].value.value, str)
    )
    if has_docstring:
        body[0] = docstring_node
    else:
        body.insert(0, docstring_node)
def write_new_source_file(file_path, original_source, functions):
    """Write ``original_source`` to ``file_path`` with new docstrings applied.

    ``original_source`` is re-parsed, and for every entry in ``functions`` the
    docstring statement at ``node.body[0]`` is copied onto the matching
    function of the fresh tree. The result is rendered with ``ast.unparse``,
    so the output is regenerated from the AST rather than edited in place.

    Args:
        file_path: Destination path; written as UTF-8, overwriting any file.
        original_source: The source text the functions were extracted from.
        functions: Iterable of ``(node, source, callee_names)`` tuples whose
            ``node.body[0]`` holds the generated docstring.
    """

    def is_docstring(stmt):
        return (
            isinstance(stmt, ast.Expr)
            and isinstance(stmt.value, ast.Constant)
            and isinstance(stmt.value.value, str)
        )

    tree = ast.parse(original_source)

    # Index the fresh tree once. Matching on position as well as name keeps
    # same-named functions (e.g. several ``__init__`` methods) apart; the
    # name-only index is a fallback for nodes that carry no usable position.
    by_position = {}
    by_name = {}
    for candidate in ast.walk(tree):
        if isinstance(candidate, ast.FunctionDef):
            key = (candidate.name, candidate.lineno, candidate.col_offset)
            by_position[key] = candidate
            by_name.setdefault(candidate.name, candidate)

    for node, _, _ in functions:
        # Only copy a real docstring; never duplicate an ordinary statement.
        if not node.body or not is_docstring(node.body[0]):
            continue
        generated = node.body[0]

        key = (node.name, getattr(node, 'lineno', None), getattr(node, 'col_offset', None))
        target = by_position.get(key)
        if target is None:
            target = by_name.get(node.name)
        if target is None:
            continue

        # Replace an existing docstring instead of stacking a second one.
        if target.body and is_docstring(target.body[0]):
            target.body[0] = generated
        else:
            target.body.insert(0, generated)

    new_code = ast.unparse(tree)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(new_code)
def main(input_file, output_file, api_url):
    """Generate a docstring for every function in ``input_file``.

    Each function is sent to the API at ``api_url`` together with the source
    of the functions it calls by name, the returned docstring is attached to
    the function's AST node, and the result is written to ``output_file``.
    """
    source_code = read_source_file(input_file)
    functions = extract_functions(source_code)

    for entry in functions:
        node, function_code, called_functions = entry
        print('正在处理函数:', node.name)

        # Context is the source of every extracted function that this one calls.
        related_sources = []
        for other, other_code, _ in functions:
            if other.name in called_functions:
                related_sources.append(other_code)
        context_code = "\n\n".join(related_sources)

        docstring = call_custom_api(node.name, function_code, context_code, api_url)
        insert_docstring(node, docstring)

    write_new_source_file(output_file, source_code, functions)
if __name__ == "__main__":
    # Edit these three values before running the script: the file to read,
    # the file to write, and the chat-completions endpoint to call.
    input_file, output_file = 'input.py', 'output.py'
    api_url = '<YOUR_OPENAI_API_URL>'
    main(input_file=input_file, output_file=output_file, api_url=api_url)
@Anthony-Hoo
Copy link
Author

Anthony-Hoo commented Jun 13, 2024

近期上手了几个陈年屎山项目,由于前几任维护者均已离职,代码理解起来比较困难。
故而写了个脚本,给 Python 文件一键添加 docstring,方便理解代码的结构和功能。
为了省钱,可以选择兼容 OpenAI 的 API 来执行,比如用 Ollama 在本地 host 一个大模型。

使用前请修改输入、输出的 Python 文件路径和 API URL(对于运行在本地的 Ollama,这里是 “http://127.0.0.1:11434/v1/chat/completions”),并设置好需要调用的模型参数(比如代码中的 qwen2:7b)。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment