Skip to content

Instantly share code, notes, and snippets.

@yanyaoer
Created July 22, 2024 06:37
Show Gist options
  • Save yanyaoer/62a95dd231693ede80a00718fa5c9df2 to your computer and use it in GitHub Desktop.
Save yanyaoer/62a95dd231693ede80a00718fa5c9df2 to your computer and use it in GitHub Desktop.
gitlab-cr with llm
#!/usr/bin/env python3
import json
# import sys
import logging
import os
import pathlib
import re
import shlex
import subprocess
import time
from urllib import parse, request
from urllib.error import HTTPError
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()

# Both credentials are read from the environment.  A GitLab personal access
# token can be created at:
# https://{gitlab_domain}.com/-/profile/personal_access_tokens
gitlab_key = os.getenv("GITLAB_KEY")
gpt_key = os.getenv("GPT_KEY")
if not gitlab_key or not gpt_key:
    logger.error(
        "need gitlab and gpt key: " + "$ GPT_KEY=xxx GITLAB_KEY=xxx python3 gitlab-ci.py"
    )
    # ``exit`` is injected by the ``site`` module and is not guaranteed to
    # exist (e.g. ``python -S``); raising SystemExit is the portable form.
    # NOTE(review): the exit status stays 0 as before, so a missing key does
    # not fail the calling job -- confirm that this is intended.
    raise SystemExit(0)

# Placeholder hosts: replace ``{gitlab_domain}`` / ``{gpt_selfhost}`` with the
# real host names before running the script.
__base_url__ = "https://{gitlab_domain}.com/api/v4"
__gpt_url__ = "https://{gpt_selfhost}.openai.azure.com"

# File extensions (without the leading dot) that are excluded from review;
# override with a comma-separated list in the __IGNORE_SUFFIX__ variable.
__ignore_suffix__ = os.getenv("__IGNORE_SUFFIX__", "md,so,bin,jpg,gif,png").split(",")
def req(url, data=None, headers=None, method="GET"):
    """Call the GitLab REST API and return the decoded JSON response.

    Args:
        url: API path appended to ``__base_url__``.
        data: Optional payload.  When truthy it is wrapped as
            ``{"query": data}`` and sent as the JSON request body.
        headers: Optional header mapping.  When omitted, the private-token
            header and a JSON content type are sent.
        method: HTTP method name.

    Returns:
        The parsed JSON body of the response.

    Raises:
        urllib.error.HTTPError: propagated from ``urlopen`` on an HTTP error
            status.
    """
    if data:
        data = json.dumps(dict(query=data)).encode()
    q = request.Request(
        __base_url__ + url,
        headers=headers
        or {
            "PRIVATE-TOKEN": gitlab_key,
            "Content-Type": "application/json",
        },
        data=data,
        method=method,
    )
    # The context manager closes the connection once the body has been parsed.
    with request.urlopen(q) as res:
        return json.load(res)
def execute(cmd):
    """Run *cmd* through the shell and return its captured standard output.

    The command is logged before it runs.  A non-zero exit status is logged
    as an error together with the captured standard error; it does not raise.

    Args:
        cmd: Shell command line.  It is handed to the shell unchanged, so the
            caller is responsible for quoting any interpolated values.

    Returns:
        The bytes the command wrote to standard output.

    >>> execute('ls readme.md')
    b'readme.md\\n'
    """
    logger.info(cmd)
    completed = subprocess.run(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if completed.returncode != 0:
        # Pass the values as lazy %-arguments; stderr must never be used as
        # the format string itself.
        logger.error(
            "command failed (rc=%s): %s\n%s",
            completed.returncode,
            cmd,
            completed.stderr,
        )
    return completed.stdout
# System message sent with every review request.
_REVIEW_SYSTEM_PROMPT = (
    "You are an AI programming assistant.\n"
    'When asked for your name, you must respond with "GitHub Copilot".\n'
    "Follow the user's requirements carefully & to the letter.\n"
    "Follow Microsoft content policies.\n"
    "Avoid content that violates copyrights.\n"
    "If you are asked to generate content that is harmful, hateful, racist, sexist, lewd, violent, or completely irrelevant to software engineering, only respond with \"Sorry, I can't assist with that.\"\n"
    "Keep your answers short and impersonal.\n"
    "You can answer general programming questions and perform the following tasks: \n"
    "* Ask a question about the files in your current workspace\n"
    "* Explain how the code in your active editor works\n"
    "* Review the selected code in your active editor\n"
    "* Generate unit tests for the selected code\n"
    "* Propose a fix for the problems in the selected code\n"
    "* Scaffold code for a new workspace\n"
    "* Create a new Jupyter Notebook\n"
    "* Find relevant code to your query\n"
    "* Propose a fix for the a test failure\n"
    "* Ask questions about VS Code\n"
    "* Generate query parameters for workspace search\n"
    "* Ask about VS Code extension development\n"
    "* Ask how to do something in the terminal\n"
    "* Explain what just happened in the terminal\n"
    "You use the GPT-4 Turbo version of OpenAI's GPT models.\n"
    "First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\n"
    "Then output the code in a single code block.\n"
    "Minimize any other prose.\n"
    "Use Markdown formatting in your answers.\n"
    "Make sure to include the programming language name at the start of the Markdown code blocks.\n"
    "Avoid wrapping the whole response in triple backticks.\n"
    "The user works in an IDE called Visual Studio Code which has a concept for editors with open files, integrated unit test support, an output pane that shows the output of running the code as well as an integrated terminal.\n"
    "The user is working on a Windows machine. Please respond with system specific commands if applicable.\n"
    "The active document is the source code the user is looking at right now.\n"
    "You can only give one reply for each conversation turn.\n"
    "Respond in the following locale: zh-cn"
)


def code_review_by_gpt(diff_content, model="gpt-4o", ignore=False):
    """Ask the Azure OpenAI chat-completions endpoint to review a change.

    Args:
        diff_content: The change to review.  It is formatted into the user
            message, followed by the words "Code review".
        model: Name used both as the deployment segment of the request URL
            and as the ``model`` field of the payload.
        ignore: When true, an HTTP 429 response is not retried.  The function
            sets this itself for its single retry.

    Returns:
        The text of the first returned choice, or ``None`` when the request
        failed with an HTTP error that was not (or could no longer be)
        retried.
    """
    deployment = parse.quote(model, safe="")
    url = (
        f"{__gpt_url__}/openai/deployments/{deployment}/chat/completions?api-version=2024-02-01"
    )
    headers = {
        "Content-Type": "application/json",
        "api-key": gpt_key,
    }
    payload = {
        "messages": [
            {"role": "system", "content": _REVIEW_SYSTEM_PROMPT},
            {"role": "user", "content": f"{diff_content} Code review"},
        ],
        "model": model,
        "max_tokens": 4096,
        "temperature": 0.1,
        "top_p": 1,
        "n": 1,
        "stream": False,
    }
    q = request.Request(url, headers=headers, data=json.dumps(payload).encode())
    try:
        with request.urlopen(q) as res:
            return json.load(res)["choices"][0]["message"]["content"]
    except HTTPError as e:
        if e.code == 429 and not ignore:
            logger.warning("GPT endpoint rate-limited the request: %s", e.headers)
            try:
                delay = int(e.headers.get("Retry-After", 5))
            except (TypeError, ValueError):
                # Retry-After may also be an HTTP date; fall back to the default.
                delay = 5
            time.sleep(delay)
            # Retry exactly once, with the same model as the first attempt.
            return code_review_by_gpt(diff_content, model=model, ignore=True)
        logger.error("GPT review request failed with HTTP %s: %s", e.code, e.reason)
        return None
def code_review_by_localLLM(block):  # unused
    """Review *block* with a local ``llama-cli`` model.

    Args:
        block: The change to review; it is formatted into the prompt text.

    Returns:
        Whatever ``execute`` returns for the generated ``bash -c`` command.
    """
    # Alternative model: ~/Public/models/codellama-7b.Q4_K_M.gguf
    prompt = f"作为一名专业的程序员,请对以下代码变更内容进行审查,并给出修改意见和逻辑总结: {block}"
    # The diff text is arbitrary input, so it is shell-quoted twice: once for
    # the llama-cli argument and once for the ``bash -c`` wrapper.  The ``~``
    # paths stay unquoted so bash still expands them.
    llama = (
        "~/bin/llama-cli --log-disable"
        " -m ~/Public/models/gemma_2_chinese_chat_q4_k_m.gguf"
        f" --temp 0.5 -p {shlex.quote(prompt)}"
    )
    cmd = f"bash -c {shlex.quote(llama)}".encode("utf8")
    return execute(cmd)
def add_comment(url, note, path, line):
    """POST a review note to GitLab and return the decoded JSON response.

    Args:
        url: API path appended to ``__base_url__``.
        note: Review text; it is sent prefixed with
            ``"Auto Code Review by GPT-4o: \\n"``.
        path: File path the comment refers to.
        line: Line number the comment refers to; it is sent with
            ``line_type`` set to ``"new"``.

    Returns:
        The parsed JSON body of the response.

    Raises:
        urllib.error.HTTPError: propagated from ``urlopen`` on an HTTP error
            status.
    """
    form = parse.urlencode(
        {
            "note": "Auto Code Review by GPT-4o: \n" + note,
            "path": path,
            "line": line,
            "line_type": "new",
        }
    ).encode()
    q = request.Request(
        __base_url__ + url,
        headers={"PRIVATE-TOKEN": gitlab_key},
        data=form,
        method="POST",
    )
    # The context manager closes the connection once the body has been parsed.
    with request.urlopen(q) as res:
        return json.load(res)
def get_commits(pid, branch="master"):
    """Log the diff of the first commit listed for *branch*.

    Args:
        pid: Project identifier as used in the API path (a URL-encoded
            project path in the example below).
        branch: Name passed as the ``ref_name`` query parameter.

    Returns:
        None.  The diff is only written to the log.

    >>> get_commits('tech-platform%2Fmagical', branch='llm-cr')
    """
    # urlencode keeps characters such as '&' or '#' in a branch name from
    # corrupting the query string.
    query = parse.urlencode({"ref_name": branch})
    commits = req(f"/projects/{pid}/repository/commits?{query}")
    for commit in commits[0:1]:
        df = req(f'/projects/{pid}/repository/commits/{commit["id"]}/diff')
        logger.info(df)
        if not df:
            logger.error("Failed to get diff content")
# Unified-diff hunk header, e.g. "@@ -12,7 +15,9 @@"; group 1 is the start line
# on the new side.  The ",count" parts are optional in the format.
_HUNK_HEADER = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@", re.MULTILINE)


def _first_new_line(diff):
    """Return the new-side start line of the first hunk in *diff*, or None."""
    match = _HUNK_HEADER.search(diff or "")
    return int(match.group(1)) if match else None


def start_review(pid, sha, dry_run=False):
    """Review every file changed by a commit and post the notes to GitLab.

    For each entry of the commit diff whose extension is not listed in
    ``__ignore_suffix__`` the entry is sent to ``code_review_by_gpt``; the
    returned note is logged and, unless *dry_run* is set, posted as a commit
    comment on the first new-side line of the file's first hunk.

    Args:
        pid: Project identifier as used in the API path (a URL-encoded
            project path in the example below).
        sha: Commit SHA to review.
        dry_run: When truthy, reviews are requested and logged but no comment
            is posted.

    Returns:
        None.

    >>> start_review('tech-platform%2Fmagical', '947153c5420d765c2d0a1e729c629d8981ecbd26', dry_run=0)
    """
    df = req(f"/projects/{pid}/repository/commits/{sha}/diff")
    if not df:
        logger.error("Failed to get diff content")
        return
    for block in df:
        new_path = block["new_path"]
        if pathlib.Path(new_path).suffix[1:] in __ignore_suffix__:
            logger.warning(f'\r\rIgnored file: {new_path}')
            continue
        note = code_review_by_gpt(block)
        logger.info(f'\r\r======\r {new_path} \r\r {note}')
        if dry_run or not note:
            # Move on to the next file; one failed review (or dry-run mode)
            # must not abandon the remaining files.
            continue
        # The comment is posted with line_type "new", so the line number has
        # to come from the new side of the hunk header.
        line = _first_new_line(block["diff"])
        if not line:
            # No hunk header (empty diff) or a new-side start of 0.
            logger.warning("No commentable line in diff of %s; comment skipped", new_path)
            continue
        cmt = add_comment(
            url=f"/projects/{pid}/repository/commits/{sha}/comments",
            note=note,
            path=new_path,
            line=line,
        )
        logger.info(f"\r\r post comment: \r\r{cmt}")
def main():
    """Review the commit identified by the CI environment variables.

    Reads ``CI_PROJECT_PATH`` and ``CI_COMMIT_SHA``.  When both are set, the
    project path is URL-encoded and handed to ``start_review`` together with
    the SHA; otherwise a warning is logged and nothing is reviewed.
    """
    project = os.getenv("CI_PROJECT_PATH")
    sha = os.getenv("CI_COMMIT_SHA")
    if not (project and sha):
        logger.warning("CI_PROJECT_PATH and CI_COMMIT_SHA must both be set; nothing to review")
        return
    # The project path is used as a single URL path segment, so "/" must be
    # percent-encoded.
    pid = parse.quote_plus(project)
    start_review(pid, sha)
# Module-level doctest examples.  The doubled backslash keeps "\n" literal in
# the expected output line.
__doc__ = """
>>> execute('echo 123')
b'123\\n'
>>> pathlib.Path('a.jpg').suffix[1:] in __ignore_suffix__
True
"""

# Run the review only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment