Skip to content

Instantly share code, notes, and snippets.

@Keshav13142
Last active July 10, 2024 13:10
Show Gist options
  • Save Keshav13142/6ea46064c45e81ec60fbe3417cfa5ff9 to your computer and use it in GitHub Desktop.
Save Keshav13142/6ea46064c45e81ec60fbe3417cfa5ff9 to your computer and use it in GitHub Desktop.
Gitlab stuff
import os
import sqlite3
import requests
from datasets import DatasetDict, load_dataset
from dotenv import load_dotenv
from flask import Flask, jsonify, redirect, render_template, request
from transformers import (
DataCollatorForLanguageModeling,
GPT2LMHeadModel,
GPT2Tokenizer,
Trainer,
TrainingArguments,
)
# Load key/value pairs from a local .env file into the process environment
# before any of the settings below are read.
load_dotenv()

# Credentials used by get_gitlab_issues(); each is None when unset.
PRIVATE_TOKEN = os.environ.get("PRIVATE_TOKEN")
PROJECT_ID = os.environ.get("PROJECT_ID")

# OAuth application settings used to build the authorize URL in /connect.
GITLAB_REDIRECT_URL = os.environ.get("GITLAB_OAUTH_REDIRECT_URL")
GITLAB_APP_ID = os.environ.get("GITLAB_APP_ID")
GITLAB_OAUTH_SCOPES = os.environ.get("GITLAB_OAUTH_SCOPES")

# The Flask application that every route below is registered on.
app = Flask(__name__)

# Process-wide credential set by the OAuth callback; None until a user has
# connected. It is a single module global, so it is shared by all clients.
ACCESS_TOKEN = None
@app.route("/webhook", methods=["POST"])
def webhook():
    """Receive a webhook delivery and acknowledge it.

    The route is registered for POST only, so no method check is needed
    here: Flask answers every other method with 405 before this view runs.

    Returns:
        A ``(json, status)`` pair: 200 with ``{"status": "success"}`` when a
        JSON body was received, 400 when the body is missing or not JSON.
    """
    # silent=True returns None for a missing/invalid JSON body instead of
    # aborting inside the accessor, so the failure can be reported explicitly.
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({"status": "invalid or missing JSON payload"}), 400

    print(f"Received webhook data: {data}")
    # TODO: process the webhook payload here.
    # NOTE(review): the sender is not authenticated; consider validating a
    # shared secret header before trusting the payload.
    return jsonify({"status": "success"}), 200
@app.route("/", methods=["GET"])
def index_page():
    """Render the landing page.

    The route only accepts GET (plus the HEAD that Flask adds automatically
    for GET routes). The previous ``request.method == "GET"`` guard made the
    view return ``None`` for HEAD requests, which Flask reports as a 500, so
    the template is now returned unconditionally.
    """
    return render_template("index.html")
@app.route("/connect", methods=["GET"])
def connect_page():
    """Start the GitLab OAuth flow, or skip it when already connected.

    Returns:
        A redirect to ``/success`` when ``ACCESS_TOKEN`` is already set,
        otherwise a redirect to GitLab's OAuth authorize endpoint.
    """
    from urllib.parse import urlencode

    if ACCESS_TOKEN is not None:
        return redirect("/success")

    # urlencode escapes each value; the redirect URI contains ':' and '/'
    # and must not be pasted into the query string verbatim.
    query = urlencode(
        {
            "client_id": GITLAB_APP_ID,
            "redirect_uri": GITLAB_REDIRECT_URL,
            "response_type": "code",
            # NOTE(review): a constant state value gives no CSRF protection;
            # it should be a per-session random value that the callback
            # verifies. Left unchanged because the callback does not check it.
            "state": "random",
            "scope": GITLAB_OAUTH_SCOPES,
        }
    )
    return redirect(f"https://gitlab.com/oauth/authorize?{query}")
@app.route("/oauth/callback", methods=["GET"])
def get_oauth_token():
    """Handle the OAuth redirect and remember the returned ``code``.

    Returns:
        A redirect to ``/success`` when a ``code`` query parameter was
        supplied, otherwise a redirect back to ``/``.
    """
    global ACCESS_TOKEN

    # .get() instead of ["code"]: indexing aborts the request with a 400 when
    # the parameter is absent, which made the fallback redirect unreachable.
    code = request.args.get("code")
    if not code:
        # Keep any previously stored value instead of overwriting it.
        return redirect("/")

    # NOTE(review): this stores the OAuth *authorization code* as if it were
    # an access token. The code is never exchanged at the provider's token
    # endpoint, so API calls made with this value are expected to be
    # rejected. Confirm and add the exchange step.
    ACCESS_TOKEN = code
    return redirect("/success")
@app.route("/success", methods=["GET"])
def success_page():
    """Render the confirmation page shown after connecting an account.

    The view no longer branches on ``request.method``: the route only admits
    GET/HEAD, and the old guard returned ``None`` (a 500) for HEAD requests.
    The unused ``global ACCESS_TOKEN`` declaration was dropped as well.
    """
    # NOTE(review): a guard that redirected to "/" when ACCESS_TOKEN is unset
    # was commented out here previously; it is left disabled so the page
    # stays reachable exactly as before. Re-enable it if that was unintended.
    #
    # The success template visible alongside this code renders a
    # ``{{message}}`` placeholder, which was never supplied and therefore
    # rendered as an empty heading.
    return render_template(
        "success.html",
        message="GitLab account connected successfully.",
    )
@app.route("/add-project", methods=["GET", "POST"])
def add_project():
    """Show the add-project form (GET) or register a webhook (POST).

    Returns:
        * a redirect to ``/`` when no account has been connected yet;
        * the rendered form for GET/HEAD;
        * for POST: a redirect back to the form when the project id is
          missing or the webhook was created, or a 502 JSON error when
          ``add_webhook`` reports failure.
    """
    # Applies to POST as well: without a stored token add_webhook would
    # send a request that cannot be authorised.
    if ACCESS_TOKEN is None:
        return redirect("/")

    if request.method != "POST":
        # GET and the HEAD that Flask adds automatically for GET routes.
        return render_template("add-project.html")

    # .get() rather than ["project_id"]: indexing aborts with a 400 when the
    # field is absent, so the old `== None` check could never fire. A blank
    # submission arrives as "" and is treated the same as a missing field.
    project_id = request.form.get("project_id", "").strip()
    if not project_id:
        return redirect("/add-project")
    print(project_id)

    if not add_webhook(project_id):
        # Previously this path fell off the end of the function and returned
        # None, which Flask turns into a 500.
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "Failed to create the webhook for this project",
                }
            ),
            502,
        )
    return redirect("/add-project")
def add_webhook(project_id, webhook_url="http://localhost:5000/webhook"):
    """Create an issue-events webhook on a GitLab project.

    Args:
        project_id: Identifier of the project; it is URL-escaped before
            being placed in the request path.
        webhook_url: Address GitLab should deliver events to. Defaults to
            the previously hard-coded local address.
            NOTE(review): a ``localhost`` URL is presumably not reachable
            from gitlab.com; pass a public URL in real deployments.

    Returns:
        True when the API answered 201, False for any other status or when
        the request could not be completed.
    """
    from urllib.parse import quote

    # project_id comes from a submitted form; escape it so it cannot change
    # the path of the API request.
    safe_project_id = quote(str(project_id), safe="")

    try:
        response = requests.post(
            f"https://gitlab.com/api/v4/projects/{safe_project_id}/hooks",
            json={
                "url": webhook_url,
                "name": "IntelliOps Webhook",
                "description": "Custom webhook to notify the server about new issues",
                "issues_events": True,
            },
            # OAuth tokens are sent with the Bearer scheme; the previous
            # "PRIVATE_TOKEN <value>" is not a valid Authorization scheme.
            # The JSON Content-Type header is set by `json=` automatically.
            headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
            # Without a timeout a stalled connection blocks the request
            # handler indefinitely.
            timeout=10,
        )
    except requests.RequestException as exc:
        print(f"Failed to create webhook: {exc}")
        return False

    print(response)
    return response.status_code == 201
# @app.route('/webhook', methods=['POST'])
# def webhook_handler():
# if request.headers['Content-Type'] == 'application/json':
# payload = request.json
# # Send a quick response indicating the webhook was received
# response_data = {'message': 'Webhook received. Starting training...'}
# status_code = 200
# # Start processing the payload asynchronously
# import threading
# threading.Thread(target=main, args=(payload,)).start()
# return jsonify(response_data), status_code
def get_gitlab_issues():
    """Fetch the issues of the configured project from the GitLab API.

    Uses the module-level ``PROJECT_ID`` and ``PRIVATE_TOKEN`` settings.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200
        status, or the request could not be completed).
    """
    url = f"https://git.virtusa.com/api/v4/projects/{PROJECT_ID}/issues"
    headers = {"PRIVATE-TOKEN": PRIVATE_TOKEN}

    try:
        # A timeout keeps a stalled server from hanging the caller forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP failures: log and
        # return None, rather than raising into callers that expect None.
        print(f"Failed to fetch issues: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch issues, status code: {response.status_code}")
        return None

    # NOTE(review): only a single request is made; if the endpoint paginates
    # its results, later pages are not fetched. Confirm whether that matters.
    return response.json()
def initialize_database():
    """Create the ``issues`` table in ``issues.db`` if it does not exist.

    The table has four columns: ``issue_id`` (integer primary key),
    ``title`` (required text), ``description`` and ``solution``.
    Calling this repeatedly is safe because of ``IF NOT EXISTS``.
    """
    conn = sqlite3.connect("issues.db")
    try:
        conn.cursor().execute(
            """
            CREATE TABLE IF NOT EXISTS issues (
            issue_id INTEGER PRIMARY KEY,
            title TEXT NOT NULL,
            description TEXT,
            solution TEXT);
            """
        )
        conn.commit()
    finally:
        # Close even when execute/commit raises so the handle is not leaked.
        conn.close()
def update_issues_txt():
    """Append every row of the ``issues`` table to ``issues.txt``.

    Each issue is written as four labelled lines followed by a blank line.

    NOTE(review): the file is opened in append mode and all rows are written
    on every call, so calling this more than once (or after process_issue has
    already appended the same issues) duplicates entries. Confirm intent.
    """
    conn = sqlite3.connect("issues.db")
    try:
        c = conn.cursor()
        c.execute("SELECT * FROM issues")
        issues = c.fetchall()
    finally:
        # Close even when the query raises (e.g. the table does not exist).
        conn.close()

    with open("issues.txt", "a", encoding="utf-8") as f:  # Append mode
        for issue_id, title, description, solution in issues:
            f.write(f"Issue ID: {issue_id}\n")
            f.write(f"Title: {title}\n")
            f.write(f"Description: {description}\n")
            f.write(f"Solution: {solution}\n\n")
def load_and_tokenize_dataset(file_path, tokenizer):
    """Load a text file as a dataset and tokenize it.

    Args:
        file_path: Path of the text file to load as the ``train`` split.
        tokenizer: Callable applied to each batch of ``text`` values with
            truncation and ``max_length`` padding to 512.

    Returns:
        A ``DatasetDict`` whose only key, ``"train"``, holds the tokenized
        dataset.

    Raises:
        ValueError: If ``file_path`` does not exist or is an empty file.
    """
    has_content = os.path.exists(file_path) and os.path.getsize(file_path) != 0
    if not has_content:
        raise ValueError("The dataset file is empty or does not exist.")

    def _encode_batch(batch):
        # Pad/truncate every example to exactly 512 tokens.
        return tokenizer(
            batch["text"],
            truncation=True,
            padding="max_length",
            max_length=512,
        )

    train_split = load_dataset(
        "text", data_files={"train": file_path}, split="train"
    ).map(_encode_batch, batched=True)
    return DatasetDict({"train": train_split})
def fine_tune_model(dataset, model, tokenizer):
    """Run one training epoch of ``model`` on ``dataset["train"]``.

    Args:
        dataset: Mapping with a ``"train"`` entry used as training data.
        model: The model handed to the ``Trainer``.
        tokenizer: Tokenizer handed to the language-modeling data collator.

    Checkpoints go to ``./fine_tuned_model`` every 10 steps, keeping at most
    one. Nothing is returned.
    """
    run_config = TrainingArguments(
        output_dir="./fine_tuned_model",
        overwrite_output_dir=True,
        num_train_epochs=1,
        per_device_train_batch_size=1,
        save_steps=10,
        save_total_limit=1,
    )
    # mlm=False selects the collator's non-masked mode.
    collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    Trainer(
        model=model,
        args=run_config,
        data_collator=collator,
        train_dataset=dataset["train"],
    ).train()
def generate_solution(issue_description, model, tokenizer):
    """Generate text from an issue description using ``model``.

    Args:
        issue_description: Prompt text. ``None``, empty and whitespace-only
            values are all treated as "no input".
        model: Object exposing ``generate(...)``.
        tokenizer: Object exposing ``encode``, ``decode`` and
            ``eos_token_id``.

    Returns:
        The decoded first generated sequence, or the fixed string
        ``"No input text provided."`` when there is no usable input.
    """
    # A description can be None (e.g. a JSON null); calling .strip() on it
    # used to raise AttributeError.
    if issue_description is None or not issue_description.strip():
        return "No input text provided."

    input_ids = tokenizer.encode(issue_description, return_tensors="pt")
    output = model.generate(
        input_ids,
        max_length=150,
        num_return_sequences=1,
        # Reuse the end-of-sequence token id as the padding token id.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
def main(payload=None):
    """Store new issues, generate solutions, then fine-tune and save a model.

    Args:
        payload: Optional mapping; when truthy its ``"issues"`` entry is used
            instead of fetching issues via ``get_gitlab_issues()``.

    When there are no issues a message is printed and nothing else happens.
    """
    initialize_database()

    issues = payload.get("issues") if payload else get_gitlab_issues()
    if not issues:
        print("No issues fetched from GitLab.")
        return

    model_name = "distilgpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token

    for entry in issues:
        process_issue(entry, model, tokenizer)

    # process_issue appends each new issue to issues.txt, which then serves
    # as the training corpus.
    training_data = load_and_tokenize_dataset("issues.txt", tokenizer)
    fine_tune_model(training_data, model, tokenizer)

    output_dir = "./fine_tuned_model"
    os.makedirs(output_dir, exist_ok=True)
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
def process_issue(issue, model, tokenizer):
    """Store one issue with a generated solution, unless already stored.

    Args:
        issue: Mapping with ``"id"`` and ``"title"`` keys and an optional
            ``"description"``.
        model: Passed through to ``generate_solution``.
        tokenizer: Passed through to ``generate_solution``.

    An issue whose id is already in the ``issues`` table is skipped with a
    message. Otherwise a solution is generated, the row is inserted, and the
    same four fields are appended to ``issues.txt``.
    """
    issue_id = issue["id"]
    title = issue["title"]
    # .get(key, default) only covers a *missing* key; a key that is present
    # with value None would slip through and crash text generation.
    description = issue.get("description")
    if description is None:
        description = "No description provided"

    conn = sqlite3.connect("issues.db")
    try:
        c = conn.cursor()
        c.execute("SELECT * FROM issues WHERE issue_id=?", (issue_id,))
        if c.fetchone():
            print(f"Issue '{title}' already exists in the database. Skipping.")
            return

        generated_solution = generate_solution(description, model, tokenizer)
        c.execute(
            "INSERT INTO issues (issue_id, title, description, solution) VALUES (?, ?, ?, ?)",
            (issue_id, title, description, generated_solution),
        )
        conn.commit()
    finally:
        # The early return above (and any exception) used to leave the
        # connection open; always release it.
        conn.close()

    with open("issues.txt", "a", encoding="utf-8") as f:
        f.write(f"Issue ID: {issue_id}\n")
        f.write(f"Title: {title}\n")
        f.write(f"Description: {description}\n")
        f.write(f"Solution: {generated_solution}\n\n")
if __name__ == "__main__":
    # The interactive debugger that debug mode enables lets anyone who can
    # reach the server execute code. Because the app binds to all interfaces
    # (0.0.0.0), debug mode is now opt-in via FLASK_DEBUG=1 (or "true")
    # instead of being always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)
GITLAB_APP_SECRET=
GITLAB_APP_ID=
PRIVATE_TOKEN=
PROJECT_ID=
GOOGLE_VERTEX_API_KEY=
GITLAB_OAUTH_AUTH_TOKEN_URL=https://gitlab.com/oauth/authorize
GITLAB_OAUTH_REDIRECT_URL=http://localhost:5000/oauth/callback
GITLAB_OAUTH_SCOPES=api
<!DOCTYPE html>
<!-- Form page for registering a project: posts `project_id` to /add-project. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Add a project</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body
    class="flex min-h-screen justify-start items-center flex-col pt-20 space-y-14"
  >
    <h1 class="text-2xl font-semibold">Add new project</h1>
    <form
      action="/add-project"
      method="POST"
      class="flex flex-col p-2 space-y-4 w-full max-w-[30%]"
    >
      <!-- `required` and `min` stop blank or non-positive ids from being
           submitted; aria-label names the field for assistive technology,
           since the placeholder alone is not a label. -->
      <input
        class="border-b p-3 border-gray-300 outline-gray-300"
        id="project_id"
        type="number"
        name="project_id"
        placeholder="Enter your Project Id"
        aria-label="Project Id"
        min="1"
        required
      />
      <button type="submit" class="p-2 border rounded-md border-[#fc6d26]">
        Create project integration
      </button>
    </form>
  </body>
</html>
<!DOCTYPE html>
<!-- Landing page: a single button that sends the browser to /connect. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Connect Gitlab Account</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body class="flex min-h-screen justify-center items-center">
    <form action="/connect" method="GET">
      <button
        class="flex p-2 items-center border-2 rounded-md pr-5"
        type="submit"
      >
        <img src="/static/gitlab-logo.png" alt="Gitlab logo" class="h-20" />
        Connect your GitLab account
      </button>
    </form>
  </body>
</html>
<!DOCTYPE html>
<!-- Confirmation page: shows the `message` template variable and a button
     that opens the add-project form. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Success</title>
    <script src="https://cdn.tailwindcss.com"></script>
  </head>
  <body class="flex min-h-screen justify-center items-start pt-5">
    <div class="flex flex-col items-center justify-center space-y-5">
      <h1 class="text-xl text-green-500">{{message}}</h1>
      <form action="/add-project" method="GET">
        <button
          class="flex p-3 border rounded-md border-[#fc6d26]"
          type="submit"
        >
          ➕ Add new project integration
        </button>
      </form>
    </div>
  </body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment