Skip to content

Instantly share code, notes, and snippets.

@rvalyi
Created July 8, 2024 09:14
Show Gist options
  • Save rvalyi/ba7670c8f341092aa14550f46459b51c to your computer and use it in GitHub Desktop.
Save rvalyi/ba7670c8f341092aa14550f46459b51c to your computer and use it in GitHub Desktop.
import os
import math
import git
from pathlib import Path
from datetime import datetime
from git.objects import base
from slugify import slugify
from typing import List
import subprocess
# Root directory for the generated files; one sub-directory per addon is
# created below it by the driver loop at the bottom of the script.
OUTPUT_DIR = "/home/rvalyi/DEV/odoo_module_diff"
# Relative path of the local git checkout that is scanned.
REPO_PATH = "odoo/src"
# ADDON = "purchase"
# Release number whose release commit opens the scanned range; the range is
# closed by the release commit of SINCE_VERSION + 1 (or by the most recent
# commit when that release commit is not found).
SINCE_VERSION = 7 # Starting version
# A commit with exactly two structural matches is reported only when it
# changes more than this many lines inside the module path.
LINE_CHANGE_THRESHOLD = 20
# Substrings looked for in commit diffs to detect structural changes.
DB_STRUCTURE_STRINGS = ["= fields.", "_inherit = ", "_inherits = "]
# Initialize local repo object
repo = git.Repo(REPO_PATH)
def find_commit_by_message(repo: git.Repo, message: str):
    """
    Search the commits yielded by ``repo.iter_commits()`` for ``message``.

    Returns ``(commit, True)`` for the first yielded commit whose message
    contains ``message``. When nothing matches, returns
    ``(first_yielded_commit, False)``, or ``(None, False)`` if no commit
    was yielded at all.
    """
    fallback = None
    for position, candidate in enumerate(repo.iter_commits()):
        # Remember the very first commit we see: it is the fallback result.
        if position == 0:
            fallback = candidate
        if message in str(candidate.message):
            return candidate, True
    return fallback, False
def commit_contains_string(path: str, commit: git.Commit, search_strings: List[str]):
    """
    Count the structural matches found in the diffs of a commit.

    For every parent of ``commit`` the diff restricted to ``path`` is
    computed. A search string is counted once for each changed line
    (starting with ``-`` or ``+``) where it appears in that line or in one
    of the two lines preceding it. After a counted match the two-line
    context is cleared so the same context is not counted twice.

    Returns ``(diffs, matches)`` where ``diffs`` holds, at most once per
    parent, every diff in which at least one search string was seen, and
    ``matches`` is the total number of counted matches.
    """
    matches = 0
    diffs = []
    for parent in commit.parents:
        # NOTE(review): this compares the commit against its parent, so the
        # -/+ markers presumably read "commit -> parent"; counting is not
        # affected by the direction, but confirm it is what the patch
        # output should show.
        diff = commit.diff(parent, paths=path, create_patch=True)
        diff_has_match = False
        for diff_item in diff:
            # Decode and split once per file instead of once per search string.
            diff_string = diff_item.diff.decode("utf-8", errors="ignore")
            diff_lines = diff_string.splitlines()
            for search_string in search_strings:
                if search_string not in diff_string:
                    continue
                diff_has_match = True
                line_minus1 = ""
                line_minus2 = ""
                for line in diff_lines:
                    if line.startswith(("-", "+")) and search_string in (
                        line + line_minus1 + line_minus2
                    ):
                        matches += 1
                        line_minus1 = ""
                        line_minus2 = ""
                        continue
                    # Shift the context window: the oldest line must be
                    # saved before the most recent one is overwritten.
                    line_minus2 = line_minus1
                    line_minus1 = line
        if diff_has_match:
            # Keep each diff a single time, whatever the number of hits,
            # so the generated patch does not contain duplicated hunks.
            diffs.append(diff)
    return diffs, matches
def scan_module_commits(
    addon: str, start_commit: git.Commit, end_commit: git.Commit, output_module_dir: str
):
    """
    Write one ``.patch`` file per commit with structural changes in an addon.

    The commits of the range ``start_commit..end_commit`` touching the
    addon's path (``odoo/addons/base/`` for ``base``, otherwise
    ``addons/<addon>/models/``) are inspected. A commit is kept when it has
    more than two structural matches, or exactly two and more than
    ``LINE_CHANGE_THRESHOLD`` changed lines inside that path. Commits whose
    summary mentions a forward port are skipped.

    Kept commits are written, in reversed iteration order, to
    ``output_module_dir`` and a short report is printed for each of them.
    The function returns nothing.
    """
    if addon == "base":
        module_path = "odoo/addons/base/"
    else:
        module_path = f"addons/{addon}/models/"
    # Get the commits between the two found commits
    commits = list(
        repo.iter_commits(
            f"{start_commit.hexsha}..{end_commit.hexsha}", paths=module_path
        )
    )
    result = []
    for commit in commits:
        # An empty commit message must not abort the whole scan.
        message_lines = commit.message.strip().splitlines()
        summary = message_lines[0] if message_lines else ""
        if "forwardport" in summary.lower().replace(" ", "").replace("-", ""):
            # such ports may present structural changes in the diff
            # but we assume they aren't introducing new changes
            # since the previous series.
            # such false positives were common before version 13.
            continue
        # Read the statistics a single time per commit rather than once
        # per file (see the GitPython reference for the cost of `stats`).
        changed_files = commit.stats.files
        total_changes = sum(
            file_stats["lines"]
            for file, file_stats in changed_files.items()
            if str(file).startswith(module_path)
        )
        migration_diffs, matches = commit_contains_string(
            module_path, commit, DB_STRUCTURE_STRINGS
        )
        if matches > 2 or (matches > 1 and total_changes > LINE_CHANGE_THRESHOLD):
            # The last message line carrying a PR reference wins.
            pr = ""
            for line in commit.message.splitlines():
                if " odoo/odoo#" in str(line):
                    pr = str(line).split(" odoo/odoo#")[1].strip()
            result.append(
                {
                    "commit_sha": commit.hexsha,
                    "total_changes": int(total_changes),
                    "author": commit.author.name,
                    "date": datetime.fromtimestamp(commit.committed_date).strftime(
                        "%Y-%m-%d %H:%M:%S"
                    ),
                    "summary": summary,
                    "message": commit.message.strip(),
                    "pr": f"https://github.com/odoo/odoo/pull/{pr}",
                    "matches": matches,
                    "diffs": migration_diffs,
                }
            )
    # Output the result
    result.reverse()
    for idx, item in enumerate(result):
        # print(f"Commit SHA: {item['commit_sha']}")
        print(f"\nTotal Changes: {item['total_changes']}")
        print(f"Structural Changes: {item['matches']}")
        print(f"Date: {item['date']}")
        print(f"Summary: {item['summary']}")
        print(f"PR: {item['pr']}")
        # "Heat" markers embedded in the file name: '#' grows with the
        # number of changed lines, '+' with the number of matches.
        heat_diff = 0
        if item["total_changes"] > 400:
            heat_diff = 3
        elif item["total_changes"] > 200:
            heat_diff = 2
        elif item["total_changes"] > 100:
            heat_diff = 1
        # Kept items always have at least 2 matches, so the log is >= 0.
        heat_struct = int(math.log2(item["matches"] / 2))
        heat = f"{'+'*heat_struct + '_' if heat_struct > 0 else ''}{'#'*heat_diff + '_' if heat_diff > 0 else ''}".rjust(
            9, "_"
        )
        filename = f"{output_module_dir}/pr_{str(idx).zfill(3)}{heat}{item['pr'].split('/')[-1]}_{slugify(item['summary'])[:64]}.patch"
        print(filename)
        # The diff text is decoded as UTF-8, so write it back as UTF-8
        # instead of relying on the locale's default encoding.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"PR: {item['pr']}")
            f.write(f"\nCommit SHA: {item['commit_sha']}")
            f.write(f"\nStructural Changes: {item['matches']}")
            f.write(f"\nTotal Changes: {item['total_changes']}")
            f.write(f"\nAuthor: {item['author']}")
            f.write(f"\nDate: {item['date']}")
            f.write("\n\n" + item["message"])
            f.write("\n\n" + "=" * 33 + " pseudo patch: " + "=" * 33 + "\n\n")
            for diffs in item["diffs"]:
                for diff_item in diffs:
                    f.write(diff_item.diff.decode("utf-8", errors="ignore"))
def list_addons(repo_path: str, excludes: List[str], min_lines=500, max_deps=40):
    """
    List the addon directories found under ``<repo_path>/addons``.

    A directory is kept when its name starts with none of the prefixes in
    ``excludes`` and, if ``min_lines`` is truthy, when its ``.py`` files
    (searched recursively) add up to at least ``min_lines`` lines.

    ``max_deps`` is accepted for compatibility with existing callers but
    is not used by this function.

    Returns a list of ``Path`` objects, sorted so that successive runs
    process the addons in the same order.
    """
    directory = Path(f"{repo_path}/addons")
    # str.startswith accepts a tuple: one call tests every prefix.
    excluded_prefixes = tuple(excludes)
    subdirectories = []
    for d in sorted(directory.iterdir()):
        if not d.is_dir():
            continue
        if d.name.startswith(excluded_prefixes):
            continue
        if min_lines:
            total_lines = 0
            # Walk through the directory
            for root, _, files in os.walk(d):
                for file in files:
                    if file.endswith(".py"):
                        file_path = os.path.join(root, file)
                        with open(
                            file_path, "r", encoding="utf-8", errors="ignore"
                        ) as f:
                            # Count lines in the file
                            total_lines += sum(1 for _ in f)
            if total_lines < min_lines:
                continue
        subdirectories.append(d)
    return subdirectories
# Addons to process: directories under REPO_PATH/addons whose name does not
# start with one of the excluded prefixes and that hold at least 500 lines
# of Python. This runs before the `git checkout` of the end commit done
# further down, so the line counts reflect the working tree as it is when
# the script starts.
subdirectories = list_addons(
REPO_PATH, excludes=["l10n_", "website_", "test"], min_lines=500, max_deps=40
)
# for d in subdirectories:
# print(d.name)
# Find the start commit
def release_message(serie: int):
    """
    Return the commit-message fragment that marks the release of ``serie``.

    Series 13 and above use a formatted "bump master release" message,
    series 7 to 12 each have their own fixed message.

    Raises ``RuntimeError`` for any other value.
    """
    if serie >= 15:
        return f"bump master release to {serie}"
    if serie >= 13:
        return f"bump master release version to {serie}"
    # Older series each used a one-off wording.
    fixed_messages = {
        12: "master is back as 12",
        11: "[REL] 11.0",
        10: "[REL] master is version 10",
        9: "[REL] Odoo 9",
        8: "[REL] 8.0 RC1",
        7: "[REL] Release 7.0",
    }
    if serie not in fixed_messages:
        raise RuntimeError("What is wrong with you??")
    return fixed_messages[serie]
# Driver: locate the release commits delimiting the series, check out the
# end of the range, then scan every selected addon.
start_commit, _start_found = find_commit_by_message(
    repo, release_message(SINCE_VERSION)
)
print(f"Start commit {start_commit}")
# Find the end commit
end_commit, end_found = find_commit_by_message(repo, release_message(SINCE_VERSION + 1))
print(f"End commit {end_commit}")
# Ensure both commits are usable. A missing start release commit is an
# error: find_commit_by_message then falls back to the most recent commit,
# which would silently produce an empty commit range. A missing end release
# commit is tolerated: the most recent commit is used instead.
if not _start_found or not start_commit or not end_commit:
    print(
        f"Could not find the required commits for versions {SINCE_VERSION} and {SINCE_VERSION + 1}"
    )
    raise SystemExit(1)
# When the next release commit exists we analyse up to that release,
# otherwise we stay on the starting series.
serie = f"{SINCE_VERSION + 1}.0" if end_found else f"{SINCE_VERSION}.0"
# First we check out the end_commit, so we can read dependencies and lines of code
result = subprocess.run(
    [
        "git",
        "checkout",
        end_commit.hexsha,
        "-f",
    ],
    cwd=REPO_PATH,
    capture_output=True,
    text=True,
)
# Explicit check instead of `assert`, which is stripped under `python -O`.
if result.returncode != 0:
    raise RuntimeError(result.stderr + "\n" + result.stdout)
for d in subdirectories:
    print(f"\n***** {d.name} ".ljust(40, "*"))
    output_module_dir = (
        f"{OUTPUT_DIR}/{d.name}"  # TODO we might add a version dir for OpenUpgrade
    )
    os.makedirs(output_module_dir, exist_ok=True)
    result = subprocess.run(
        [
            "manifestoo",
            "--addons-path",
            f"{REPO_PATH}/addons",
            f"--odoo-series={serie}",
            "--select",
            d.name,
            "tree",
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Best effort: report the failure but keep processing the addon.
        print(f"manifestoo failed for {d.name}: {result.stderr}")
    manifestoo_output = result.stdout
    with open(f"{output_module_dir}/dependencies.txt", "w") as f:
        f.write(manifestoo_output)
    scan_module_commits(d.name, start_commit, end_commit, output_module_dir)
@rvalyi
Copy link
Author

rvalyi commented Jul 8, 2024

the list of modules in the README files was done using:

git checkout less-noise README.md; head -n 6 README.md > temp_file && mv temp_file README.md; du -sh -- */ | sort -rh | awk '{sub(/\/$/, "", $2); print NR ". " $2 " - " $1}' >> README.md

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment