Skip to content

Instantly share code, notes, and snippets.

@zerofancy
Created February 1, 2026 18:46
Show Gist options
  • Select an option

  • Save zerofancy/cad6dc1e6854a6ddd2bc3c66eb32359e to your computer and use it in GitHub Desktop.

Select an option

Save zerofancy/cad6dc1e6854a6ddd2bc3c66eb32359e to your computer and use it in GitHub Desktop.
分析当前仓库下的提交者代码占比
#!/usr/bin/env python3
"""
Git Line Analyzer
This script analyzes all git-tracked text files in the current repository,
counts lines in each file, performs git blame to attribute lines to users,
and generates an HTML report with aggregated statistics.
"""
import os
import sys
import subprocess
import re
from collections import defaultdict
from pathlib import Path
import webbrowser
from datetime import datetime
def get_tracked_text_files():
    """Return the git-tracked files in the current repository that look like text.

    A file is accepted when its suffix (or, for dotfiles such as
    ``.gitignore``, its whole name) is in a known set of text extensions.
    Any other file that exists on disk is accepted when its first 1024
    characters decode as UTF-8.

    Returns:
        list[str]: Paths as reported by ``git ls-files``.

    Exits the process with status 1 (after printing an error) when git
    cannot be run or reports a failure, e.g. outside a repository.
    """
    text_extensions = {
        '.txt', '.md', '.markdown', '.rst', '.py', '.js', '.ts', '.jsx', '.tsx',
        '.java', '.cpp', '.c', '.h', '.hpp', '.cs', '.rb', '.go', '.rs', '.kt',
        '.scala', '.php', '.html', '.htm', '.css', '.scss', '.sass', '.json',
        '.yaml', '.yml', '.xml', '.sql', '.sh', '.bash', '.zsh', '.pl', '.pm',
        '.lua', '.r', '.dart', '.swift', '.m', '.mm', '.groovy', '.gradle',
        '.toml', '.ini', '.cfg', '.conf', '.properties', '.env', '.dockerfile',
        '.gitignore', '.gitattributes', '.editorconfig', '.lock',
    }

    try:
        # -z gives NUL-terminated, unquoted paths. Without it git C-quotes
        # names containing non-ASCII or special characters, and those quoted
        # strings would not match any file on disk.
        result = subprocess.run(
            ['git', 'ls-files', '-z'],
            capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary.
        print("Error: Not in a git repository or git command failed.")
        sys.exit(1)

    text_files = []
    for file_path in result.stdout.split('\0'):
        if not file_path:
            continue

        path_obj = Path(file_path)
        # Path('.gitignore').suffix is '', so dotfile entries in the set can
        # only ever match against the full file name.
        if (path_obj.suffix.lower() in text_extensions
                or path_obj.name.lower() in text_extensions):
            text_files.append(file_path)
        elif path_obj.is_file():
            # Unknown extension: sniff the start of the file as UTF-8 text.
            try:
                with open(path_obj, 'r', encoding='utf-8') as f:
                    f.read(1024)
            except (UnicodeDecodeError, OSError):
                # Binary or unreadable file; skip it instead of aborting.
                continue
            text_files.append(file_path)

    return text_files
def count_lines_in_file(file_path):
    """Count the lines in a text file.

    The file is read as UTF-8 with undecodable bytes ignored, and the lines
    are streamed rather than loaded into memory at once.

    Args:
        file_path: Path of the file to count.

    Returns:
        int: Number of lines, or 0 when the file cannot be opened or read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            return sum(1 for _ in f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid path string.
        return 0
def _tally_blame_authors(porcelain_output):
    """Count content lines per author in raw ``git blame --line-porcelain`` bytes."""
    counts = defaultdict(int)
    current_author = None
    # Split on b'\n' only: the output is kept as bytes so that other line
    # separators inside file content (\r, \f, U+2028, ...) cannot break a
    # content line apart and masquerade as an "author " header.
    for raw_line in porcelain_output.split(b'\n'):
        if raw_line.startswith(b'\t'):
            # A TAB-prefixed line is the blamed content line itself.
            if current_author is not None:
                counts[current_author] += 1
                current_author = None
        elif raw_line.startswith(b'author '):
            # Drop the 'author ' prefix. The trailing '\r' strip is defensive,
            # in case a platform emits CRLF-terminated header lines.
            name = raw_line[7:].rstrip(b'\r')
            current_author = name.decode('utf-8', errors='replace')
    return dict(counts)


def get_git_blame_stats(file_path):
    """Run ``git blame`` on a file and count its lines per author.

    Args:
        file_path: Path of the file to blame.

    Returns:
        dict[str, int]: Mapping of author name to number of attributed lines.
        Empty when git cannot be run or the blame command fails.
    """
    try:
        result = subprocess.run(
            # '--' stops option parsing so a path starting with '-' is safe.
            ['git', 'blame', '--line-porcelain', '--', file_path],
            # Capture bytes: file content is not necessarily valid UTF-8 and
            # must not be able to raise a decode error here.
            capture_output=True, check=True
        )
    except (subprocess.CalledProcessError, OSError):
        # Blame failed (e.g. untracked path) or git is not available.
        return {}
    return _tally_blame_authors(result.stdout)
# Page skeleton for the report. Doubled braces are literal CSS braces;
# single-brace fields are filled in by str.format in generate_html_report.
_REPORT_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Git Line Analysis Report</title>
<style>
body {{
font-family: Arial, sans-serif;
margin: 20px;
background-color: #f5f5f5;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background-color: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}}
h1 {{
color: #333;
text-align: center;
border-bottom: 2px solid #4CAF50;
padding-bottom: 10px;
}}
.summary {{
background-color: #e9f7ef;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}}
th, td {{
border: 1px solid #ddd;
padding: 12px;
text-align: left;
}}
th {{
background-color: #4CAF50;
color: white;
}}
tr:nth-child(even) {{
background-color: #f2f2f2;
}}
.file-details {{
margin-top: 20px;
}}
.file-header {{
background-color: #2196F3;
color: white;
padding: 10px;
margin-top: 15px;
border-radius: 5px;
}}
.author-stats {{
margin-left: 20px;
}}
.total-lines {{
font-weight: bold;
color: #4CAF50;
}}
.timestamp {{
color: #666;
font-size: 0.9em;
}}
</style>
</head>
<body>
<div class="container">
<h1>Git Line Analysis Report</h1>
<p class="timestamp">Generated on: {timestamp}</p>
<div class="summary">
<h2>Summary</h2>
<p>Total files analyzed: <strong>{total_files}</strong></p>
<p>Total lines across all files: <strong>{total_lines}</strong></p>
<p>Total unique contributors: <strong>{unique_contributors}</strong></p>
</div>
<h2>Detailed Analysis</h2>
<table>
<thead>
<tr>
<th>File Path</th>
<th>Total Lines</th>
<th>Contributors Count</th>
</tr>
</thead>
<tbody>
{file_rows}
</tbody>
</table>
<h2>Contributor Statistics</h2>
<table>
<thead>
<tr>
<th>Contributor</th>
<th>Total Lines Contributed</th>
<th>Percentage</th>
</tr>
</thead>
<tbody>
{contributor_rows}
</tbody>
</table>
<div class="file-details">
<h2>Per-File Contributor Details</h2>
{detailed_sections}
</div>
</div>
</body>
</html>
"""


def generate_html_report(results, output_file):
    """Write an HTML report summarising per-file and per-author line counts.

    File paths and author names are HTML-escaped before being inserted,
    since both come from repository metadata and may contain markup.

    Args:
        results: Mapping of file path to a dict with keys ``'total_lines'``
            (int) and ``'authors'`` (mapping of author name to line count).
        output_file: Path of the HTML file to write (UTF-8).
    """
    # Local import keeps this function self-contained.
    from html import escape

    def author_rows(author_counts, denominator):
        """Render one <tr> per author, most lines first, with share of denominator."""
        ranked = sorted(author_counts.items(), key=lambda item: item[1], reverse=True)
        rows = []
        for author, line_count in ranked:
            # Guard against division by zero for empty files/repositories.
            percentage = (line_count / denominator * 100) if denominator > 0 else 0
            rows.append(
                "\n<tr>\n"
                f"<td>{escape(author)}</td>\n"
                f"<td>{line_count}</td>\n"
                f"<td>{percentage:.2f}%</td>\n"
                "</tr>\n"
            )
        return "".join(rows)

    total_files = len(results)
    total_lines = sum(data['total_lines'] for data in results.values())

    # Aggregate each author's line count across all files.
    contributor_totals = defaultdict(int)
    for data in results.values():
        for author, line_count in data['authors'].items():
            contributor_totals[author] += line_count
    unique_contributors = len(contributor_totals)

    # One summary-table row per file.
    file_rows = "".join(
        '\n<tr>\n'
        f'<td><code>{escape(file_path)}</code></td>\n'
        f'<td class="total-lines">{data["total_lines"]}</td>\n'
        f'<td>{len(data["authors"])}</td>\n'
        '</tr>\n'
        for file_path, data in results.items()
    )

    # Overall contributor table; percentages are relative to the total
    # line count of all analysed files.
    contributor_rows = author_rows(contributor_totals, total_lines)

    # One header plus author table per file; percentages are relative to
    # that file's own line count.
    sections = []
    for file_path, data in results.items():
        file_total = data['total_lines']
        sections.append(
            '\n<div class="file-header">\n'
            f'<strong>File:</strong> {escape(file_path)} | '
            f'<strong>Total Lines:</strong> {file_total}\n'
            '</div>\n'
            '<div class="author-stats">\n'
            '<table>\n'
            '<thead>\n'
            '<tr>\n'
            '<th>Author</th>\n'
            '<th>Lines</th>\n'
            '<th>Percentage</th>\n'
            '</tr>\n'
            '</thead>\n'
            '<tbody>\n'
        )
        sections.append(author_rows(data['authors'], file_total))
        sections.append('\n</tbody>\n</table>\n</div>\n')
    detailed_sections = "".join(sections)

    html_content = _REPORT_TEMPLATE.format(
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        total_files=total_files,
        total_lines=total_lines,
        unique_contributors=unique_contributors,
        file_rows=file_rows,
        contributor_rows=contributor_rows,
        detailed_sections=detailed_sections,
    )

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
def main():
    """Analyse every tracked text file, write the HTML report, and open it.

    Progress for each file is printed as it is processed. The report is
    written to ``git_line_analysis_report.html`` in the current directory
    and its absolute path is printed before a browser launch is attempted.
    """
    print("Starting git line analysis...")

    text_files = get_tracked_text_files()
    file_count = len(text_files)
    print(f"Found {file_count} git-tracked text files to analyze.\n")

    results = {}
    for i, file_path in enumerate(text_files, 1):
        print(f"[{i}/{file_count}] Processing: {file_path}")

        total_lines = count_lines_in_file(file_path)
        authors = get_git_blame_stats(file_path)
        results[file_path] = {
            'total_lines': total_lines,
            'authors': authors
        }

        print(f" - Total lines: {total_lines}")
        print(f" - Contributors: {len(authors)}")
        if authors:
            top_contributor = max(authors.items(), key=lambda x: x[1])
            print(f" - Top contributor: {top_contributor[0]} ({top_contributor[1]} lines)")
        print()

    output_file = "git_line_analysis_report.html"
    generate_html_report(results, output_file)
    report_path = os.path.abspath(output_file)
    print("Analysis complete!")
    print(f"HTML report generated: {report_path}")

    # Path.as_uri() percent-encodes the path and yields a well-formed file
    # URI on every platform, unlike concatenating 'file://' with the path.
    report_uri = Path(report_path).as_uri()
    try:
        opened = webbrowser.open(report_uri)
    except Exception as e:
        # Opening the browser is best-effort; the report is already on disk.
        print(f"Could not open browser automatically: {e}")
        print("Please open the report manually.")
    else:
        # webbrowser.open signals failure by returning False, not by raising.
        if opened:
            print("Report opened in default browser.")
        else:
            print("Could not open browser automatically.")
            print("Please open the report manually.")


if __name__ == "__main__":
    main()
帮我实现一段python脚本,对于当前仓库下所有文本文件,如果该文件被git跟踪:
统计该文件行数
对该文件执行git blame,统计当前版本每个用户的行数
对每一个文件实施上述操作,并在控制台持续更新结果
汇总统计结果,生成一个html格式的报表,把该文件的地址输出到控制台
自动打开生成的报表文件
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment