Last active
August 5, 2023 12:07
-
-
Save jskherman/c27813f559af85a46e41b249ee15e655 to your computer and use it in GitHub Desktop.
A Python script for exporting a GitHub user's daily commit counts to a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_gh_commit_data(github_username: str) -> list:
    """
    Get GitHub commit data for a given username by scraping the GitHub
    contribution calendar heatmap for each year of activity.

    This function returns a list of dictionaries with the following keys:

    - date: The day's date as a "YYYY-MM-DD" string (the cell's
      ``data-date`` attribute)
    - commit_count: The number of commits on that date, as an ``int``

    Only days strictly before today (local time) are included, so the
    still-incomplete current day and any future calendar cells are skipped.

    Parameters
    ----------
    github_username : str
        The GitHub username to get commit data for.

    Returns
    -------
    list
        A list of dictionaries with the commit data, in the order the
        calendar pages list them (not sorted by date).
    """
    import datetime

    import requests
    from bs4 import BeautifulSoup

    # Bound every HTTP call so a stalled connection cannot hang the script.
    timeout_seconds = 30

    profile = requests.get(
        f"https://github.com/{github_username}/", timeout=timeout_seconds
    )
    profile_html = BeautifulSoup(profile.text, "html.parser")

    # Each "year" link on the profile page points at that year's calendar.
    pages = [
        "https://github.com" + link.attrs["href"]
        for link in profile_html.select("a.js-year-link")
    ]

    commits = []
    today = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    for page in pages:
        response = requests.get(page, timeout=timeout_seconds)
        html = BeautifulSoup(response.text, "html.parser")

        # Every day of the heatmap is a table cell with this class.
        for day in html.find_all("td", class_="ContributionCalendar-day"):
            # Cells without a date are not real days; skip them.
            if "data-date" not in day.attrs:
                continue
            # Keep only days that are already over.
            if datetime.datetime.strptime(day["data-date"], "%Y-%m-%d") >= today:
                continue

            # The cell text starts with the count ("3 contributions ...") or
            # with "No" for an empty day. Splitting on any whitespace keeps
            # leading spaces/newlines in the markup from hiding the count.
            tokens = day.text.split()
            commit_count = tokens[0] if tokens else ""
            if commit_count == "No":
                commit_count = 0
            elif commit_count.isdecimal():
                # Store real integers so the column has a single type.
                commit_count = int(commit_count)
            # Anything else is unexpected markup; keep the raw token rather
            # than guessing a number.

            commits.append({"date": day["data-date"], "commit_count": commit_count})
    return commits
def main(args=None):
    """
    Prompt for a GitHub username and export its daily commit counts to CSV.

    The data is fetched with get_gh_commit_data(), sorted by date, and
    written to "<username>_commit_data.csv" (a path relative to the current
    working directory).

    Parameters
    ----------
    args : optional
        Unused; kept so existing callers that pass it keep working.
    """
    import pandas as pd

    # Strip stray whitespace so it ends up in neither the URL nor the file name.
    username = input("Enter GitHub username: ").strip()

    # Naming the columns up front keeps the frame well-formed when no data
    # comes back; without them an empty result has no "date" column and the
    # conversion below would fail.
    df = pd.DataFrame(
        get_gh_commit_data(username), columns=["date", "commit_count"]
    )
    df["date"] = pd.to_datetime(df["date"])
    df = df.sort_values(by="date")

    # Save the commit data to a CSV file
    df.to_csv(f"{username}_commit_data.csv", index=False)
# Script entry point: importing this module must not start a scrape, so
# main() is only invoked when the file is executed directly.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Do `pip install requests beautifulsoup4 pandas`
requests
beautifulsoup4
pandas
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment