Created
June 17, 2021 17:12
-
-
Save aktech/ccb6532d56965757d5af405a84cff778 to your computer and use it in GitHub Desktop.
Linux Commits Bar Chart Race
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A script to generate a Bar Chart race graph from the linux git commits | |

commits.txt was generated via:
    git log --format='%ai$%an' > commits.txt

Prerequisites:
==============
    conda install -c conda-forge bar_chart_race ffmpeg imagemagick pandas

Instructions to run:
====================
    python linux.commits.py
"""
import time

import bar_chart_race as bcr
import pandas as pd
def create_data_for_chart(commits_path="commits.txt", output_path='linux-commitx.csv'):
    """Build the cumulative per-author monthly commit table from a git log dump.

    The input file holds one commit per line in the form
    ``<author date>$<author name>`` and is read as ISO-8859-1 text.

    Args:
        commits_path: Path of the ``$``-separated commit log to read.
        output_path: Path the cumulative table is written to as CSV.

    Returns:
        A ``(cumulative, indexed)`` tuple. ``cumulative`` has one row per
        month end and one column per author, holding running commit totals.
        ``indexed`` is the per-commit frame indexed by UTC commit date
        (sorted ascending) with ``author`` and ``count`` columns.
    """
    commits = pd.read_csv(
        commits_path,
        sep="$",
        encoding="ISO-8859-1",
        names=['date', 'author'],
        header=None,
    )
    # Every row is one commit; summing this column later yields commit counts.
    commits['count'] = 1
    # Commits carry mixed UTC offsets, so normalise everything to UTC.
    commits['date'] = pd.to_datetime(commits['date'], utc=True)
    indexed = commits.set_index('date').sort_index()

    # An offset object is used instead of the 'M' string alias, which newer
    # pandas releases deprecate in favour of 'ME'; the object form works on both.
    month_end = pd.Grouper(freq=pd.offsets.MonthEnd())
    monthly = indexed.groupby([month_end, 'author'])['count'].sum()

    # Wide layout: authors as columns, months with no commits filled with 0.
    per_author = monthly.unstack("author", fill_value=0)
    cumulative = per_author.cumsum(axis=0)
    cumulative.to_csv(output_path)
    return cumulative, indexed
def period_func(ddf_indexed):
    """Create the per-frame "Total Commits" annotation callback.

    Args:
        ddf_indexed: Per-commit frame indexed by commit date (sorted
            ascending) with a ``count`` column.

    Returns:
        A two-argument callable suitable for ``period_summary_func``. Its
        first argument is a Series whose ``name`` is the current period's
        timestamp; the second argument is ignored. It returns the keyword
        dict describing the text to draw.
    """
    # Select the column once instead of slicing the whole frame on every frame.
    commit_counts = ddf_indexed['count']

    def _inner(v, _):
        # Label slice is inclusive, so commits stamped exactly at the period
        # end are counted. ``v.name`` is the public accessor for the row label.
        total_commits_so_far = commit_counts.loc[:v.name].sum()
        return {
            'x': .98,
            'y': .2,
            's': f'Total Commits: {total_commits_so_far:,.0f}',
            'ha': 'right',
            'size': 11,
        }

    return _inner
def create_bar_chart(ddf_new_cumsum, ddf_indexed, filename="linux.mp4"):
    """Render the bar chart race animation for the cumulative commit table.

    Args:
        ddf_new_cumsum: Wide cumulative table (one column per author) passed
            to ``bar_chart_race`` as its data.
        ddf_indexed: Per-commit frame used to compute the running
            "Total Commits" annotation via ``period_func``.
        filename: Output file for the animation; defaults to ``linux.mp4``.
    """
    bcr.bar_chart_race(
        df=ddf_new_cumsum,
        filename=filename,
        n_bars=25,
        filter_column_colors=True,
        title='Linux Committers Over Time',
        figsize=(8, 6),
        period_fmt='%B %d, %Y',
        period_label={'x': .98, 'y': .3, 'ha': 'right', 'va': 'center'},
        period_summary_func=period_func(ddf_indexed),
        dpi=500,
    )
def main():
    """Prepare the commit data, render the chart, and report elapsed time."""
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during the (long) render.
    start = time.perf_counter()
    print("Preparing data")
    df, ddf_indexed = create_data_for_chart()
    print("Creating Bar chart")
    create_bar_chart(df, ddf_indexed)
    print(f"Time taken: {time.perf_counter() - start}")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment