Skip to content

Instantly share code, notes, and snippets.

View Padhma's full-sized avatar
🎯
Focusing

Padhma Muniraj Padhma

🎯
Focusing
  • University of Michigan, Ann Arbor
  • Ann Arbor, USA
  • 10:19 (UTC -04:00)
View GitHub Profile
from wordcloud import WordCloud, STOPWORDS
from PIL import Image
# create a single space-separated string with all the topic tags
github_tags = " ".join(all_tags)
# load the mask image into an array; the context manager closes the image
# file as soon as its pixel data has been copied into the array
with Image.open('../input/wordcloud-mask/github_icon.jpg') as mask_image:
    git_mask = np.array(mask_image)
# instantiate a word cloud object
# number of tags in each entry of topic_tags
len_tags = [len(tag) for tag in topic_tags]
# create a new column -> Total_Tags
github_df['Total_Tags'] = len_tags
# total number of tags per topic; select Total_Tags before aggregating so
# only that column is summed rather than every column of the dataframe
topic_wise_tags = (
    github_df.groupby('Topic')['Total_Tags']
    .sum()
    .reset_index(name='Total Tags')
)
# set figure size and dpi
# contribution columns for the rows whose user is in users_with_more_repos
contribution_columns = ['Issues', 'Pull_Requests', 'Commits', 'Contributors']
top_user_rows = github_df['User_Name'].isin(users_with_more_repos)
more_repos_users_df = github_df.loc[top_user_rows, contribution_columns]
# 6x4 inch figure at 100 dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
# annotated heatmap of the pairwise correlations, drawn on the new axes
sns.heatmap(
    more_repos_users_df.corr(),
    linewidths=0.1,
    vmax=1.0,
    square=True,
    linecolor='white',
    annot=True,
    cmap='summer',
    ax=ax,
);
fig.suptitle(
    'Correlation of contributions among users with more repositories',
    fontsize=16,
    color='#333F4B',
);
# keep only the contribution columns, then discard rows missing any of them
contribution_columns = ['Issues', 'Pull_Requests', 'Commits', 'Contributors']
corr_df = github_df[contribution_columns].dropna()
# 6x4 inch figure at 100 dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
# annotated heatmap of the pairwise correlations, drawn on the new axes
sns.heatmap(
    corr_df.corr(),
    linewidths=0.1,
    vmax=1.0,
    square=True,
    linecolor='white',
    annot=True,
    cmap='summer',
    ax=ax,
);
fig.suptitle(
    'Correlation between the contribution columns',
    fontsize=16,
    color='#333F4B',
);
# names of the 10 users that appear in the most rows of github_df
repo_counts = github_df.groupby('User_Name').size()
users_with_more_repos = repo_counts.nlargest(n=10).index.to_list()
# rows belonging to those users, restricted to the columns needed for plotting
top_user_rows = github_df['User_Name'].isin(users_with_more_repos)
more_repos_users = github_df.loc[top_user_rows, ['Topic', 'User_Name', 'Star']]
# horizontal count plot, bars ordered from most to fewest rows
users_by_repo_count = more_repos_users['User_Name'].value_counts().index
sns.countplot(
    data=more_repos_users,
    y='User_Name',
    palette='cool',
    order=users_by_repo_count,
);
# set figure size and dpi
# apply the seaborn 'paper' context first: set_theme changes matplotlib's
# global rcParams, so it must run before the axes are created to style them
sns.set_theme(context='paper')
# set figure size and dpi
fig, ax = plt.subplots(figsize=(8, 4), dpi=100)
# scatter plot of Watch against Fork with a fitted regression line
sns.regplot(data=github_df, x='Watch', y='Fork', color='purple', ax=ax);
# set x and y-axis labels and title
# apply the seaborn 'paper' context first: set_theme changes matplotlib's
# global rcParams, so it must run before the axes are created to style them
sns.set_theme(context='paper')
# set figure size and dpi
fig, ax = plt.subplots(figsize=(8, 4), dpi=100)
# scatter plot of Star against Fork with a fitted regression line
sns.regplot(data=github_df, x='Star', y='Fork', color='purple', ax=ax);
# set x and y-axis labels and title
# set the default axes edge colour before creating the figure: rcParams are
# read when the axes are built, so a later assignment would not recolour them
plt.rcParams['axes.edgecolor'] = '#333F4B'
# set figure size and dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
# hide the top, right and left spines
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)