Skip to content

Instantly share code, notes, and snippets.

View Padhma's full-sized avatar
🎯
Focusing

Padhma Muniraj Padhma

🎯
Focusing
  • University of Michigan, Ann Arbor
  • Ann Arbor, USA
  • 05:28 (UTC -04:00)
View GitHub Profile
# Set the edge colour BEFORE creating the figure: rcParams are read when the
# axes are created, so changing them afterwards does not restyle an existing `ax`.
plt.rcParams['axes.edgecolor'] = '#333F4B'
# set figure size and dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
# customize spines: hide the top, right and left borders of the plot area
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
# Set the edge colour BEFORE creating the figure: rcParams are read when the
# axes are created, so changing them afterwards does not restyle an existing `ax`.
plt.rcParams['axes.edgecolor'] = '#333F4B'
# set figure size and dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)
# customize spines: hide the top, right and left borders of the plot area
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)
def _abbreviated_to_float(value):
    """Convert a count string such as '1.2k' or '845' to a float.

    A trailing 'k' is stripped and the remaining number multiplied by 1000;
    any other string is passed to ``float`` unchanged.
    """
    if value.endswith('k'):
        return float(value.rstrip('k')) * 1000
    return float(value)


# converting Star, Fork and Watch columns to numeric by replacing 'k' with 1000
# (one shared helper so all three columns follow exactly the same rule)
for column in ('Star', 'Fork', 'Watch'):
    github_df[column] = github_df[column].apply(_abbreviated_to_float)
# Remove , from issue and commits (the values stay strings after this step)
for column in ('Issues', 'Commits'):
    github_df[column] = github_df[column].apply(lambda text: text.replace(',', ''))
# Convert multiple object columns to numeric
# keep only the columns that are relevant for the analysis
relevant_columns = [
    'topic', 'name', 'user', 'star', 'fork', 'watch',
    'issue', 'pull_requests', 'topic_tag', 'commits', 'contributers',
]
github_df = github_data_df[relevant_columns]
# build an {old name: new name} mapping (paired by position) and hand it to rename
old_names = github_df.columns
new_names = [
    'Topic', 'Repo_Name', 'User_Name', 'Star', 'Fork', 'Watch',
    'Issues', 'Pull_Requests', 'Topic_Tags', 'Commits', 'Contributors',
]
column_mapping = {old: new for old, new in zip(old_names, new_names)}
github_df = github_df.rename(columns=column_mapping)
# show the first rows of the renamed dataframe
github_df.head()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# load the repository dataset from the input CSV
github_data_df = pd.read_csv('../input/github-repositories-analysis/Github_data.csv')
# drop the two 'Unnamed' columns (presumably stale index columns from earlier
# CSV exports -- TODO confirm against the raw file)
github_data_df = github_data_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
# get basic information about the dataset
[1] monthly_freq = pd.Period('2020-06',freq='M')
[2] monthly_freq
Period('2020-06', 'M')
# convert at the start of the month
[3] monthly_freq.asfreq('W', how='start')
Period('2020-06-01/2020-06-07', 'W-SUN')
# convert at the end of the month
[4] monthly_freq.asfreq('W', how='end')
[1] period_range_sample = pd.period_range('3/5/2020', '13/5/2021', freq='M')
[2] period_range_sample
PeriodIndex(['2020-03', '2020-04', '2020-05', '2020-06', '2020-07', '2020-08',
'2020-09', '2020-10', '2020-11', '2020-12', '2021-01', '2021-02',
'2021-03', '2021-04', '2021-05'],
dtype='period[M]', freq='M')
[1] p1 = pd.Period(2020, freq='A-OCT')
[2] p1
Period('2020', 'A-OCT')
[3] p2 = pd.Period('2020-05', freq='M')
[4] p2
Period('2020-05', 'M')
# time object
[1] time = datetime.time(hour=6, minute=45, second=10, microsecond=348713)
# add timedelta to time
[2] td1 + time
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-32-f5329db22ead> in <module>
----> 1 td1 + time
# date object
[1] date = datetime.date(year=2020, month=5, day=3)
# add timedelta to date
[2] td1 + date
datetime.date(2020, 5, 30)