This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set figure size and dpi | |
fig, ax = plt.subplots(figsize=(6,4), dpi=100) | |
# add color to edge | |
plt.rcParams['axes.edgecolor']='#333F4B' | |
# customize spines and tick parameters | |
ax.spines['top'].set_visible(False) | |
ax.spines['right'].set_visible(False) | |
ax.spines['left'].set_visible(False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set figure size and dpi | |
fig, ax = plt.subplots(figsize=(6,4), dpi=100) | |
# add color to edge | |
plt.rcParams['axes.edgecolor']='#333F4B' | |
# customize spines and tick parameters | |
ax.spines['top'].set_visible(False) | |
ax.spines['right'].set_visible(False) | |
ax.spines['left'].set_visible(False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# convert the Star, Fork and Watch columns to numeric: strip a trailing 'k' and multiply the value by 1000 | |
github_df['Star'] = github_df['Star'].apply(lambda x: float(x.rstrip('k'))*1000 if x.endswith('k') else float(x)) | |
github_df['Fork'] = github_df['Fork'].apply(lambda x: float(x.rstrip('k'))*1000 if x.endswith('k') else float(x)) | |
github_df['Watch'] = github_df['Watch'].apply(lambda x: float(x.rstrip('k'))*1000 if 'k' in x else float(x)) | |
# Remove the thousands-separator commas from the Issues and Commits columns | |
github_df['Issues'] = github_df['Issues'].apply(lambda x: x.replace(',','')) | |
github_df['Commits'] = github_df['Commits'].apply(lambda x: x.replace(',','')) | |
# Convert multiple object columns to numeric |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# picking columns that are more relevant | |
github_df = github_data_df[['topic','name','user','star','fork','watch','issue','pull_requests','topic_tag','commits','contributers']] | |
# build a dictionary mapping old column names (keys) to new names (values) and pass it to the rename function | |
new_names = ['Topic','Repo_Name','User_Name','Star','Fork','Watch','Issues','Pull_Requests','Topic_Tags','Commits','Contributors'] | |
old_names = github_df.columns | |
github_df = github_df.rename(columns=dict(zip(old_names, new_names))) | |
# a peek into the dataframe | |
github_df.head() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
# read input data | |
github_data_df = pd.read_csv('../input/github-repositories-analysis/Github_data.csv') | |
# dropping duplicate columns | |
github_data_df = github_data_df.drop(['Unnamed: 0','Unnamed: 0.1'],axis=1) | |
# get basic information about the dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[1] monthly_freq = pd.Period('2020-06',freq='M') | |
[2] monthly_freq | |
Period('2020-06', 'M') | |
# convert to weekly frequency, anchored at the start of the month | |
[3] monthly_freq.asfreq('W', how='start') | |
Period('2020-06-01/2020-06-07', 'W-SUN') | |
# convert to weekly frequency, anchored at the end of the month | |
[4] monthly_freq.asfreq('W', how='end') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[1] period_range_sample = pd.period_range('3/5/2020', '13/5/2021', freq='M') | |
[2] period_range_sample | |
PeriodIndex(['2020-03', '2020-04', '2020-05', '2020-06', '2020-07', '2020-08', | |
'2020-09', '2020-10', '2020-11', '2020-12', '2021-01', '2021-02', | |
'2021-03', '2021-04', '2021-05'], | |
dtype='period[M]', freq='M') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[1] p1 = pd.Period(2020, freq='A-OCT') | |
[2] p1 | |
Period('2020', 'A-OCT') | |
[3] p2 = pd.Period('2020-05', freq='M') | |
[4] p2 | |
Period('2020-05', 'M') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# time object | |
[1] time = datetime.time(hour=6, minute=45, second=10, microsecond=348713) | |
# adding a timedelta to a time object is not supported and raises a TypeError | |
[2] td1 + time | |
--------------------------------------------------------------------------- | |
TypeError Traceback (most recent call last) | |
<ipython-input-32-f5329db22ead> in <module> | |
----> 1 td1 + time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# date object | |
[1] date = datetime.date(year=2020, month=5, day=3) | |
# add timedelta to date | |
[2] td1 + date | |
datetime.date(2020, 5, 30) |