Padhma Muniraj Padhma

## average watchers.py
# Set the axes edge colour BEFORE creating the figure: matplotlib reads
# rcParams when an Axes (and its spines) is constructed, so assigning the
# value after plt.subplots() would not recolour this figure's edges.
plt.rcParams['axes.edgecolor'] = '#333F4B'

# create a 6x4 inch figure rendered at 100 dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)

# customize spines: hide every spine except the bottom one
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)

## average stars.py
# Set the axes edge colour BEFORE creating the figure: matplotlib reads
# rcParams when an Axes (and its spines) is constructed, so assigning the
# value after plt.subplots() would not recolour this figure's edges.
plt.rcParams['axes.edgecolor'] = '#333F4B'

# create a 6x4 inch figure rendered at 100 dpi
fig, ax = plt.subplots(figsize=(6, 4), dpi=100)

# customize spines: hide every spine except the bottom one
for side in ('top', 'right', 'left'):
    ax.spines[side].set_visible(False)

## data preprocessing.py
def _abbreviated_count_to_float(text):
    """Return the numeric value of a count string such as '1.2k' or '850'.

    A trailing 'k' is read as a x1000 multiplier; any other string is passed
    straight to float(), which raises ValueError if it is not numeric.
    """
    if text.endswith('k'):
        return float(text.rstrip('k')) * 1000
    return float(text)


# converting Star, Fork and Watch columns to numeric by replacing 'k' with 1000
# (all three columns now share one suffix test instead of mixing
# endswith('k') with a substring check)
for column in ('Star', 'Fork', 'Watch'):
    github_df[column] = github_df[column].apply(_abbreviated_count_to_float)

# Remove , from issue and commits (values stay strings at this point)
for column in ('Issues', 'Commits'):
    github_df[column] = github_df[column].apply(lambda x: x.replace(',', ''))

# Convert multiple object columns to numeric

## rename cols.py
# keep only the columns that are relevant for the analysis
relevant_columns = [
    'topic', 'name', 'user', 'star', 'fork', 'watch',
    'issue', 'pull_requests', 'topic_tag', 'commits', 'contributers',
]
github_df = github_data_df[relevant_columns]

# pair each current column name, in order, with its replacement and hand the
# resulting {old: new} mapping to rename
old_names = github_df.columns
new_names = [
    'Topic', 'Repo_Name', 'User_Name', 'Star', 'Fork', 'Watch',
    'Issues', 'Pull_Requests', 'Topic_Tags', 'Commits', 'Contributors',
]
rename_map = {old: new for old, new in zip(old_names, new_names)}
github_df = github_df.rename(columns=rename_map)

# a peek into the dataframe
github_df.head()

## imports.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# load the raw repository data from the input CSV
csv_path = '../input/github-repositories-analysis/Github_data.csv'
github_data_df = pd.read_csv(csv_path)

# drop the two 'Unnamed' columns (duplicates)
unnamed_columns = ['Unnamed: 0', 'Unnamed: 0.1']
github_data_df = github_data_df.drop(columns=unnamed_columns)
# get basic information about the dataset

## asfreq.py
[1] monthly_freq = pd.Period('2020-06',freq='M')
[2] monthly_freq
Period('2020-06', 'M')

# convert at the start of the month
[3] monthly_freq.asfreq('W', how='start')
Period('2020-06-01/2020-06-07', 'W-SUN')

# convert at the end of the month
[4] monthly_freq.asfreq('W', how='end')

## period_range.py
[1] period_range_sample = pd.period_range('3/5/2020', '13/5/2021', freq='M')
[2] period_range_sample
PeriodIndex(['2020-03', '2020-04', '2020-05', '2020-06', '2020-07', '2020-08',
             '2020-09', '2020-10', '2020-11', '2020-12', '2021-01', '2021-02',
             '2021-03', '2021-04', '2021-05'],
            dtype='period[M]', freq='M')

## time period.py
[1] p1 = pd.Period(2020, freq='A-OCT')
[2] p1
Period('2020', 'A-OCT')
[3] p2 = pd.Period('2020-05', freq='M')
[4] p2
Period('2020-05', 'M')

## time.py
# time object
[1] time = datetime.time(hour=6, minute=45, second=10, microsecond=348713)

# add timedelta to time
[2] td1 + time
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-32-f5329db22ead> in <module>
----> 1 td1 + time

## date.py
# date object
[1] date = datetime.date(year=2020, month=5, day=3)

# add timedelta to date
[2] td1 + date
datetime.date(2020, 5, 30)
	# Set the axes edge colour BEFORE creating the figure: matplotlib reads
	# rcParams when an Axes (and its spines) is constructed, so assigning the
	# value after plt.subplots() would not recolour this figure's edges.
	plt.rcParams['axes.edgecolor'] = '#333F4B'

	# create a 6x4 inch figure rendered at 100 dpi
	fig, ax = plt.subplots(figsize=(6, 4), dpi=100)

	# customize spines: hide every spine except the bottom one
	for side in ('top', 'right', 'left'):
		ax.spines[side].set_visible(False)
	def _abbreviated_count_to_float(text):
		"""Return the numeric value of a count string such as '1.2k' or '850'.

		A trailing 'k' is read as a x1000 multiplier; any other string is passed
		straight to float(), which raises ValueError if it is not numeric.
		"""
		if text.endswith('k'):
			return float(text.rstrip('k')) * 1000
		return float(text)

	# converting Star, Fork and Watch columns to numeric by replacing 'k' with 1000
	# (all three columns now share one suffix test instead of mixing
	# endswith('k') with a substring check)
	for column in ('Star', 'Fork', 'Watch'):
		github_df[column] = github_df[column].apply(_abbreviated_count_to_float)

	# Remove , from issue and commits (values stay strings at this point)
	for column in ('Issues', 'Commits'):
		github_df[column] = github_df[column].apply(lambda x: x.replace(',', ''))

	# Convert multiple object columns to numeric
	# keep only the columns that are relevant for the analysis
	relevant_columns = [
		'topic', 'name', 'user', 'star', 'fork', 'watch',
		'issue', 'pull_requests', 'topic_tag', 'commits', 'contributers',
	]
	github_df = github_data_df[relevant_columns]

	# pair each current column name, in order, with its replacement and hand the
	# resulting {old: new} mapping to rename
	old_names = github_df.columns
	new_names = [
		'Topic', 'Repo_Name', 'User_Name', 'Star', 'Fork', 'Watch',
		'Issues', 'Pull_Requests', 'Topic_Tags', 'Commits', 'Contributors',
	]
	rename_map = {old: new for old, new in zip(old_names, new_names)}
	github_df = github_df.rename(columns=rename_map)

	# a peek into the dataframe
	github_df.head()
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns

	# load the raw repository data from the input CSV
	csv_path = '../input/github-repositories-analysis/Github_data.csv'
	github_data_df = pd.read_csv(csv_path)

	# drop the two 'Unnamed' columns (duplicates)
	unnamed_columns = ['Unnamed: 0', 'Unnamed: 0.1']
	github_data_df = github_data_df.drop(columns=unnamed_columns)
	# get basic information about the dataset
	[1] monthly_freq = pd.Period('2020-06',freq='M')
	[2] monthly_freq
	Period('2020-06', 'M')

	# convert at the start of the month
	[3] monthly_freq.asfreq('W', how='start')
	Period('2020-06-01/2020-06-07', 'W-SUN')

	# convert at the end of the month
	[4] monthly_freq.asfreq('W', how='end')
	[1] period_range_sample = pd.period_range('3/5/2020', '13/5/2021', freq='M')
	[2] period_range_sample
	PeriodIndex(['2020-03', '2020-04', '2020-05', '2020-06', '2020-07', '2020-08',
	'2020-09', '2020-10', '2020-11', '2020-12', '2021-01', '2021-02',
	'2021-03', '2021-04', '2021-05'],
	dtype='period[M]', freq='M')
	[1] p1 = pd.Period(2020, freq='A-OCT')
	[2] p1
	Period('2020', 'A-OCT')
	[3] p2 = pd.Period('2020-05', freq='M')
	[4] p2
	Period('2020-05', 'M')
	# time object
	[1] time = datetime.time(hour=6, minute=45, second=10, microsecond=348713)

	# add timedelta to time
	[2] td1 + time
	---------------------------------------------------------------------------
	TypeError Traceback (most recent call last)
	<ipython-input-32-f5329db22ead> in <module>
	----> 1 td1 + time
	# date object
	[1] date = datetime.date(year=2020, month=5, day=3)

	# add timedelta to date
	[2] td1 + date
	datetime.date(2020, 5, 30)