Skip to content

Instantly share code, notes, and snippets.

View ElisonSherton's full-sized avatar
🏠
Working from home

Vinayak Nayak ElisonSherton

🏠
Working from home
View GitHub Profile
import pandas as pd
# Load the anime table, treating the literal string 'Unknown' as a missing value
data = pd.read_csv(
    'MyAnimeList/anime.csv',
    na_values=['Unknown'],
)

# Report the percentage of missing entries per column and the row count
print(data.isna().sum() / len(data) * 100)
print(f"Total number of records: {len(data)}")

# Discard every row that has at least one missing entry, then print the same
# report again together with a five-row HTML preview of what is left
data = data.dropna()
print(data.isna().sum() / len(data) * 100)
display(HTML(data.head().to_html()))
print(f"Total number of records: {len(data)}")
@classmethod
def histogram(cls, df, colname, bins=None):
    """Draw a seaborn distribution plot of one column on a new figure.

    Args:
        df: Table holding the data; the column is read as ``df[colname]``.
        colname: Label of the column to plot.
        bins: Optional bin specification forwarded unchanged to
            ``sns.distplot``. ``None`` (the default) leaves the choice to
            seaborn.
    """
    # A single wide axis (figsize 20 x 6) to draw on
    fig, ax = plt.subplots(1, 1, figsize=(20, 6))

    # ``bins`` is passed straight through: ``None`` is already distplot's own
    # default, and skipping a truthiness test means array-like bin edges no
    # longer raise "truth value of an array is ambiguous".
    # NOTE(review): ``distplot`` is deprecated in recent seaborn releases;
    # consider ``histplot``/``displot`` when the seaborn version allows.
    sns.distplot(df[colname], ax=ax, bins=bins)
@classmethod
def genre_countplot(cls, df, colname):
    """Gather the individual genre labels stored in ``df[colname]``.

    Every string entry of the column is treated as a comma-separated list of
    genres; the pieces are trimmed and appended to one flat list.

    Args:
        df: Table holding the genre column; read as ``df[colname]``.
        colname: Label of the column with comma-separated genre strings.
    """
    all_genres = []
    for item in df[colname]:
        # Missing cells are not strings (e.g. NaN/None) and carry no genre;
        # calling ``.strip()`` on them would raise, so skip them.
        if not isinstance(item, str):
            continue
        # Split on the bare comma and trim each piece so that "A, B", "A,B"
        # and a trailing comma are all handled the same way.
        pieces = (piece.strip() for piece in item.split(","))
        all_genres.extend(piece for piece in pieces if piece)
    # NOTE(review): the source announces a follow-up step here ("Count the
    # number of items in the genre and create a dataframe"), but that code is
    # not part of the visible excerpt.
@classmethod
def barplot(cls, df, xcolname, ycolname):
    """Draw a seaborn bar chart of ``ycolname`` against ``xcolname``.

    Args:
        df: Table passed to seaborn as ``data``.
        xcolname: Column name used for the x axis (a string; it is
            title-cased for the caption).
        ycolname: Column name used for the y axis (a string; it is
            title-cased for the caption).
    """
    # Fresh 15 x 8 figure with one axis to hold the chart
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))

    # Draw the bars on that axis
    sns.barplot(x=xcolname, y=ycolname, data=df, ax=ax)

    # Caption text built from the two column names
    title = f"Barchart of {xcolname.title()} vs {ycolname.title()}"
    # NOTE(review): the visible excerpt ends here; ``title`` is not yet
    # applied to the axis in the lines shown.
@classmethod
def pieplot(cls, data, colname):
    """Prepare a figure and the label/size lists for a pie chart.

    Args:
        data: Mapping-like object; its ``keys()`` become the labels and its
            ``values()`` the slice sizes.
        colname: Not used in the visible portion of the method.
    """
    # Fresh 15 x 8 figure with one axis to hold the chart
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))

    # Materialise labels and sizes as plain lists, in the mapping's own order
    labels = list(data.keys())
    sizes = list(data.values())
    # NOTE(review): the visible excerpt ends before the pie chart is drawn;
    # ``labels`` and ``sizes`` are not consumed in the lines shown.
@classmethod
def boxplot(cls, data, xcolname, ycolname, ax, log=False):
    """Prepare the data for a box plot, optionally adding a log column.

    The input table is copied first, so the caller's ``data`` is never
    modified.

    Args:
        data: Table to plot from; must support ``copy()`` and column access.
        xcolname: Column name for the x axis (not used in the visible lines).
        ycolname: Column name for the y axis; source of the log transform.
        ax: Axis to draw on (not used in the visible lines).
        log: When true, a column named ``Log_<ycolname>`` holding the natural
            log of ``ycolname`` is added to the working copy.
    """
    # Work on a private copy so the extra column never leaks to the caller
    df = data.copy()

    # ``name`` stays None unless the log-transformed column is requested
    name = None
    if log:
        name = f'Log_{ycolname}'
        df[name] = np.log(df[ycolname])
    # NOTE(review): the visible excerpt ends here, before anything is drawn
    # on ``ax``.
@classmethod
def violinplot(cls, data, xcolname, ycolname, ax, log=False):
    """Prepare the data for a violin plot, optionally adding a log column.

    The input table is copied first, so the caller's ``data`` is never
    modified.

    Args:
        data: Table to plot from; must support ``copy()`` and column access.
        xcolname: Column name for the x axis (not used in the visible lines).
        ycolname: Column name for the y axis; source of the log transform.
        ax: Axis to draw on (not used in the visible lines).
        log: When true, a column named ``Log_<ycolname>`` holding the natural
            log of ``ycolname`` is added to the working copy.
    """
    # Work on a private copy so the extra column never leaks to the caller
    df = data.copy()

    # ``name`` stays None unless the log-transformed column is requested
    name = None
    if log:
        name = f'Log_{ycolname}'
        df[name] = np.log(df[ycolname])
    # NOTE(review): the visible excerpt ends here, before anything is drawn
    # on ``ax``.
@classmethod
def scatterplot(cls, data, xcolname, ycolname):
    """Draw ``ycolname`` against ``xcolname`` with ``sns.regplot``.

    Args:
        data: Table passed to seaborn as ``data``.
        xcolname: Column name for the x axis (a string; it is title-cased
            for the caption).
        ycolname: Column name for the y axis (a string; it is title-cased
            for the caption).
    """
    # Fresh 15 x 8 figure with one axis to hold the plot
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))

    # Draw the points on that axis via seaborn's regplot
    sns.regplot(x=xcolname, y=ycolname, ax=ax, data=data)

    # Caption text built from the two column names
    title = f"Scatterplot of {ycolname.title()} vs {xcolname.title()}"
    # NOTE(review): the visible excerpt ends here; ``title`` is not yet
    # applied to the axis in the lines shown.
@classmethod
def heatmap(cls, data, xcolname, ycolname, cmap='YlGnBu', quantity="occurrences"):
    """Draw an annotated seaborn heatmap of ``data`` on a new figure.

    Args:
        data: Two-dimensional data handed directly to ``sns.heatmap``.
        xcolname: Name shown for the x dimension in the caption (a string).
        ycolname: Name shown for the y dimension in the caption (a string).
        cmap: Colormap name forwarded to seaborn.
        quantity: Word describing what the cell values measure; used only in
            the caption.
    """
    # Fresh 20 x 15 figure with one axis to hold the heatmap
    fig, ax = plt.subplots(1, 1, figsize=(20, 15))

    # Cells are annotated with their value, formatted to two decimals
    sns.heatmap(data, ax=ax, annot=True, cmap=cmap, fmt=".2f", linewidth=0.3)

    # Caption text built from the quantity and the two dimension names
    title = f"Heatmap describing {quantity}: {ycolname.title()} vs {xcolname.title()}"
    # NOTE(review): the visible excerpt ends here; ``title`` is not yet
    # applied to the axis in the lines shown.