This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Load the anime dataset; cells holding the literal string 'Unknown' are
# read as missing values.
data = pd.read_csv(
    'MyAnimeList/anime.csv',
    na_values=['Unknown'],
)

# Report the percentage of missing entries per column, then the row count.
print(data.isna().sum().div(len(data)).mul(100))
print(f"Total number of records: {len(data)}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Discard every row that has at least one missing value, then repeat the
# missing-value report as a sanity check, preview the first five rows and
# print the remaining row count.
data = data.dropna(axis='index')
print(data.isna().sum().div(len(data)).mul(100))
display(HTML(data.head(5).to_html()))
print(f"Total number of records: {len(data)}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def histogram(self, df, colname, bins = None):
    """Plot the distribution of ``df[colname]`` on a new 20x6 figure.

    Args:
        df: Object indexable by column name (``df[colname]``).
        colname: Name of the column to plot.
        bins: Optional bin specification forwarded to ``sns.distplot``;
            when falsy, the argument is omitted and seaborn picks the bins.
    """
    # Create an axis object using subplots from matplotlib
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 6))
    # Forward ``bins`` only when the caller supplied a truthy value, so the
    # plotting call itself is written once.
    # NOTE(review): seaborn marks ``distplot`` as deprecated - confirm the
    # installed version still provides it.
    bin_options = {"bins": bins} if bins else {}
    sns.distplot(df[colname], ax=ax, **bin_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def genre_countplot(self, df, colname):
    """Collect every individual genre found in ``df[colname]``.

    Each cell is expected to be a comma-separated string of genres. Cells
    that are not strings (for example NaN or None for missing data) are
    skipped instead of raising ``AttributeError``.

    Args:
        df: Object indexable by column name (``df[colname]``) that yields
            one cell per record when iterated.
        colname: Name of the column holding the comma-separated genres.
    """
    # Process the genre column to split on comma and append resulting
    # genres all to a single list
    all_genres = []
    for item in df[colname]:
        # Missing values are not strings and have no genres to contribute
        if not isinstance(item, str):
            continue
        # Split on the bare comma and strip each piece so that inconsistent
        # spacing ("A,B", "A,  B") still yields clean genre names; empty
        # pieces (blank cells, trailing commas) are dropped.
        pieces = (piece.strip() for piece in item.split(','))
        all_genres.extend(piece for piece in pieces if piece)
    # Count the number of items in the genre and create a dataframe
    # NOTE(review): the visible snippet stops here; the counting and plotting
    # steps are not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def barplot(self, df, xcolname, ycolname):
    """Draw a seaborn bar chart of ``ycolname`` against ``xcolname``.

    Args:
        df: Data passed to ``sns.barplot`` as ``data``.
        xcolname: Column name used for the x axis (must support ``.title()``).
        ycolname: Column name used for the y axis (must support ``.title()``).
    """
    # New 15x8 figure with a single axis to hold the bar chart
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 8))
    # Render the bars onto that axis
    sns.barplot(data=df, x=xcolname, y=ycolname, ax=ax)
    # Build the plot title from the title-cased column names
    # NOTE(review): the visible snippet stops here; `title` is assigned but
    # not yet applied in the code shown.
    title = f"Barchart of {xcolname.title()} vs {ycolname.title()}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def pieplot(self, data, colname):
    """Prepare a 15x8 figure and the label/size lists for a pie chart.

    Args:
        data: Mapping-like object exposing ``keys()`` and ``values()``;
            keys become the slice labels and values the slice sizes.
        colname: Not used in the visible part of this method.
    """
    # New figure with a single axis for the pie chart
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 8))
    # Unpack the mapping into two parallel lists
    labels = [*data.keys()]
    sizes = [*data.values()]
    # Plot the pie chart
    # NOTE(review): the visible snippet stops here; the plotting call itself
    # is not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def boxplot(self, data, xcolname, ycolname, ax, log=False):
    """Prepare the data for a box plot, optionally adding a log column.

    Args:
        data: Source data; it is copied via ``data.copy()`` so the caller's
            object does not receive the extra column.
        xcolname: Column name for the x axis (unused in the visible part).
        ycolname: Column name for the y axis; source of the log transform.
        ax: Axis to draw on (unused in the visible part).
        log: When truthy, a ``Log_<ycolname>`` column holding
            ``np.log(df[ycolname])`` is added to the working copy.
    """
    # Work on a copy so the derived column never leaks back to the caller
    df = data.copy()
    # `name` stays None unless the log transform was requested
    name = f'Log_{ycolname}' if log else None
    if name is not None:
        # NOTE(review): np.log gives -inf for zeros and NaN for negative
        # values - confirm the column is strictly positive.
        df[name] = np.log(df[ycolname])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def violinplot(self, data, xcolname, ycolname, ax, log=False):
    """Prepare the data for a violin plot, optionally adding a log column.

    Args:
        data: Source data; it is copied via ``data.copy()`` so the caller's
            object does not receive the extra column.
        xcolname: Column name for the x axis (unused in the visible part).
        ycolname: Column name for the y axis; source of the log transform.
        ax: Axis to draw on (unused in the visible part).
        log: When truthy, a ``Log_<ycolname>`` column holding
            ``np.log(df[ycolname])`` is added to the working copy.
    """
    # Work on a copy so the derived column never leaks back to the caller
    df = data.copy()
    # `name` stays None unless the log transform was requested
    name = f'Log_{ycolname}' if log else None
    if name is not None:
        # NOTE(review): np.log gives -inf for zeros and NaN for negative
        # values - confirm the column is strictly positive.
        df[name] = np.log(df[ycolname])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def scatterplot(self, data, xcolname, ycolname):
    """Draw a seaborn regression scatter plot of two columns.

    Args:
        data: Data passed to ``sns.regplot`` as ``data``.
        xcolname: Column name used for the x axis (must support ``.title()``).
        ycolname: Column name used for the y axis (must support ``.title()``).
    """
    # New 15x8 figure with a single axis to plot on
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 8))
    # Draw the points via seaborn's regplot on that axis
    sns.regplot(data=data, x=xcolname, y=ycolname, ax=ax)
    # Build the plot title from the title-cased column names
    # NOTE(review): the visible snippet stops here; `title` is assigned but
    # not yet applied in the code shown.
    title = f"Scatterplot of {ycolname.title()} vs {xcolname.title()}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def heatmap(self, data, xcolname, ycolname, cmap = 'YlGnBu', quantity = "occurrences"):
    """Draw an annotated seaborn heatmap of ``data`` on a new 20x15 figure.

    Args:
        data: Rectangular data handed directly to ``sns.heatmap``.
        xcolname: Name shown for the x dimension in the title.
        ycolname: Name shown for the y dimension in the title.
        cmap: Colormap passed to ``sns.heatmap``.
        quantity: Text describing what the cell values represent; used in
            the title only.
    """
    # New figure with a single axis to plot on
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 15))
    # Cell annotations use two decimals; thin lines separate the cells.
    # NOTE(review): seaborn documents this option as `linewidths`; the
    # `linewidth` spelling is forwarded to matplotlib - confirm it has the
    # intended effect on the installed versions.
    render_options = dict(annot=True, cmap=cmap, fmt=".2f", linewidth=0.3)
    sns.heatmap(data, ax=ax, **render_options)
    # Build the plot title from the quantity and title-cased axis names
    # NOTE(review): the visible snippet stops here; `title` is assigned but
    # not yet applied in the code shown.
    title = f"Heatmap describing {quantity}: {ycolname.title()} vs {xcolname.title()}"
OlderNewer