This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load the autoreload extension | |
%load_ext autoreload | |
# Set extension to reload modules every time before executing code | |
%autoreload 2 | |
from helperfunctions import complicated_function_to_return_a_number | |
complicated_function_to_return_a_number() | |
# Output: 123 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy.random import randint
import matplotlib.pyplot as plt

# Sample 100 random integers per axis to create a scatterplot.
# `high` is exclusive, so values fall in the range 1..999.
x = randint(low=1, high=1000, size=100)
y = randint(low=1, high=1000, size=100)

# This will show nothing in a Jupyter Notebook
# NOTE(review): presumably this refers to a notebook without an inline
# plotting backend enabled - confirm against the surrounding article.
plt.scatter(x, y)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Easy to read version | |
%system date | |
# Shorthand with "!!" instead of "%system" works equally well | |
!!date |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from numpy.random import randint


# A function to simulate one million dice throws.
def one_million_dice():
    """Return one million simulated throws of a six-sided die.

    Returns:
        A NumPy array of 1,000,000 integers, each between 1 and 6 inclusive.
    """
    # `high` is exclusive in numpy.random.randint, so 7 yields faces 1..6.
    return randint(low=1, high=7, size=1_000_000)
# Let's try %time first | |
%time throws = one_million_dice() | |
%time mean = np.mean(throws) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Outputs a list of all interactive variables in your environment | |
%who_ls | |
# Reduces the output to interactive variables of type "function" | |
%who_ls function |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Read only the four columns used below, decoding the file as ISO-8859-1.
# NOTE(review): `path` is not defined in this cell - it must be set beforehand.
df = pd.read_csv(
    path,
    encoding="ISO-8859-1",
    usecols=["imdbId", "Title", "Genre", "Poster"],
)
# Index the rows by "imdbId", then discard rows with any missing value.
df = df.set_index("imdbId").dropna()
# keep=False removes *every* row whose "Poster" value occurs more than once,
# not only the later repeats.
df = df.drop_duplicates(subset="Poster", keep=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Matches a four-digit number wrapped in parentheses, e.g. "(1995)".
# Raw string: "\(" and "\d" are invalid escapes in a normal string literal.
re_year = re.compile(r"\((\d{4})\)")


def _extract_year(title):
    """Return the first parenthesised four-digit number in *title* as an int.

    Returns None when the title contains no such pattern.
    """
    # One search replaces the original pair of findall() calls per title.
    match = re_year.search(title)
    return int(match.group(1)) if match else None


df["year"] = df.Title.map(_extract_year)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Turn the pipe-separated genre string of each row into a list of genres.
df["Genre"] = df.Genre.map(lambda x: x.split("|"))
all_genres = {item for genres in df.Genre for item in genres}
# Iterate in sorted order so the new columns appear in the same order on
# every run (plain set iteration order is not stable across interpreters).
for genre in sorted(all_genres):
    # Column name: "is_" + lower-cased genre with non-word characters removed.
    new_var = "is_" + re.sub(r'\W+', '', genre.lower())
    df[new_var] = df.Genre.map(lambda x: genre in x)
# The list column is no longer needed once the boolean flags exist.
df.drop(["Genre"], axis=1, inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a decade label from the third digit of the year, e.g. 1985 -> "80s".
# This relies on every year having four digits.
df_range["decade"] = df_range.year.apply(lambda x: str(int(x))[2] + "0s")
print(f"Movies per decade in the dataset:\n{df_range.decade.value_counts()}\n")
# Outputs:
# 10s 1381
# 00s 1327
# 90s 855
# 80s 582
# 70s 469
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Size of the smallest decade group; every decade is down-sampled to it.
# NOTE(review): this cell reads `df`, while the "decade" column is created on
# `df_range` in the preceding cell - confirm which frame is intended.
min_number = df.decade.value_counts().min()
# Draw `min_number` random rows from each decade so all groups are equal-sized.
df_sample = df.groupby("decade").apply(lambda x: x.sample(min_number))
OlderNewer