# Skip to content

# Instantly share code, notes, and snippets.

import pandas as pd

# Read mpg_ggplot2.csv from the selva86/datasets GitHub repository into a
# DataFrame (requires network access).
df_raw = pd.read_csv(
    "https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv"
)
# Bare expression: previews the first rows in a notebook; it has no visible
# effect when the file runs as a plain script.
df_raw.head()
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this snippet needs an older scikit-learn to run — confirm the
# pinned version.
df = load_boston()

# Wrap the feature matrix in a DataFrame whose columns carry the feature
# names supplied by the dataset object.
challenge_df = pd.DataFrame(df.data, columns=df.feature_names)
import numpy as np
import pandas as pd

# Load the life-expectancy CSV; the path is relative to the current working
# directory, one level above it.
df = pd.read_csv(
    '../Datasets/Life Expectancy Data.csv'
)
import random

import pandas as pd

# Load the Titanic CSV, keep only rows without any missing value, then
# renumber the rows. reset_index() is called without drop=True, so the old
# row labels are kept as an extra column.
df_original = (
    pd.read_csv('../Datasets/titanic.csv')
    .dropna()
    .reset_index()
)

# Introduce missing values
# Seed the stdlib RNG; the code that draws from it is not part of this snippet.
random.seed(15)
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this snippet only runs on older scikit-learn releases — confirm the
# pinned version.
df = load_boston()

# Build the DataFrame with its column labels in one step: the values come
# from the dataset's feature matrix, the labels from its feature names.
boston_df = pd.DataFrame(df.data, columns=df.feature_names)
import numpy as np
import pandas as pd

# Load the Titanic CSV variant named "titanic_missing"; the path is relative
# to the current working directory, one level above it.
df = pd.read_csv(
    '../Datasets/titanic_missing.csv'
)
import pandas as pd
import numpy as np

# Work on a reproducible 50-row sample of the Titanic data.
df = pd.read_csv('Datasets/titanic.csv').sample(50, random_state=100)

# Number of blanking attempts: one draw from 4..14 (upper bound exclusive).
# randint(..., 1) returns a length-1 array, which range() cannot consume
# directly, so the loop bound is read from its single element.
n_missing = np.random.randint(4, 15, 1)
for _ in range(int(n_missing[0])):
    # Draw a row *position*, then translate it to that row's label. After
    # .sample() the index still holds the original CSV row labels, so using
    # the position itself as a label with .at could append a brand-new row
    # instead of blanking the age of an existing one.
    # NOTE(review): the lower bound of 1 means position 0 is never picked,
    # and the same position can be drawn twice — confirm both are acceptable.
    row = np.random.randint(1, df.shape[0])
    df.at[df.index[row], "Age"] = np.nan
# 1.
import numpy as np

# One-dimensional integer array holding five values.
arr = np.array((1, 20, 300, 4000, 50000))
# Bare expression: notebook-style display of the array.
arr
# 2.
# Two lists of tuples: T1 starts with a 3-tuple followed by 2-tuples, while
# T2 contains 2-tuples only.
T1 = [
    (1, 10, 10),
    (2, 20),
    (3, 30),
]
T2 = [
    (1, 10),
    (2, 20),
    (3, 30),
]
# Seed NumPy's global RNG so the three draws below are repeatable.
np.random.seed(100)
# NOTE(review): this pool has 25 letters — there is no 'W'. Confirm whether
# that is intended before changing it, since it affects every draw.
alphabets = list('ABCDEFGHIJKLMNOPQRSTUVXYZ')
# Draw 10, 20 and 5 letters (with replacement). The generator is consumed
# left to right, so the RNG stream is used for A, then B, then C.
A, B, C = (np.random.choice(alphabets, size=count) for count in (10, 20, 5))
import numpy as np

# Parse the stock price file as numbers, skipping its header row, and round
# every value to two decimals. The delimiter must be the separator character
# itself: the previous value "csv" made genfromtxt split on the literal text
# "csv" rather than on commas.
# NOTE(review): assumes the file is comma-separated, as its .csv extension
# suggests — confirm against the data.
arr = np.genfromtxt("Datasets/stock_price_miss.csv", delimiter=",", skip_header=1).round(2)
# Bare expression: notebook-style preview of the first 30 entries along the
# first axis.
arr[:30]