Skip to content

Instantly share code, notes, and snippets.

# Expand the categorical Gender column into integer indicator columns
# (Gender_Male / Gender_Female), then reduce them to a single flag.
df_multiple = (
    pd.get_dummies(df, dtype=int)
    # Gender_Male carries no extra information once Gender_Female is known.
    .drop(columns="Gender_Male")
    # Keep the remaining indicator as a generic Gender -> 0: Male and 1: Female
    .rename(columns={"Gender_Female": "Gender"})
)
df_multiple
import numpy as np
# Degree-1 polynomial fit of Weight against Height for the male rows.
male_fit = np.polyfit(df_male["Height"], df_male["Weight"], deg=1)
# array([ 5.96177381, -224.49884071])

# Same degree-1 fit for the female rows.
female_fit = np.polyfit(df_female["Height"], df_female["Weight"], deg=1)
# array([ 5.99404661, -246.01326575])
# Draw the male and female height histograms with identical styling,
# differing only in bar colour.
hist_options = dict(kind='hist', edgecolor='black', alpha=0.5, figsize=(10, 7))
for heights, bar_color in ((df_male.Height, 'blue'), (df_female.Height, 'lightsalmon')):
    heights.plot(color=bar_color, **hist_options)

# Legend, title and axis labels.
plt.legend(labels=['Males', 'Females'])
plt.title('Distribution of Height', size=35)
plt.xlabel('Height (inches)', size=30)
plt.ylabel('Frequency', size=30)

# Style the current axes: white background and a larger tick-label size
# on both axes.
current_axes = plt.gca()
current_axes.set_facecolor('white')
current_axes.tick_params(axis='both', which='major', labelsize=28)
from sklearn.linear_model import LinearRegression
df = df_height
# Predictor is selected with double brackets so it stays a DataFrame;
# the response is the Weight column.
predictors = df[['Height']]
response = df['Weight']
# Create the linear regression object and fit it (fit returns the estimator).
lr = LinearRegression().fit(predictors, response)
import numpy as np
# Work on a random subset of 100 rows, drawn without replacement.
# (Optional filter before sampling: df = df[df["Gender"] == "Male"])
df = df_height.sample(n=100, replace=False)

# Real Values
x, y = df["Height"].values, df["Weight"].values
N = len(x)
def gradient_descent(x, y, A, B, learning_rate):
    """Perform one gradient-descent update for the line ``y = A * x + B``.

    The gradients are the partial derivatives of the mean squared error
    with respect to ``A`` and ``B``, evaluated at the current parameters.

    Args:
        x: Observed inputs (array or plain sequence of numbers).
        y: Observed targets, same length as ``x``.
        A: Current slope.
        B: Current intercept.
        learning_rate: Step size applied to each gradient.

    Returns:
        Tuple ``(A, B)`` holding the updated slope and intercept.
    """
    # Coerce to arrays so plain Python sequences work as well as ndarrays.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    N = len(x)

    # Residuals between the observations and the current line.
    residuals = y - (A * x + B)

    dA = -2 * np.sum(x * residuals) / N
    dB = -2 * np.sum(residuals) / N

    return A - learning_rate * dA, B - learning_rate * dB
def compute_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Args:
        y_true: Observed values (array or plain sequence of numbers).
        y_pred: Predicted values, same length as ``y_true``.

    Returns:
        The average of the squared differences ``(y_true - y_pred) ** 2``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # np.mean already divides by the number of samples; dividing by N a
    # second time would return MSE / N instead of the MSE.
    return np.mean((y_true - y_pred) ** 2)
import numpy as np
# Random subset of 500 rows from the height data, drawn without replacement.
# (To restrict to one gender first: df = df_height[df_height["Gender"] == "Male"])
df = df_height.sample(n=500, replace=False)

# Real Values
x = df.Height.values
y = df.Weight.values
# Example record with nested values: a list under 'skills' and a dict
# under 'roles'.
# NOTE(review): the original snippet stopped after "data analyst" without
# closing either dict, which is a syntax error; the braces are closed here.
# Confirm that no further roles or keys were lost.
nested_json = {
    'name': 'David',
    'city': 'London',
    'income': 80000,
    'skills': ["python", "SQL", "GCP"],
    'roles': {
        "project manager": False,
        "data engineer": False,
        "data scientist": True,
        "data analyst": False,
    },
}
# Flat example record: two string fields, no nesting.
simple_json = dict(name='David', city='London')
# Second flat example record; this one also carries an 'income' field.
simple_json_2 = dict(
    name='Taylor',
    city='Chicago',
    income=120000,
)