This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode the frame as 0/1 integers; the Gender column becomes the
# indicator columns "Gender_Male" and "Gender_Female". Only the female
# indicator is kept, renamed to a generic "Gender" -> 0: Male, 1: Female.
df_multiple = (
    pd.get_dummies(df, dtype=int)
    .drop(columns="Gender_Male")
    .rename(columns={"Gender_Female": "Gender"})
)
df_multiple
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# Degree-1 polynomial (straight-line) fit of Weight on Height, per gender.
# The commented arrays are the outputs recorded with the original snippet.

# males
male_fit = np.polyfit(df_male["Height"], df_male["Weight"], deg=1)
# array([   5.96177381, -224.49884071])

# females
female_fit = np.polyfit(df_female["Height"], df_female["Weight"], deg=1)
# array([   5.99404661, -246.01326575])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Semi-transparent height histograms for males and females.
df_male["Height"].plot(
    kind='hist', color='blue', edgecolor='black', alpha=0.5, figsize=(10, 7)
)
df_female["Height"].plot(
    kind='hist', color='lightsalmon', edgecolor='black', alpha=0.5, figsize=(10, 7)
)

# Style the current axes through a single handle.
ax = plt.gca()
ax.legend(labels=['Males', 'Females'])
ax.set_title('Distribution of Height', size=35)
ax.set_xlabel('Height (inches)', size=30)
ax.set_ylabel('Frequency', size=30)
ax.set_facecolor('white')
# Enlarge the major tick labels on both axes.
ax.tick_params(axis='both', which='major', labelsize=28)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression

df = df_height

# Linear regression of Weight on Height. The predictor is selected with a
# list of column names so it is passed as a one-column DataFrame; the
# target is passed as a Series.
lr = LinearRegression()
lr.fit(df.loc[:, ['Height']], df.loc[:, 'Weight'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# Draw 100 distinct rows at random (sampling without replacement).
# To work on a single gender, filter first: df_height[df_height["Gender"] == "Male"]
df = df_height.sample(n=100, replace=False)

# Observed values as NumPy arrays, plus the number of observations.
x, y = df["Height"].values, df["Weight"].values
N = x.shape[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gradient_descent(x, y, A, B, learning_rate):
    """Perform one gradient-descent step for the line ``y = A * x + B``.

    The gradients are those of the mean squared error between ``y`` and
    the current prediction ``A * x + B``.

    Args:
        x: 1-D array-like of feature values (lists are accepted).
        y: 1-D array-like of observed targets, same length as ``x``.
        A: Current slope.
        B: Current intercept.
        learning_rate: Step size applied to both gradients.

    Returns:
        Tuple ``(A, B)`` holding the updated slope and intercept.

    Raises:
        ValueError: If ``x`` is empty, since the mean gradient is undefined.
    """
    # Coerce to float arrays so plain Python lists behave like ndarrays
    # (``A * x`` on a list would raise or repeat the list).
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    N = len(x)
    if N == 0:
        raise ValueError("gradient_descent requires at least one sample")

    residual = y - (A * x + B)
    dA = -2 * np.sum(x * residual) / N
    dB = -2 * np.sum(residual) / N

    # Rebind instead of updating in place so array-valued A/B passed in by
    # the caller are never mutated.
    A = A - learning_rate * dA
    B = B - learning_rate * dB
    return A, B
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compute_mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, same shape as ``y_true``.

    Returns:
        The average of the squared differences.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # np.mean already divides by the number of samples; dividing by N a
    # second time would return MSE / N instead of the MSE.
    return np.mean((y_true - y_pred) ** 2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

# Draw 500 distinct rows at random (sampling without replacement).
# To work on a single gender, filter first: df_height[df_height["Gender"] == "Male"]
df = df_height.sample(n=500, replace=False)

# Observed values as NumPy arrays.
x, y = df["Height"].values, df["Weight"].values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nested_json = { | |
'name': 'David', | |
'city': 'London', | |
'income': 80000, | |
'skills': ["python", "SQL","GCP"], | |
'roles': { | |
"project manager":False, | |
"data engineer":False, | |
"data scientist":True, | |
"data analyst":False, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flat record with two string fields.
simple_json = {
    'name': 'David',
    'city': 'London',
}

# Flat record that additionally carries a numeric 'income' field.
simple_json_2 = {
    'name': 'Taylor',
    'city': 'Chicago',
    'income': 120000,
}