-
-
Save paulzuradzki/d61341bfba68e8e98a98cf3812b64fc8 to your computer and use it in GitHub Desktop.
Module 1 homework. ML Zoomcamp course.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Module 1 homework. ML Zoomcamp course.""" | |
from pprint import pprint | |
import numpy as np | |
import pandas as pd | |
def main():
    """Download the laptops dataset, answer every question, and print the answers."""
    laptops = get_df_from_url()

    # q1 is the only question that does not look at the dataset.
    answers = {"q1": q1()}
    per_dataset_questions = (
        ("q2", q2),
        ("q3", q3),
        ("q4", q4),
        ("q5", q5),
        ("q6", q6),
        ("q7", q7),
    )
    for label, solver in per_dataset_questions:
        answers[label] = solver(laptops)

    pprint(answers)
LAPTOPS_CSV_URL = (
    "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv"
)


def get_df_from_url(url=LAPTOPS_CSV_URL):
    """Load a CSV into a DataFrame.

    Parameters
    ----------
    url
        Location of the CSV. Defaults to the course's laptops dataset.
        The value is handed straight to ``pd.read_csv``, so a local path or
        an open text buffer works as well (handy for offline runs).

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    return pd.read_csv(url)
def q1():
    """Pandas version

    Report the version string of the Pandas installation in use.
    """
    installed_version = pd.__version__
    return installed_version
def q2(df):
    """Records count

    Number of records (rows) in the dataset.
    """
    row_count = len(df.index)
    return row_count
def q3(df):
    """Laptop brands

    Number of distinct laptop brands in the dataset (missing brands are not counted).
    """
    known_brands = df["Brand"].dropna()
    return len(known_brands.unique())
def q4(df):
    """Missing values

    Number of columns in the dataset that contain at least one missing value.
    """
    # One boolean per column: does the column hold any missing entry?
    column_has_gap = df.isna().any()
    return column_has_gap.sum()
def q5(df):
    """Maximum final price

    Highest final price among the Dell notebooks in the dataset.
    """
    is_dell = df["Brand"] == "Dell"
    dell_prices = df.loc[is_dell, "Final Price"]
    return dell_prices.max()
def q6(df):
    """Median value of Screen

    Does the median of the Screen column change once its missing values are
    filled with the column's most frequent value (the mode)?

    Returns True when the median after filling differs from the median
    before filling, and False otherwise.
    """
    screen = df["Screen"]
    modes = screen.mode()
    if modes.empty:
        # The column has no non-missing values, so there is nothing to fill
        # with and the median cannot change.
        return False

    median_before = screen.median()
    # mode() returns its values sorted; take the first one as the fill value.
    median_after = screen.fillna(modes.iloc[0]).median()

    # Only the two medians are compared. The mode is merely the fill value;
    # it may legitimately differ from the median without anything changing.
    return bool(median_before != median_after)
def q7(df):
    """Sum of weights

    Steps:
    - Keep only the "Innjoo" laptops and the columns RAM, Storage, Screen.
    - Take the underlying NumPy array of that selection; call it X.
    - Form XTX, the matrix product of X transposed with X, and invert it.
    - With y = [1100, 1300, 800, 900, 1000, 1100], compute
      w = (inverse of XTX) times (X transposed) times y.
    - Return the sum of the elements of w.

    Note: this is the normal-equation form of linear regression.
    """
    feature_columns = ["RAM", "Storage", "Screen"]
    innjoo_mask = df["Brand"] == "Innjoo"
    X = df.loc[innjoo_mask, feature_columns].to_numpy()

    targets = np.array([1100, 1300, 800, 900, 1000, 1100])

    gram_inverse = np.linalg.inv(X.T @ X)
    # Left-to-right evaluation: (gram_inverse @ X.T) first, then times targets.
    weights = gram_inverse @ X.T @ targets
    return weights.sum()
# Run the homework only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment