Skip to content

Instantly share code, notes, and snippets.

@paulzuradzki
Created September 16, 2024 20:51
Show Gist options
  • Save paulzuradzki/d61341bfba68e8e98a98cf3812b64fc8 to your computer and use it in GitHub Desktop.
Save paulzuradzki/d61341bfba68e8e98a98cf3812b64fc8 to your computer and use it in GitHub Desktop.
Module 1 homework. ML Zoomcamp course.
"""Module 1 homework. ML Zoomcamp course."""
from pprint import pprint
import numpy as np
import pandas as pd
def main():
    """Load the laptops dataset, answer each homework question, and pretty-print the answers."""
    df = get_df_from_url()

    # q1 is the only question that does not look at the dataset.
    results = {"q1": q1()}

    # The remaining questions all take the DataFrame; evaluate them in order.
    dataset_questions = (
        ("q2", q2),
        ("q3", q3),
        ("q4", q4),
        ("q5", q5),
        ("q6", q6),
        ("q7", q7),
    )
    for key, answer in dataset_questions:
        results[key] = answer(df)

    pprint(results)
def get_df_from_url():
    """Read the laptops CSV from its GitHub raw URL and return it as a DataFrame."""
    return pd.read_csv(
        "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv"
    )
def q1():
    """Pandas version
    Report the version string of the installed Pandas package.
    """
    installed_version = pd.__version__
    return installed_version
def q2(df):
    """Records count
    Return the number of rows (records) in the dataset.
    """
    return len(df.index)
def q3(df):
    """Laptop brands
    Return the number of distinct values in the "Brand" column
    (missing values are not counted).
    """
    distinct_brands = df["Brand"].dropna().unique()
    return len(distinct_brands)
def q4(df):
    """Missing values
    Return the number of columns that contain at least one missing value.
    """
    # One boolean per column: True when that column has any missing entry.
    column_has_missing = df.isna().any()
    return column_has_missing.sum()
def q5(df):
    """Maximum final price
    Return the largest "Final Price" among rows whose Brand is 'Dell'.
    """
    dell_rows = df.query("Brand=='Dell'")
    dell_prices = dell_rows["Final Price"]
    return dell_prices.max()
def q6(df):
    """Median value of Screen
    Has the median of the "Screen" column changed after filling its missing
    values with the column's most frequent value (the mode)?

    Returns True when the median computed after the fill differs from the
    median computed before it, and False otherwise.
    """
    screen = df["Screen"]
    median_before = screen.median()

    modes = screen.mode()
    if modes.empty:
        # Every value is missing: there is no mode to fill with, so the
        # median cannot have changed.
        return False

    # mode() can return several values on ties; use the first one.
    most_frequent = modes.values[0]
    median_after = screen.fillna(most_frequent).median()

    # Only the two medians are compared. The mode is just the fill value and
    # says nothing about whether the median moved.
    return bool(median_before != median_after)
def q7(df):
    """Sum of weights
    Steps:
    - Keep only the rows whose Brand is "Innjoo".
    - Keep only the columns RAM, Storage, Screen and take the NumPy array, X.
    - Form XTX, the matrix product of X transposed with X.
    - Invert XTX.
    - Use the fixed target vector y = [1100, 1300, 800, 900, 1000, 1100].
    - Compute w = inverse(XTX) times X transposed times y.
    - Return the sum of the elements of w.
    Note: this is linear regression solved through the normal equation.
    """
    feature_columns = ["RAM", "Storage", "Screen"]
    innjoo_rows = df.query("Brand=='Innjoo'")
    X = innjoo_rows[feature_columns].values

    # Fixed targets; their count must match the number of selected rows.
    y = np.array([1100, 1300, 800, 900, 1000, 1100])

    gram = X.T @ X
    gram_inverse = np.linalg.inv(gram)
    weights = gram_inverse @ X.T @ y
    return weights.sum()
# Run the homework answers only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment