Skip to content

Instantly share code, notes, and snippets.

View Emekaborisama's full-sized avatar
🎯
Focusing

Emeka boris ama Emekaborisama

🎯
Focusing
View GitHub Profile
Continent Frontend Backend
Africa 40 80
Europe 80 120
Asia 120 80
North America 100 80
South America 200 150
Australia 18 50
scikit-learn
s3fs
pandas
awscli
magniv
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")
#example
@task(key="second",schedule="@monthly",resources={"cpu": "2000m", "memory": "2Gi"},description=" preprocess data and run price prediction inference")
import os
import resource
import requests
import pandas as pd
from magniv.core import task
import pickle
from upload_download_s3 import download_s3, upload_s3
# Load the pickled model that ships with the tasks.
# NOTE: unpickling executes code embedded in the file, so this path must only
# ever point at a trusted artifact.
# The context manager closes the file handle as soon as the model is loaded;
# `serialized_model` stays bound (to the closed handle) for compatibility.
with open("tasks/model/model_lin.p", "rb") as serialized_model:
    model = pickle.load(serialized_model)
import pandas as pd
import os
# S3 destination and AWS credentials are read from the process environment.
# Each value is None when the corresponding variable is not set.
s3_url = os.environ.get("S3_URL")
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
def upload_s3(data):
""" upload data to s3"""
upload_data = data.to_csv(s3_url, index=False, storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY
# using linear regression
# Fit a LinearRegression model on the training split and report the mean
# absolute error of its predictions on the test split.
# X_train / X_test / y_train / y_test and mean_absolute_error are not defined
# in this snippet; they must already be in scope when it runs.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
preds_valid = model.predict(X_test)
# Despite its name, `linearreg` holds the error score, not the model.
linearreg = mean_absolute_error(y_test, preds_valid)
print(linearreg)
# Output recorded from the author's run: 54.0539523895
# using xgboost regressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
# Separate the target column ("price") from the predictor columns.
x = new_train_df.drop(columns="price")
y = new_train_df["price"]

# Scale the predictors, then hold out 30% of the rows as a test split.
# random_state is fixed so the split is reproducible.
# NOTE(review): the scaler is fitted on every row before the split, so the
# test rows contribute to the scaling statistics — confirm this is intended.
# NOTE(review): StandardScaler is not imported in the visible snippet —
# presumably it is imported elsewhere; verify.
scaler = StandardScaler()
X = scaler.fit_transform(x)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=23
)
def convert_int(x):
    """Convert a value to ``int``, or return ``None`` if it cannot be converted.

    The conversion is a plain ``int(x)`` call, so floats are truncated toward
    zero and numeric strings are parsed.

    Args:
        x: The value to convert.

    Returns:
        The integer value, or ``None`` when ``int(x)`` rejects the input
        (for example ``None``, NaN, infinity, or a non-numeric string).
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Swallow conversion failures only; a bare ``except`` would also hide
        # KeyboardInterrupt and SystemExit.
        return None
# Keep only the float64 columns of train_df.
float_cols = train_df.select_dtypes(include="float64")
# Pass every reviews_per_month value through convert_int; values it cannot
# convert come back as None.
new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(
    convert_int
)
# convert datetime str to pandas datetime so we can extract days, months and year.