Skip to content

Instantly share code, notes, and snippets.

View Emekaborisama's full-sized avatar
🎯
Focusing

Emeka boris ama Emekaborisama

🎯
Focusing
View GitHub Profile
View SampleData - SampleData.csv
Continent Frontend Backend
Africa 40 80
Europe 80 120
Asia 120 80
North America 100 80
South America 200 150
Australia 18 50
View requirements.txt
scikit-learn
s3fs
pandas
awscli
magniv
View example3_magniv.py
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")
View example_magniv.py
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")
View example1.py
#example
@task(key="second",schedule="@monthly",resources={"cpu": "2000m", "memory": "2Gi"},description=" preprocess data and run price prediction inference")
View app.py
import os
import resource
import requests
import pandas as pd
from magniv.core import task
import pickle
from upload_download_s3 import download_s3, upload_s3
# Load the serialized model once, at import time. The context manager closes
# the file handle as soon as unpickling finishes instead of leaving it open
# for the lifetime of the process.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model artifacts that come from a trusted source.
with open("tasks/model/model_lin.p", "rb") as serialized_model:
    model = pickle.load(serialized_model)
View upload_download_s3.py
import pandas as pd
import os
# Destination URL and AWS credentials, read from the process environment at
# import time. Each name is None when its environment variable is unset.
s3_url, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY = (
    os.environ.get(variable)
    for variable in ("S3_URL", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
)
def upload_s3(data):
""" upload data to s3"""
upload_data = data.to_csv(s3_url, index=False, storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY
View ml_model.py
# using linear regression
# Fit a LinearRegression model on the training split, predict on the test
# split, and print the mean absolute error of those predictions.
# X_train, y_train, X_test and y_test are not defined in this snippet;
# presumably they come from the train_test_split step — confirm.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
# Predictions for the held-out rows, compared against y_test below.
preds_valid = model.predict(X_test)
# NOTE(review): mean_absolute_error is not imported in this snippet;
# presumably it comes from sklearn.metrics — confirm it is in scope.
linearreg =mean_absolute_error(y_test, preds_valid)
print(linearreg)
# Output: 54.0539523895
# using xgboost regressor
View cross_validation.py
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
# Standardize the feature columns and split them, together with the "price"
# target, into train and test sets.
# NOTE(review): StandardScaler is not imported in the visible lines;
# presumably it comes from sklearn.preprocessing — confirm it is in scope.
scaler = StandardScaler()
# Features are every column except "price"; the target is the "price" column.
x=new_train_df.drop("price",axis =1)
y=new_train_df.price
# NOTE(review): the scaler is fitted on all rows before the split, so the
# test rows influence the scaling statistics — confirm this is intended.
X = scaler.fit_transform(x)
# Hold out 30% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=23)
View data_preprocess.py
def convert_int(x):
    """Convert a value to ``int``, or return ``None`` if it cannot be converted.

    Args:
        x: Any value accepted by the built-in ``int`` (numbers, numeric
            strings, ...). Floats are truncated toward zero.

    Returns:
        The integer value of ``x``, or ``None`` when ``int(x)`` fails, for
        example for ``None``, non-numeric strings, NaN, or infinity.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        return None
# Select the float64 columns of train_df.
# NOTE(review): float_cols is not used in the lines visible here.
float_cols = train_df.select_dtypes("float64")
#convert mixed data type to int
# convert_int is applied to each value; values it cannot convert become None.
new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(convert_int)
# convert datetime str to pandas datetime so we can extract days, months and year.