Emeka boris ama Emekaborisama

## SampleData - SampleData.csv

          
Continent,Frontend,Backend
Africa,40,80
Europe,80,120
Asia,120,80
North America,100,80
South America,200,150
Australia,18,50

## requirements.txt
sklearn
s3fs
pandas
awscli
magniv

## example3_magniv.py
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")

## example_magniv.py
#example
@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")

## example1.py
#example


@task(key="second",schedule="@monthly",resources={"cpu": "2000m", "memory": "2Gi"},description=" preprocess data and run price prediction inference")

## app.py
import os
import resource
import requests
import pandas as pd
from magniv.core import task
import pickle
from upload_download_s3 import download_s3, upload_s3
# Load the serialized model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open("tasks/model/model_lin.p", "rb") as serialized_model:
    # The context manager closes the file handle once the model is loaded.
    model = pickle.load(serialized_model)

## upload_download_s3.py
import pandas as pd
import os
s3_url = os.getenv("S3_URL")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
def upload_s3(data):
""" upload data to s3"""
upload_data = data.to_csv(s3_url, index=False, storage_options={
"key": AWS_ACCESS_KEY_ID,
"secret": AWS_SECRET_ACCESS_KEY

## ml_model.py
# using linear regression
# Fit a LinearRegression on the training split, predict on the test split and
# print the mean absolute error of those predictions.
# X_train, y_train, X_test, y_test and mean_absolute_error are not defined in
# this snippet; they must already be in scope when it runs.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)
preds_valid = model.predict(X_test)
linearreg =mean_absolute_error(y_test, preds_valid)  # the error value, not a model object
print(linearreg)
>>> 54.0539523895

# using xgboost regressor

## cross_validation.py
from math import sqrt

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler  # was used below without being imported

# Scale the feature columns and hold out 30% of the rows for evaluation.
# `new_train_df` is not defined in this snippet; it must already be in scope.
# NOTE(review): the scaler is fitted on all rows before the split, so the
# held-out rows influence the scaling -- confirm this is intended.
scaler = StandardScaler()
x = new_train_df.drop("price", axis=1)  # every column except the target
y = new_train_df.price  # target column
X = scaler.fit_transform(x)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=23
)

## data_preprocess.py
def convert_int(x):
    """Convert a value to ``int``, returning ``None`` when it cannot be converted.

    Args:
        x: Value to pass to ``int()``.

    Returns:
        ``int(x)`` on success; ``None`` if ``int()`` raises ``TypeError``,
        ``ValueError`` or ``OverflowError`` (for example ``None``,
        non-numeric text, NaN or infinity).
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt and SystemExit.
        return None
float_cols = train_df.select_dtypes("float64")
#convert mixed data type to int
new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(convert_int)
# convert datetime str to pandas datetime so we can extract days, months and year.
Continent	Frontend	Backend
Africa	40	80
Europe	80	120
Asia	120	80
North America	100	80
South America	200	150
Australia	18	50
	#example
	@task(key='first', schedule="@monthly",on_success=["second"], description=" get airbnb data and store it on s3")
	#example


	@task(key="second",schedule="@monthly",resources={"cpu": "2000m", "memory": "2Gi"},description=" preprocess data and run price prediction inference")
	import os
	import resource
	import requests
	import pandas as pd
	from magniv.core import task
	import pickle
	from upload_download_s3 import download_s3, upload_s3
	# Load the serialized model.
	# NOTE(review): pickle.load can execute arbitrary code contained in the file;
	# only load model files that come from a trusted source.
	with open("tasks/model/model_lin.p", "rb") as serialized_model:
		# The context manager closes the file handle once the model is loaded.
		model = pickle.load(serialized_model)
	import pandas as pd
	import os
	s3_url = os.getenv("S3_URL")
	AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
	AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
	def upload_s3(data):
	""" upload data to s3"""
	upload_data = data.to_csv(s3_url, index=False, storage_options={
	"key": AWS_ACCESS_KEY_ID,
	"secret": AWS_SECRET_ACCESS_KEY
	# using linear regression
	# Fit a LinearRegression on the training split, predict on the test split and
	# print the mean absolute error of those predictions.
	# X_train, y_train, X_test, y_test and mean_absolute_error are not defined in
	# this snippet; they must already be in scope when it runs.
	from sklearn.linear_model import LinearRegression
	model = LinearRegression()
	model.fit(X_train, y_train)
	preds_valid = model.predict(X_test)
	linearreg =mean_absolute_error(y_test, preds_valid)  # the error value, not a model object
	print(linearreg)
	>>> 54.0539523895

	# using xgboost regressor
	from math import sqrt

	from sklearn.metrics import mean_absolute_error, mean_squared_error
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler  # was used below without being imported

	# Scale the feature columns and hold out 30% of the rows for evaluation.
	# `new_train_df` is not defined in this snippet; it must already be in scope.
	# NOTE(review): the scaler is fitted on all rows before the split, so the
	# held-out rows influence the scaling -- confirm this is intended.
	scaler = StandardScaler()
	x = new_train_df.drop("price", axis=1)  # every column except the target
	y = new_train_df.price  # target column
	X = scaler.fit_transform(x)
	X_train, X_test, y_train, y_test = train_test_split(
		X, y, test_size=0.3, random_state=23
	)
	def convert_int(x):
		"""Convert a value to ``int``, returning ``None`` when it cannot be converted.

		Args:
			x: Value to pass to ``int()``.

		Returns:
			``int(x)`` on success; ``None`` if ``int()`` raises ``TypeError``,
			``ValueError`` or ``OverflowError`` (for example ``None``,
			non-numeric text, NaN or infinity).
		"""
		try:
			return int(x)
		except (TypeError, ValueError, OverflowError):
			# Only conversion failures are swallowed; a bare ``except`` would
			# also hide KeyboardInterrupt and SystemExit.
			return None
	float_cols = train_df.select_dtypes("float64")
	#convert mixed data type to int
	new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(convert_int)
	# convert datetime str to pandas datetime so we can extract days, months and year.