abdel1979/Model.py

## Model.py
import sys
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression


# Fixed locations and column names used by this script.
TRAINING_CSV = "dataTraining.csv"
OUTPUT_PATH = '/data/outputTest.txt'
INDEX_COLUMN = 'x001'
TARGET_COLUMN = 'y'


def split_target(frame):
    """Drop rows whose target is missing and separate predictors from target.

    Args:
        frame: DataFrame that contains a ``y`` column.

    Returns:
        A ``(predictors, target)`` tuple. ``predictors`` is a new DataFrame
        without the ``y`` column and ``target`` is the ``y`` Series; both are
        restricted to the rows where ``y`` is present. ``frame`` itself is
        left untouched.

    Raises:
        KeyError: if ``frame`` has no ``y`` column.
    """
    labelled = frame.dropna(axis=0, subset=[TARGET_COLUMN])
    return labelled.drop([TARGET_COLUMN], axis=1), labelled[TARGET_COLUMN]


def build_output(ids, y_true, y_pred):
    """Assemble the table that is written to the output file.

    Args:
        ids: row identifiers, one per prediction.
        y_true: Series of the original target values.
        y_pred: predicted target values, in the same row order as ``ids``.

    Returns:
        DataFrame with the columns ``Id``, ``Y Original`` and ``Y predicted``.
    """
    return pd.DataFrame({'Id': ids, 'Y Original': y_true, 'Y predicted': y_pred})


def main(argv=None):
    """Fit a linear regression on the training CSV and score the test CSV.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first element is the path of the test CSV.

    Returns:
        0 on success, 2 when no test file was supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message instead of a bare IndexError.
        sys.stderr.write("usage: Model.py <test-data.csv>\n")
        return 2
    filename = args[0]

    # Read the data that will serve for training
    X_full = pd.read_csv(TRAINING_CSV, index_col=INDEX_COLUMN)
    # Read the data that will serve for testing
    X_test = pd.read_csv(filename, index_col=INDEX_COLUMN)

    # Remove rows with missing target and separate target from predictors
    X_train, y = split_target(X_full)
    X_test, y_test = split_target(X_test)

    model = LinearRegression()
    model.fit(X_train, y)
    preds = model.predict(X_test)

    # Save test predictions to file
    output = build_output(X_test.index, y_test, preds)
    output.to_csv(OUTPUT_PATH, index=False)
    return 0


if __name__ == "__main__":
    sys.exit(main())
	import sys
	import warnings
	warnings.filterwarnings("ignore", category=FutureWarning)

	import math
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.impute import SimpleImputer
	from sklearn.metrics import mean_squared_error
	from sklearn.linear_model import LinearRegression




	# The first command-line argument names the CSV holding the rows to score.
	filename = sys.argv[1]

	# Load the training table, then the table to score; both are indexed by 'x001'.
	X_full = pd.read_csv("dataTraining.csv", index_col='x001')
	X_test = pd.read_csv(filename, index_col='x001')

	# Keep only the rows in which the target 'y' is present.
	X_full = X_full.dropna(subset=['y'])
	X_test = X_test.dropna(subset=['y'])

	# Take the target out of each table, then strip it from the predictors.
	y = X_full['y']
	y_test = X_test['y']
	X_full = X_full.drop(['y'], axis='columns')
	X_test = X_test.drop(['y'], axis='columns')

	# Fit an ordinary linear regression and predict for the test rows.
	model = LinearRegression().fit(X_full, y)
	preds = model.predict(X_test)

	# Write the row ids next to the original and predicted targets.
	output = pd.DataFrame({
		'Id': X_test.index,
		'Y Original': y_test,
		'Y predicted': preds,
	})
	output.to_csv('/data/outputTest.txt', index=False)