Skip to content

Instantly share code, notes, and snippets.

@abdel1979
Created July 19, 2019 08:42
Show Gist options
  • Save abdel1979/c4188ad983c15d182bad3c9c297e8a00 to your computer and use it in GitHub Desktop.
Save abdel1979/c4188ad983c15d182bad3c9c297e8a00 to your computer and use it in GitHub Desktop.
import sys
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
# Script: fit a linear regression on "dataTraining.csv", predict the target
# column 'y' for the CSV file given as the first command-line argument, and
# write the ids, original targets and predictions to /data/outputTest.txt.
#
# Both CSV files are read with 'x001' as the index column and must contain a
# 'y' column holding the target.
if len(sys.argv) < 2:
    # Exit with a readable usage message instead of an IndexError traceback.
    sys.exit("usage: <script> <test_csv>  (path of the CSV file to predict on)")
filename = sys.argv[1]

# Read the data that will serve for training
X_full = pd.read_csv("dataTraining.csv", index_col='x001')
# Read the data that will serve for testing
X_test = pd.read_csv(filename, index_col='x001')

# Remove rows with a missing target: they can neither be fitted on nor be
# compared against a prediction.
X_full = X_full.dropna(axis=0, subset=['y'])
X_test = X_test.dropna(axis=0, subset=['y'])

# Separate the target from the predictors in both training and test data;
# pop() returns the 'y' column and removes it from the frame in one step.
y = X_full.pop('y')
y_test = X_test.pop('y')

# NOTE(review): the predictors are handed to LinearRegression as-is; the
# SimpleImputer imported at the top of the file is not used here, so missing
# values in the predictor columns are not handled — confirm the input files
# contain none.
model = LinearRegression()
model.fit(X_full, y)
preds = model.predict(X_test)

# Save test predictions to file, one row per test sample.
output = pd.DataFrame({'Id': X_test.index, 'Y Original': y_test, 'Y predicted': preds})
output.to_csv('/data/outputTest.txt', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment