uds5501/refineData.py

## refineData.py
# Importing the tasty stuff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib

# NOTE(review): `finalDf` and `np` are not defined in the visible part of this
# file -- presumably created/imported by earlier code; confirm before running.

# One input column and the target column.
X = finalDf['size_in_mb']
y = finalDf['pop_categories']

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=21, test_size=0.25)

# Only a single feature is used, so lay each split out as one column of
# shape (n_samples, 1) before handing it to the scaler.
npX_train = np.array(X_train).reshape(-1, 1)
npX_test = np.array(X_test).reshape(-1, 1)

# Standardise the feature: the scaling statistics are learned from the
# training split only, then the same transform is applied to the test split.
scaler = StandardScaler()
npX_train = scaler.fit_transform(npX_train)
npX_test = scaler.transform(npX_test)
	# Importing the tasty stuff
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import confusion_matrix
	from sklearn.externals import joblib

	# NOTE(review): `finalDf` and `np` are not defined in the visible part of this
	# file -- presumably created/imported by earlier code; confirm before running.

	# One input column and the target column.
	X = finalDf['size_in_mb']
	y = finalDf['pop_categories']

	# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
	X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=21, test_size=0.25)

	# Only a single feature is used, so lay each split out as one column of
	# shape (n_samples, 1) before handing it to the scaler.
	npX_train = np.array(X_train).reshape(-1, 1)
	npX_test = np.array(X_test).reshape(-1, 1)

	# Standardise the feature: the scaling statistics are learned from the
	# training split only, then the same transform is applied to the test split.
	scaler = StandardScaler()
	npX_train = scaler.fit_transform(npX_train)
	npX_test = scaler.transform(npX_test)