# peakBreaker/postproc_scikit_sample.py

## postproc_scikit_sample.py
# Pred and prob arrays are numpy array outputs from a sklearn model:
#  - pred_array = model.predict(X).astype(int)
#  - prob_arr = model.predict_proba(X)
#
# Here we run the initial data through multiple models and structure the
# model output into a multilevel dataframe for probabilities and predictions
#
# Typically the next stage would be to enhance the labels of numerical results
# to string/categories or similar based on whatever we want, as well as
# providing the results to a database or a similar destination

# Run every model over the same input frame: class probabilities first,
# then the hard predictions cast to int.
prob_arr_m1 = model1.predict_proba(original_df)
prob_arr_m2 = model2.predict_proba(original_df)
prob_arr_m3 = model3.predict_proba(original_df)
pred_arr_m1 = model1.predict(original_df).astype(int)
pred_arr_m2 = model2.predict(original_df).astype(int)
pred_arr_m3 = model3.predict(original_df).astype(int)

# Stack the per-model outputs side by side
predictions = np.column_stack((pred_arr_m1, pred_arr_m2, pred_arr_m3))
probabilities = np.column_stack((prob_arr_m1, prob_arr_m2, prob_arr_m3))

# Create the multilevel index
# NOTE(review): the labels below assume model1 and model3 each emit 2
# probability columns and model2 emits 6 -- confirm against the fitted models.
probcols_raw = ['m1_prob1', 'm1_prob2', 'm2_prob1', 'm2_prob2', 'm2_prob3', 'm2_prob4',
                'm2_prob5', 'm2_prob6', 'm3_prob1', 'm3_prob2']
predcols_raw = ['m1_prediction', 'm2_prediction', 'm3_prediction']
# Pair every label with its top-level group so each group can be addressed
# as pred_df['segments'] / pred_df['probabilities'].
predcols = [('segments', name) for name in predcols_raw]
probcols = [('probabilities', name) for name in probcols_raw]
# from_tuples expects one flat sequence of (group, label) tuples.
cols = pd.MultiIndex.from_tuples([*predcols, *probcols])

# Converting to dataframe with multiindex: start from an empty frame that
# shares the input's row index, then fill each top-level group in one go.
pred_df = pd.DataFrame(index=original_df.index, columns=cols)
pred_df['segments'] = predictions
pred_df['probabilities'] = probabilities
	# Pred and prob arrays are numpy array outputs from a sklearn model:
	# - pred_array = model.predict(X).astype(int)
	# - prob_arr = model.predict_proba(X)
	#
	# Here we run the initial data through multiple models and structure the
	# model output into a multilevel dataframe for probabilities and predictions
	#
	# Typically the next stage would be to enhance the labels of numerical results
	# to string/categories or similar based on whatever we want, as well as
	# providing the results to a database or a similar destination

	# Run every model over the same input frame: class probabilities first,
	# then the hard predictions cast to int.
	prob_arr_m1 = model1.predict_proba(original_df)
	prob_arr_m2 = model2.predict_proba(original_df)
	prob_arr_m3 = model3.predict_proba(original_df)
	pred_arr_m1 = model1.predict(original_df).astype(int)
	pred_arr_m2 = model2.predict(original_df).astype(int)
	pred_arr_m3 = model3.predict(original_df).astype(int)

	# Stack the per-model outputs side by side
	predictions = np.column_stack((pred_arr_m1, pred_arr_m2, pred_arr_m3))
	probabilities = np.column_stack((prob_arr_m1, prob_arr_m2, prob_arr_m3))

	# Create the multilevel index
	# NOTE(review): the labels below assume model1 and model3 each emit 2
	# probability columns and model2 emits 6 -- confirm against the fitted models.
	probcols_raw = ['m1_prob1', 'm1_prob2', 'm2_prob1', 'm2_prob2', 'm2_prob3', 'm2_prob4',
	'm2_prob5', 'm2_prob6', 'm3_prob1', 'm3_prob2']
	predcols_raw = ['m1_prediction', 'm2_prediction', 'm3_prediction']
	# Pair every label with its top-level group so each group can be addressed
	# as pred_df['segments'] / pred_df['probabilities'].
	predcols = [('segments', name) for name in predcols_raw]
	probcols = [('probabilities', name) for name in probcols_raw]
	# from_tuples expects one flat sequence of (group, label) tuples, so the
	# two lists are concatenated rather than nested.
	cols = pd.MultiIndex.from_tuples(predcols + probcols)

	# Converting to dataframe with multiindex: start from an empty frame that
	# shares the input's row index, then fill each top-level group in one go.
	pred_df = pd.DataFrame(index=original_df.index, columns=cols)
	pred_df['segments'] = predictions
	pred_df['probabilities'] = probabilities