Created
October 26, 2016 20:47
-
-
Save LukasMosser/cd645bad2bdbbb419098ac3ea363f2b3 to your computer and use it in GitHub Desktop.
K Fold training, with one well as blind well.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import pandas as pd | |
import numpy as np | |
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
import matplotlib.colors as colors | |
from mpl_toolkits.axes_grid1 import make_axes_locatable | |
from pandas import set_option | |
from sklearn import preprocessing | |
""" | |
Largely unchanged code from the original contest notebook.
Changes:
The step that dropped wells has been removed.
""" | |
def label_facies(row, labels):
    """Return the facies label for one data row.

    Facies codes are 1-based, so code ``k`` selects ``labels[k - 1]``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a ``'Facies'``
            entry holding the integer facies code.
        labels: Sequence of label strings ordered by facies code.

    Returns:
        The label stored at position ``row['Facies'] - 1``.

    Raises:
        IndexError: If the facies code is below 1 or beyond ``len(labels)``.
    """
    facies_code = row['Facies']
    # A code below 1 would become a negative index and silently wrap around
    # to a label from the end of the list, so reject it explicitly.
    if facies_code < 1:
        raise IndexError("facies code out of range: %r" % (facies_code,))
    return labels[facies_code - 1]
# Limit DataFrame reprs to 10 rows and disable pandas' chained-assignment
# warning for the in-place column edits below.
set_option("display.max_rows", 10)
pd.options.mode.chained_assignment = None

# Load the contest data; the CSV is expected in the working directory.
filename = 'facies_vectors.csv'
training_data = pd.read_csv(filename)
training_data['Well Name'] = training_data['Well Name'].astype('category')
training_data['Formation'] = training_data['Formation'].astype('category')
# Bare expression: its value is discarded when this runs as a plain script.
training_data['Well Name'].unique()

# 1=sandstone 2=c_siltstone 3=f_siltstone
# 4=marine_silt_shale 5=mudstone 6=wackestone 7=dolomite
# 8=packstone 9=bafflestone
facies_colors = ['#F4D03F', '#F5B041', '#DC7633', '#6E2C00',
                 '#1B4F72', '#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
                 'WS', 'D', 'PS', 'BS']

# facies_color_map is a dictionary that maps facies labels
# to their respective colors (both lists are in facies-code order).
facies_color_map = dict(zip(facies_labels, facies_colors))

# Attach the text label matching each row's numeric facies code.
training_data.loc[:, 'FaciesLabels'] = training_data.apply(
    lambda row: label_facies(row, facies_labels), axis=1)
# Bare expression: its value is discarded when this runs as a plain script.
training_data.describe()

# Keep only the rows that have a PE measurement.
PE_mask = training_data['PE'].notnull().values
training_data = training_data[PE_mask]
correct_facies_labels = training_data['Facies'].values
""" | |
End of original tutorial code | |
""" | |
# Create the list of unique well names. Sorting makes the fold order
# reproducible; iterating a bare set gives an arbitrary order.
names = sorted(set(training_data["Well Name"]))

# Create a dictionary of the well datasets, continued from original contest notebook
# But perform dropping for each well individually
# Maybe not necessary.
well_datas = {}
for name in names:
    well = training_data[training_data["Well Name"] == name]
    well_labels = well['Facies'].values.astype(np.int64)
    # Drop the identifier and target columns; what remains is the feature matrix.
    well = well.drop(['Formation', 'Well Name', 'Depth', 'Facies', 'FaciesLabels'], axis=1).values
    well_datas[name] = [well, well_labels]

# Per-well feature matrices (float32) and label vectors (int64), keyed by well name.
X_data = {}
Y_data = {}
# .items() works on both Python 2 and 3; .iteritems() exists only on Python 2.
for name, (data, labels) in well_datas.items():
    Y_data[name] = np.array(labels, dtype=np.int64)
    X_data[name] = np.array(data, dtype=np.float32)
# Build one fold per well: that well is held out as the blind test well and
# all remaining wells form the training set.
training_sets = []
test_sets = []  # never populated here; each fold's test data lives in training_sets
for blind_name in names:
    # Compare names with != (equality); `is not` tests object identity and is
    # not a reliable way to compare strings.
    train_names = [name for name in names if name != blind_name]

    # Stack features and labels in the same well order so rows stay aligned.
    X_train = np.concatenate([X_data[name] for name in train_names]).astype(np.float32)
    Y_train = np.concatenate([Y_data[name] for name in train_names]).astype(np.int64)
    Y_train = Y_train.reshape(len(Y_train), 1)

    # Fit the scaler on the training wells only, then apply the same
    # transform to the blind well.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)

    X_test = np.array(X_data[blind_name], dtype=np.float32)
    X_test = scaler.transform(X_test)
    # NOTE: Y_test stays a flat int32 vector while Y_train is an (n, 1) int64
    # column, exactly as in the original layout.
    Y_test = np.array(Y_data[blind_name], dtype=np.int32)

    training_sets.append([X_train, Y_train, X_test, Y_test])
# Use as follows: iterate over the folds, train on the training wells and
# score on the blind well. The commented lines are a template to fill in.
scores = []
for i, (X_train, Y_train, X_test, Y_test) in enumerate(training_sets):
    # classifier = some_classifier()
    # classifier.train(X_train, Y_train)
    # Y_Predict = classifier.predict(X_test)
    # Scoring
    # scores.append(score)
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("%s %s %s %s" % (X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))
# print(np.mean(scores))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment