Skip to content

Instantly share code, notes, and snippets.

@LukasMosser
Created October 26, 2016 20:47
Show Gist options
  • Save LukasMosser/cd645bad2bdbbb419098ac3ea363f2b3 to your computer and use it in GitHub Desktop.
Save LukasMosser/cd645bad2bdbbb419098ac3ea363f2b3 to your computer and use it in GitHub Desktop.
K Fold training, with one well as blind well.
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
from pandas import set_option
from sklearn import preprocessing
"""
More or less unchanged code from original contest notebook.
Changes:
Removed dropping any wells
"""
def label_facies(row, labels):
    """Return the text label for the facies code stored in ``row``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a ``'Facies'`` entry holding a 1-based facies code.
    labels : sequence
        Labels ordered by facies code, so that ``labels[0]`` belongs to
        facies code 1.

    Returns
    -------
    The element of ``labels`` at position ``code - 1``.
    """
    # The code is cast to int because a row taken from an all-numeric frame
    # is upcast to float, and a float cannot be used as a sequence index.
    return labels[int(row['Facies']) - 1]
# Limit how many rows pandas prints when a frame is displayed.
set_option("display.max_rows", 10)
# Turn off the chained-assignment check (no warning on chained assignment).
pd.options.mode.chained_assignment = None

# Load the well-log table and store the two text columns as categoricals.
filename = 'facies_vectors.csv'
training_data = pd.read_csv(filename)
for categorical_column in ('Well Name', 'Formation'):
    training_data[categorical_column] = (
        training_data[categorical_column].astype('category')
    )

# Bare expression: lists the distinct well names (result is not stored).
training_data['Well Name'].unique()
# Facies codes, in order:
# 1=sandstone  2=c_siltstone  3=f_siltstone
# 4=marine_silt_shale  5=mudstone  6=wackestone  7=dolomite
# 8=packstone  9=bafflestone
facies_colors = ['#F4D03F', '#F5B041', '#DC7633', '#6E2C00',
                 '#1B4F72', '#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
                 'WS', 'D', 'PS', 'BS']

# facies_color_map is a dictionary that maps each facies label to its
# colour; labels and colours are paired by position in the two lists above.
facies_color_map = dict(zip(facies_labels, facies_colors))
# Add a text label column derived from each row's numeric facies code.
training_data['FaciesLabels'] = training_data.apply(
    label_facies, axis=1, args=(facies_labels,)
)

# Bare expression: summary statistics (result is not stored).
training_data.describe()

# Keep only the rows that have a PE measurement.
PE_mask = training_data['PE'].notnull().values
training_data = training_data.loc[PE_mask]

# Facies codes of the rows that survived the PE filter.
correct_facies_labels = training_data['Facies'].values
"""
End of original tutorial code
"""
# Collect the distinct well names in order of first appearance, so that the
# order of the folds built from `names` is the same on every run (a plain
# set would order them by string hash).
names = list(training_data["Well Name"].unique())

# Create a dictionary of the per-well datasets, continued from the original
# contest notebook, but perform the column dropping for each well
# individually. Maybe not necessary.
# Each entry maps a well name to [feature_matrix, facies_codes].
non_feature_columns = ['Formation', 'Well Name', 'Depth', 'Facies', 'FaciesLabels']
well_datas = {}
for name in names:
    well = training_data[training_data["Well Name"] == name]
    well_labels = well['Facies'].values.astype(np.int64)
    well_features = well.drop(non_feature_columns, axis=1).values
    well_datas[name] = [well_features, well_labels]
# Split the per-well [features, labels] pairs into two dictionaries keyed by
# well name: float32 feature arrays and int64 label arrays.
# `.items()` is used instead of `.iteritems()` so this runs on Python 2 and 3.
X_data = {}
Y_data = {}
for name, (data, labels) in well_datas.items():
    Y_data[name] = np.array(labels, dtype=np.int64)
    X_data[name] = np.array(data, dtype=np.float32)
# Build one fold per well: that well is held out as the blind test set and
# all remaining wells are stacked together as the training set.
# Each fold is stored as [X_train, Y_train, X_test, Y_test].
training_sets = []
test_sets = []  # never filled here; kept so the module-level name still exists
for blind_well in names:
    # Well names are compared by value (`!=`); identity (`is not`) only works
    # when both sides happen to be the very same string object.
    train_wells = [name for name in X_data if name != blind_well]

    # Features and labels are gathered from the same list of wells, so the
    # rows of X_train and Y_train are guaranteed to line up.
    X_train = np.concatenate(
        [X_data[name] for name in train_wells]
    ).astype(np.float32)
    # Training labels are stored as a column vector of shape (n_samples, 1).
    Y_train = np.concatenate(
        [Y_data[name] for name in train_wells]
    ).astype(np.int64).reshape(-1, 1)

    # The scaler is fitted on the training wells only and then applied to
    # both the training data and the blind well.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)

    X_test = scaler.transform(np.asarray(X_data[blind_well], dtype=np.float32))
    # Test labels stay one-dimensional and int32, as in the original script.
    Y_test = np.asarray(Y_data[blind_well], dtype=np.int32)

    training_sets.append([X_train, Y_train, X_test, Y_test])
# Use as follows: train on each fold, predict on its blind well, collect the
# score, and average the scores at the end.
scores = []
for i, (X_train, Y_train, X_test, Y_test) in enumerate(training_sets):
    # classifier = some_classifier()
    # classifier.train(X_train, Y_train)
    # Y_Predict = classifier.predict(X_test)
    # Scoring
    # scores.append(score)
    # A single formatted string prints identically on Python 2 and Python 3.
    print("%s %s %s %s" % (X_train.shape, Y_train.shape,
                           X_test.shape, Y_test.shape))
# print(np.mean(scores))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment