Skip to content

Instantly share code, notes, and snippets.

# Instantiate the FCN-32 network via fcn32_blank() (defined elsewhere) and
# read entry 2 of the output shape of its last layer.
fcn32model = fcn32_blank()
fcn32shape = fcn32model.layers[-1].output_shape
fcn32size = fcn32shape[2] # INFO: =32 when images are 512x512
# Convolution layer named 'score_pool4' with no activation.
# NOTE(review): the positional (21, 1, 1) arguments look like the Keras 1
# (nb_filter, nb_row, nb_col) form, i.e. 21 filters of size 1x1 - presumably
# one filter per segmentation class; confirm against the Keras version in use.
# The layer is only constructed here; the visible code does not apply it to a tensor.
sp4 = Convolution2D(21, 1, 1,
border_mode='same', # INFO : border_mode does not matter for 1x1
activation=None,
name='score_pool4')
# Build the embedding layer. When `pretrained` is truthy the layer is seeded
# with the GloVe matrix; otherwise no initial weights are passed.
EMBEDDING = (
    Embedding(vocab_size, embedding_dimension, weights=[glove_embedding_matrix])
    if pretrained
    else Embedding(vocab_size, embedding_dimension)
)
@mzaradzki
mzaradzki / cbow_code_sample_keras.py
Last active April 4, 2017 08:09
CBOW word embedding in Keras
# Word-level model: an empty Sequential stack to which layers are added in order.
modelWRD = Sequential()
# 1st layer is a dummy-permutation=identity to specify input shape
# (permuting a single axis leaves the data unchanged; the layer only carries
# input_shape=(n_words,) so the next layer knows its input size)
modelWRD.add( Permute((1,), input_shape=(n_words,)) )
# Second layer: the EMBEDDING layer object, which is defined outside this fragment.
modelWRD.add( EMBEDDING )
modelWRD.add( Lambda(
lambda x : K.sum(x,axis=1), # sum over words
@mzaradzki
mzaradzki / installing_caffe.md
Last active April 24, 2017 11:07
tips on installing Caffe on an AWS Ubuntu instance
@mzaradzki
mzaradzki / docIndexer_snippet2.js
Created May 29, 2017 09:22
Lambda code to add a document to CloudSearch using javascript SDK
addToIndex = function (bucketName, docName, docContent, context) {
var csd = new AWS.CloudSearchDomain({
endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
apiVersion: '2013-01-01'
});
// see documentation at :
// http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
var jbatch = [ {"type": "add",
# A construction_year of 0 marks a missing value. Record which rows are
# missing (as a 0/1 indicator column) before the zeros are overwritten.
year_is_missing = dfX['construction_year'] == 0
dfX['construction_year_missing'] = year_is_missing * 1
dates.append('construction_year_missing')  # `dates` lists the date-related fields

# Impute the missing years with the mean of the known (strictly positive)
# years; median or oldest year would be alternative fill strategies.
# int() truncates the mean to a whole year.
mean_year = dfX.loc[dfX['construction_year'] > 0, 'construction_year'].mean()
dfX.loc[year_is_missing, 'construction_year'] = int(mean_year)
# Derive integer year and month features from the 'date_recorded' string:
# the stamp is split on '-' and fields 0 and 1 are converted to int.
from dateutil import parser
dfX['date_recorded_year'] = dfX['date_recorded'].apply(
    lambda stamp: int(stamp.split('-')[0]))
dfX['date_recorded_month'] = dfX['date_recorded'].apply(
    lambda stamp: int(stamp.split('-')[1]))
# Register both new columns in the list of date-related fields.
dates.extend(['date_recorded_year', 'date_recorded_month'])
# WARNING : probably not useful for this dataset
import pandas as pd
# Load the two training CSV files into DataFrames. The paths are relative,
# so both files must be in the current working directory.
dfX = pd.read_csv('PUMP_training_set_values.csv') # predictive variables
dfY = pd.read_csv('PUMP_training_set_labels.csv') # target variable
# One-hot encode the categorical columns, one column per iteration.
# dfOHE starts as None and stays None if `categories` is empty.
dfOHE = None
for col in categories:
    # Indicator columns for this category, named '<col>_<value>'.
    one_hot = pd.get_dummies(df[col], prefix=col)
    # Drop the raw column: from `df` on the first pass, and from the
    # accumulated frame on every later pass.
    # NOTE(review): `one_hot` is not merged into dfOHE in the visible code -
    # confirm that a join step follows this fragment.
    dfOHE = (df if dfOHE is None else dfOHE).drop(col, axis=1)
from sklearn.linear_model import LogisticRegression
# Shuffle all rows (frac=1 samples the whole frame) so the positional split
# below does not depend on the original row order.
dfLR = dfOHE.sample(frac=1)
# The first 45000 shuffled rows form the training set; the rest are the testing set.
dfLR_trn, dfLR_tst = dfLR[:45000], dfLR[45000:]
# 'ovr' = one (class) versus rest (of classes)
LR = LogisticRegression(multi_class='ovr')
# Fit on the predictor columns against the encoded status_group label.
LR.fit(
    dfLR_trn[predictors].values,
    dfLR_trn['status_group_enc'].values
)
# model accuracy score between 0% and 100%