m.zaradzki mzaradzki

## fcn16_code_sample.py
# Instantiate the blank FCN-32 model and read the output shape of its last layer.
fcn32model = fcn32_blank()
fcn32shape = fcn32model.layers[-1].output_shape

# INFO: index 2 of that shape equals 32 when images are 512x512.
fcn32size = fcn32shape[2]

# 1x1 convolution with 21 filters and no activation, named 'score_pool4'.
# INFO: border_mode does not matter for a 1x1 kernel.
sp4 = Convolution2D(
    21, 1, 1,
    border_mode='same',
    activation=None,
    name='score_pool4',
)

## embedding_optional_glove.py

# Build the embedding layer; the matrix in glove_embedding_matrix is passed as
# initial weights only when `pretrained` is truthy.
EMBEDDING = (
    Embedding(vocab_size, embedding_dimension, weights=[glove_embedding_matrix])
    if pretrained
    else Embedding(vocab_size, embedding_dimension)
)


## cbow_code_sample_keras.py

# Sequential model assembled one layer at a time.
modelWRD = Sequential()

# The first layer is an identity permutation whose only role is to declare
# the input shape (n_words,).
modelWRD.add(Permute((1,), input_shape=(n_words,)))

# Next comes the EMBEDDING layer object.
modelWRD.add(EMBEDDING)

modelWRD.add( Lambda(
                   lambda x : K.sum(x,axis=1), # sum over words

## installing_caffe.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mzaradzki
                / installing_caffe.md
            
            
              Last active
              April 24, 2017 11:07
            
              
                tips on installing Caffe on an AWS Ubuntu instance
              
          
    WARNING : Python 2.7 is much easier to run with Caffe than Python 3.5


https://iqbalnaved.wordpress.com/2016/05/08/installing-caffe-and-pycaffe-on-ubuntu-14-04/
WARNING : Command line at step 2 is actually 2 lines
WARNING : You don't have to use cmake for compiling, see berkeleyvision


http://caffe.berkeleyvision.org/install_apt.html
WARNING : you probably don't need all the dependencies, especially the "manual" ones


## docIndexer_snippet2.js
addToIndex = function (bucketName, docName, docContent, context) {

    var csd = new AWS.CloudSearchDomain({
        endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
        apiVersion: '2013-01-01'
    });

    // see documentation at :
    // http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
    var jbatch = [  {"type": "add",

## pump_null_dates.py
# Record which rows have construction_year == 0 (used here as the "missing"
# marker) before those values are overwritten.
year_is_zero = dfX['construction_year'] == 0
dfX['construction_year_missing'] = year_is_zero.astype(int)
dates.append('construction_year_missing')  # list of date-related fields

# Fill value: mean of the strictly positive years (median or oldest are
# possible alternatives).
known_years = dfX.loc[dfX['construction_year'] > 0, 'construction_year']
mean_year = known_years.mean()

# Overwrite the zero entries with the truncated mean.
dfX.loc[year_is_zero, 'construction_year'] = int(mean_year)

## pump_parsing_dates.py
# Split date_recorded stamp (string) into year, month, day of month, day of week features
from dateutil import parser

# date_recorded strings are split on '-'; field 0 becomes the year feature and
# field 1 the month feature, each registered in `dates` right after creation.
for position, part in enumerate(('year', 'month')):
    column = 'date_recorded_' + part
    dfX[column] = dfX['date_recorded'].apply(
        lambda x, position=position: int(x.split('-')[position])
    )
    dates.append(column)

# WARNING : probably not useful for this dataset

## pump_import_data.py
import pandas as pd

# Predictive variables.
dfX = pd.read_csv('PUMP_training_set_values.csv')
# Target variable.
dfY = pd.read_csv('PUMP_training_set_labels.csv')

## pump_ohe.py
# Accumulator for the frame with already-encoded columns removed; it stays
# None until the first loop iteration.
dfOHE = None

for col in categories: # encode 1 category at a time

    # Indicator (dummy) columns for this category, prefixed with the column name.
    one_hot = pd.get_dummies(df[col], prefix=col)
    # drop column as it is now encoded
    # NOTE(review): one_hot is not attached to dfOHE in the lines visible here;
    # presumably that happens in code cut off below this preview. Confirm.
    if dfOHE is None:
        # First pass: start from the source frame `df` minus this column.
        dfOHE = df.drop(col, axis=1)
    else:
        # Later passes: keep dropping columns from the accumulated frame.
        dfOHE = dfOHE.drop(col, axis=1)

## pump_logistic.py
from sklearn.linear_model import LogisticRegression

# Shuffle every row, then split in two: the first 45000 rows are the training
# set and the remainder is the testing set.
dfLR = dfOHE.sample(frac=1)
dfLR_trn = dfLR[:45000]
dfLR_tst = dfLR[45000:]

# ovr = one (class) versus rest (of classes)
LR = LogisticRegression(multi_class='ovr')
LR.fit(
    dfLR_trn[predictors].values,
    dfLR_trn['status_group_enc'].values,
)

# model accuracy score between 0% and 100%
	fcn32model = fcn32_blank()

	fcn32shape = fcn32model.layers[-1].output_shape

	fcn32size = fcn32shape[2] # INFO: =32 when images are 512x512

	sp4 = Convolution2D(21, 1, 1,
	border_mode='same', # INFO : border_mode does not matter for 1x1
	activation=None,
	name='score_pool4')

	if pretrained:
	EMBEDDING = Embedding(vocab_size,
	embedding_dimension,
	weights=[glove_embedding_matrix])
	else:
	EMBEDDING = Embedding(vocab_size,
	embedding_dimension)

	modelWRD = Sequential()

	# 1st layer is a dummy-permutation=identity to specify input shape
	modelWRD.add( Permute((1,), input_shape=(n_words,)) )

	modelWRD.add( EMBEDDING )

	modelWRD.add( Lambda(
	lambda x : K.sum(x,axis=1), # sum over words
	addToIndex = function (bucketName, docName, docContent, context) {

	var csd = new AWS.CloudSearchDomain({
	endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
	apiVersion: '2013-01-01'
	});

	// see documentation at :
	// http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
	var jbatch = [ {"type": "add",
	# before filling the null keep track of them
	dfX['construction_year_missing'] = (dfX['construction_year']==0)*1
	dates.append( 'construction_year_missing' ) # list of dates related fields

	# to fill missing dates, can use : mean, median or oldest
	mean_year = dfX[dfX['construction_year']>0]['construction_year'].mean()

	dfX.loc[dfX['construction_year']==0, 'construction_year'] = int(mean_year)
	# Split date_recorded stamp (string) into year, month, day of month, day of week features
	from dateutil import parser

	dfX['date_recorded_year'] = dfX['date_recorded'].apply(lambda x: int(x.split('-')[0]))
	dates.append('date_recorded_year')

	dfX['date_recorded_month'] = dfX['date_recorded'].apply(lambda x: int(x.split('-')[1]))
	dates.append('date_recorded_month')

	# WARNING : probably not useful for this dataset
	import pandas as pd

	dfX = pd.read_csv('PUMP_training_set_values.csv') # predictive variables
	dfY = pd.read_csv('PUMP_training_set_labels.csv') # target variable
	dfOHE = None

	for col in categories: # encode 1 category at a time

	one_hot = pd.get_dummies(df[col], prefix=col)
	# drop column as it is now encoded
	if dfOHE is None:
	dfOHE = df.drop(col, axis=1)
	else:
	dfOHE = dfOHE.drop(col, axis=1)
	from sklearn.linear_model import LogisticRegression

	dfLR = dfOHE.sample(frac=1) # shuffle the dataset before spliting it in 2 parts
	dfLR_trn = dfLR[0:45000] # training set
	dfLR_tst = dfLR[45000:] # testing set

	LR = LogisticRegression(multi_class='ovr') # ovr = one (class) versus rest (of classes)
	LR.fit(dfLR_trn[predictors].values, dfLR_trn['status_group_enc'].values)

	# model accuracy score between 0% and 100%