m.zaradzki mzaradzki

## fcn16_code_sample.py
# Build the base model with the project helper fcn32_blank() (defined elsewhere).
fcn32model = fcn32_blank()

# Output shape reported by the model's last layer.
fcn32shape = fcn32model.layers[-1].output_shape

# Entry at index 2 of that shape.
# NOTE(review): presumably a spatial dimension of the output map — confirm the axis ordering in use.
fcn32size = fcn32shape[2] # INFO: =32 when images are 512x512

# 1x1 convolution layer with 21 filters and no activation, named 'score_pool4'.
# NOTE(review): 21 is presumably the number of output classes and the name suggests
# the layer scores the pool4 feature map — confirm against the full script.
sp4 = Convolution2D(21, 1, 1,
                    border_mode='same', # INFO : border_mode does not matter for 1x1
                    activation=None,
                    name='score_pool4')

## cbow_code_sample_keras.py

modelWRD = Sequential()

# 1st layer is a dummy-permutation=identity to specify input shape
modelWRD.add( Permute((1,), input_shape=(n_words,)) )

modelWRD.add( EMBEDDING )

modelWRD.add( Lambda(
                   lambda x : K.sum(x,axis=1), # sum over words

## embedding_optional_glove.py

# Build the embedding layer. The GloVe matrix is passed as the layer's
# `weights` only when `pretrained` is truthy; otherwise no weights are given.
EMBEDDING = Embedding(
    vocab_size,
    embedding_dimension,
    **({'weights': [glove_embedding_matrix]} if pretrained else {})
)


## installing_caffe.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mzaradzki
                / installing_caffe.md
            
            
              Last active
              April 24, 2017 11:07
            
              
                tips on installing Caffe on an AWS Ubuntu instance
              
          
    WARNING : Python 2.7 is much easier to run with Caffe than Python 3.5


https://iqbalnaved.wordpress.com/2016/05/08/installing-caffe-and-pycaffe-on-ubuntu-14-04/
WARNING : Command line at step 2 is actually 2 lines
WARNING : You don't have to use cmake for compiling; see the berkeleyvision install guide below


http://caffe.berkeleyvision.org/install_apt.html
WARNING : You probably don't need all the dependencies, especially the "manual" ones


## docIndexer_snippet1.js
exports.handler = (event, context, callback) => {

    // WARNING :
    // This snippet assumes : event.Records[0].eventName == 'ObjectCreated:Put'
    // but the full code deals with both 'ObjectCreated:Put' and 'ObjectRemoved:Delete'

    var filename = event.Records[0].s3.object.key;
    var bucketname = event.Records[0].s3.bucket.name;

    var params = {

## docIndexer_snippet2.js
addToIndex = function (bucketName, docName, docContent, context) {

    var csd = new AWS.CloudSearchDomain({
        endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
        apiVersion: '2013-01-01'
    });

    // see documentation at :
    // http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
    var jbatch = [  {"type": "add",

## docSearcher_snippet1.js
exports.handler = (event, context, callback) => {

    var csd = new AWS.CloudSearchDomain({
        endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
        apiVersion: '2013-01-01'
    });

    var params = {
        query: event.query,
        sort: '_score desc',

## pump_trim_categoricals.py
# Collapse infrequent categories: in every column listed in `categories`,
# values seen fewer than 40 times are replaced by the label '<col>_rare'.
for col in categories:
    cs = dfX[col].value_counts(normalize=False, sort=True, ascending=False)
    # Threshold = 40 occurrences
    rare_values = cs[cs < 40].index.tolist()
    if rare_values:
        print('Trim values : ', col, len(rare_values))
        dfX.loc[dfX[col].isin(rare_values), col] = col + '_rare'

# Output :
# Trim values : funder      1730
# Trim values : installer   1982

## pump_similar_variables.py
# Search for variables that are very similar
def show_similars(cols, threshold=0.90):
    """Print every pair of columns in ``cols`` that are strongly associated.

    For each unordered pair of columns, a contingency table is built from the
    module-level DataFrame ``dfX`` with ``pd.crosstab`` and passed to
    ``cramers_corrected_stat`` (Cramer's V statistic). Pairs scoring strictly
    above ``threshold`` are printed as ``(col1, col2)`` followed by the score
    as an integer percentage (truncated by ``int``).

    Args:
        cols: Iterable of column names of ``dfX`` to compare pairwise.
        threshold: Exclusive lower bound on the statistic for a pair to be
            reported.

    Returns:
        None. Results are written to standard output.
    """
    # Local import keeps the snippet self-contained.
    from itertools import combinations

    # combinations() yields each unordered pair exactly once, in the same
    # order as a nested index loop filtered on i1 < i2, without visiting
    # (and discarding) the other half of the index pairs.
    for col1, col2 in combinations(cols, 2):
        cm12 = pd.crosstab(dfX[col1], dfX[col2]).values  # contingency table
        cv12 = cramers_corrected_stat(cm12)  # Cramer V statistic
        if cv12 > threshold:
            print((col1, col2), int(cv12 * 100))

## pump_null_categorical.py
# Replace null entries of string-typed columns with the sentinel 'MISSING'.
for col, tp in nullcols:
    if tp != str:
        continue  # only columns whose recorded type is str are filled here
    dfX.loc[dfX[col].isnull(), col] = 'MISSING'
	# Build the base model with the project helper fcn32_blank() (defined elsewhere).
	fcn32model = fcn32_blank()

	# Output shape reported by the model's last layer.
	fcn32shape = fcn32model.layers[-1].output_shape

	# Entry at index 2 of that shape.
	# NOTE(review): presumably a spatial dimension of the output map — confirm the axis ordering in use.
	fcn32size = fcn32shape[2] # INFO: =32 when images are 512x512

	# 1x1 convolution layer with 21 filters and no activation, named 'score_pool4'.
	# NOTE(review): 21 is presumably the number of output classes — confirm against the full script.
	sp4 = Convolution2D(21, 1, 1,
	border_mode='same', # INFO : border_mode does not matter for 1x1
	activation=None,
	name='score_pool4')

	modelWRD = Sequential()

	# 1st layer is a dummy-permutation=identity to specify input shape
	modelWRD.add( Permute((1,), input_shape=(n_words,)) )

	modelWRD.add( EMBEDDING )

	modelWRD.add( Lambda(
	lambda x : K.sum(x,axis=1), # sum over words

	# Build the embedding layer. The GloVe matrix is passed as the layer's
	# `weights` only when `pretrained` is truthy.
	# (Branch bodies re-indented: without indentation the if/else does not parse.)
	if pretrained:
	    EMBEDDING = Embedding(vocab_size,
	                          embedding_dimension,
	                          weights=[glove_embedding_matrix])
	else:
	    EMBEDDING = Embedding(vocab_size,
	                          embedding_dimension)
	exports.handler = (event, context, callback) => {

	// WARNING :
	// This snippet assumes : event.Records[0].eventName == 'ObjectCreated:Put'
	// but the full code deals with both 'ObjectCreated:Put' and 'ObjectRemoved:Delete'

	var filename = event.Records[0].s3.object.key;
	var bucketname = event.Records[0].s3.bucket.name;

	var params = {
	addToIndex = function (bucketName, docName, docContent, context) {

	var csd = new AWS.CloudSearchDomain({
	endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
	apiVersion: '2013-01-01'
	});

	// see documentation at :
	// http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
	var jbatch = [ {"type": "add",
	# Collapse infrequent categories: in every column listed in `categories`,
	# values seen fewer than 40 times are replaced by the label '<col>_rare'.
	# (Loop and if bodies re-indented: without indentation the block does not parse.)
	for col in categories:
	    cs = dfX[col].value_counts(normalize=False, sort=True, ascending=False)
	    rare_values = [k for k in cs.keys() if cs[k]<40] # Threshold = 40 occurrences
	    if len(rare_values)>0:
	        print( 'Trim values : ', col, len(rare_values))
	        dfX.loc[dfX[col].isin(rare_values), col] = col+'_rare'

	# Output :
	# Trim values : funder 1730
	# Trim values : installer 1982
	# Search for variables that are very similar
	def show_similars(cols, threshold=0.90):
	    """Print every pair of columns in ``cols`` that are strongly associated.

	    For each unordered pair of columns, a contingency table is built from
	    the module-level DataFrame ``dfX`` with ``pd.crosstab`` and passed to
	    ``cramers_corrected_stat`` (Cramer's V statistic). Pairs scoring
	    strictly above ``threshold`` are printed as ``(col1, col2)`` followed
	    by the score as an integer percentage (truncated by ``int``).

	    Args:
	        cols: Iterable of column names of ``dfX`` to compare pairwise.
	        threshold: Exclusive lower bound on the statistic for a pair to
	            be reported.

	    Returns:
	        None. Results are written to standard output.
	    """
	    # Local import keeps the snippet self-contained.
	    from itertools import combinations

	    # combinations() yields each unordered pair exactly once, in the same
	    # order as a nested index loop filtered on i1 < i2.
	    for col1, col2 in combinations(cols, 2):
	        cm12 = pd.crosstab(dfX[col1], dfX[col2]).values  # contingency table
	        cv12 = cramers_corrected_stat(cm12)  # Cramer V statistic
	        if cv12 > threshold:
	            print((col1, col2), int(cv12 * 100))
	# Replace null entries of string-typed columns with the sentinel 'MISSING'.
	# (Loop and if bodies re-indented: without indentation the block does not parse.)
	for col, tp in nullcols:
	    if (tp == str):
	        dfX.loc[dfX[col].isnull(), col] = 'MISSING'