Skip to content

Instantly share code, notes, and snippets.

# Instantiate the blank FCN-32 model and keep element 2 of the output shape
# of its last layer for later use.
fcn32model = fcn32_blank()
fcn32shape = fcn32model.layers[-1].output_shape
fcn32size = fcn32shape[2]  # INFO: =32 when images are 512x512

# 1x1 convolution layer with 21 filters and no activation, named 'score_pool4'.
sp4 = Convolution2D(
    21, 1, 1,
    border_mode='same',  # INFO : border_mode does not matter for 1x1
    activation=None,
    name='score_pool4',
)
@mzaradzki
mzaradzki / cbow_code_sample_keras.py
Last active April 4, 2017 08:09
CBOW word embedding in Keras
modelWRD = Sequential()
# 1st layer is a dummy-permutation=identity to specify input shape
modelWRD.add( Permute((1,), input_shape=(n_words,)) )
modelWRD.add( EMBEDDING )
modelWRD.add( Lambda(
lambda x : K.sum(x,axis=1), # sum over words
# Build the embedding layer. The GloVe matrix is passed as initial weights
# only when `pretrained` is truthy; otherwise no weights are supplied.
EMBEDDING = (
    Embedding(vocab_size, embedding_dimension, weights=[glove_embedding_matrix])
    if pretrained
    else Embedding(vocab_size, embedding_dimension)
)
@mzaradzki
mzaradzki / installing_caffe.md
Last active April 24, 2017 11:07
tips on installing Caffe on an AWS Ubuntu instance
@mzaradzki
mzaradzki / docIndexer_snippet1.js
Created May 29, 2017 09:13
AWS Lambda to index S3 new files in CloudSearch
exports.handler = (event, context, callback) => {
// WARNING :
// This snippet assumes : event.Records[0].eventName == 'ObjectCreated:Put'
// but the full code deals with both 'ObjectCreated:Put' and 'ObjectRemoved:Delete'
var filename = event.Records[0].s3.object.key;
var bucketname = event.Records[0].s3.bucket.name;
var params = {
@mzaradzki
mzaradzki / docIndexer_snippet2.js
Created May 29, 2017 09:22
Lambda code to add a document to CloudSearch using javascript SDK
addToIndex = function (bucketName, docName, docContent, context) {
var csd = new AWS.CloudSearchDomain({
endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
apiVersion: '2013-01-01'
});
// see documentation at :
// http://docs.aws.amazon.com/cloudsearch/latest/developerguide/preparing-data.html#creating-document-batches
var jbatch = [ {"type": "add",
@mzaradzki
mzaradzki / docSearcher_snippet1.js
Created May 29, 2017 16:16
AWS Lambda function to call Cloud Search API with javascript SDK
exports.handler = (event, context, callback) => {
var csd = new AWS.CloudSearchDomain({
endpoint: CS_NAME+'.'+SERVICES_REGION+'.cloudsearch.amazonaws.com',
apiVersion: '2013-01-01'
});
var params = {
query: event.query,
sort: '_score desc',
# Collapse infrequent category levels: within each column listed in
# `categories`, every value occurring fewer than 40 times in `dfX` is replaced
# by the single label '<column>_rare'.
for col in categories:
    cs = dfX[col].value_counts(normalize=False, sort=True, ascending=False)
    # Boolean mask on the counts instead of a per-key `cs[k]` lookup: avoids
    # label-vs-position ambiguity for integer keys and a Python-level loop.
    rare_values = cs[cs < 40].index.tolist()  # Threshold = 40 occurrences
    if rare_values:
        print('Trim values : ', col, len(rare_values))
        dfX.loc[dfX[col].isin(rare_values), col] = col + '_rare'
# Output :
# Trim values : funder 1730
# Trim values : installer 1982
# Search for variables that are very similar
def show_similars(cols, threshold=0.90):
    """Print every pair of columns whose Cramer's V exceeds ``threshold``.

    For each unordered pair ``(col1, col2)`` drawn from ``cols`` (``col1``
    positioned before ``col2``), a contingency table is built from the
    module-level ``dfX`` and handed to ``cramers_corrected_stat``.  Pairs whose
    statistic is strictly greater than ``threshold`` are printed together with
    the statistic scaled by 100 and converted with ``int``.

    Args:
        cols: iterable of column names to look up in ``dfX``.
        threshold: exclusive lower bound on the statistic for a pair to be
            reported.

    Returns:
        None; matching pairs are written to standard output.
    """
    # Local import so the function does not depend on the file's import block.
    from itertools import combinations

    # combinations() yields each (earlier, later) pair exactly once, in the
    # same order as a nested index loop guarded by `i1 < i2`.
    for col1, col2 in combinations(cols, 2):
        cm12 = pd.crosstab(dfX[col1], dfX[col2]).values  # contingency table
        cv12 = cramers_corrected_stat(cm12)  # Cramer V statistic
        if cv12 > threshold:
            print((col1, col2), int(cv12 * 100))
@mzaradzki
mzaradzki / pump_null_categorical.py
Last active July 3, 2017 10:20
Handling of Null values for categorical and boolean columns in Pandas dataframes
for col, tp in nullcols:
if (tp == str):
dfX.loc[dfX[col].isnull(), col] = 'MISSING'