Skip to content

Instantly share code, notes, and snippets.

View dcshapiro's full-sized avatar
🏡
Having Fun With Data

Daniel Shapiro dcshapiro

🏡
Having Fun With Data
View GitHub Profile
@dcshapiro
dcshapiro / gist:0930e7ddcd9149a8db8c51a58fefdf17
Created August 7, 2017 17:41
Collecting and organizing face images
# Remove duplicate files under /root/images/ (fdupes: -r recurse into
# subdirectories, -d delete duplicates, -N do not prompt and keep the first
# file of each duplicate set).
fdupes -rdN /root/images/
# Remove blank (zero-size) files from the image tree.
find /root/images/ -size 0 -delete
# Throw it all in one zip file: -0 stores the files without compression and
# -r recurses into images/.
# NOTE(review): the path here is relative while the commands above use
# /root/images/ — presumably this is run from /root; confirm.
zip -0 -r faces.zip images/
# Now on the Windows side (cmd.exe, not a Unix shell), after unzipping, flatten
# the images into one directory: walk the faces folder recursively and move
# every file found into the top-level faces folder.
# %f is the loop-variable form for the interactive prompt; inside a .bat file
# it has to be written %%f.
for /r C:\Users\Daniel\Downloads\faces %f in (*) do @move "%f" C:\Users\Daniel\Downloads\faces
@dcshapiro
dcshapiro / gist:4aad96aaa24d39513eb862d5ea97ed42
Created October 8, 2017 23:39
Extract event features from an event
def getEventFeatures(event, map, graph):
    """Outline of how one event becomes a fixed-length feature vector ``x``.

    This is a design sketch: the extraction steps are listed in order below,
    but none of them is implemented yet, so calling the function raises
    ``NotImplementedError`` instead of failing on an undefined ``x``.

    Args:
        event: The event to extract features from.
        map: The categorical feature map used to binarize categorical
            fields. The name shadows the builtin ``map``; it is kept so
            existing keyword callers keep working.
        graph: The knowledge graph that relationship data is read from.

    Returns:
        The concatenated fixed-length feature vector ``x`` (once the steps
        below are implemented).

    Raises:
        NotImplementedError: Always, until the extraction steps are written.
    """
    # Planned steps, in order:
    # 1. Extract numerical fields into a numpy array.
    # 2. Extract categorical fields.
    # 3. Binarize categorical fields using the categorical feature map.
    # 4. Extract image data into a fixed-size vector using CNN(s).
    # 5. Extract text data into a fixed-length word2vec vector
    #    (e.g. sentence vector, paragraph vector, doc2vec).
    # 6. Extract relationship data from the knowledge graph.
    # 7. Concatenate the extracted features above into a fixed-length
    #    feature vector x, for example:
    #        x = np.concatenate((x, image_data), axis=1)
    # 8. return x
    raise NotImplementedError(
        "getEventFeatures is an outline; the feature extraction steps "
        "are not implemented yet"
    )
@dcshapiro
dcshapiro / SimpleClassifierDNN
Created October 8, 2017 23:45
High-level idea for a classifier of feature vectors
# High-level sketch of a dense classifier over fixed-length feature vectors.
# Sequential, Dense, Dropout and GaussianNoise must already be imported from
# Keras, and width, layerCount, dropoutAmount, noiseLevel, amountOfL1,
# amountOfL2, x_train and y_train must be defined before this runs.
import keras.regularizers as kr

# One combined L1 + L2 weight penalty, reused by every Dense layer.
# Keras 2 spelling: l1_l2(...) and kernel_regularizer= replace the Keras 1
# WeightRegularizer(...) and W_regularizer= names.
w_reg = kr.l1_l2(l1=amountOfL1, l2=amountOfL2)

model = Sequential()

# First layer: takes one feature vector (x_train.shape[1] values per sample).
model.add(Dense(width, input_dim=x_train.shape[1], activation='relu', kernel_regularizer=w_reg))
model.add(Dropout(dropoutAmount))

# layerCount hidden stages, each: noise -> dropout -> regularized Dense.
for _ in range(layerCount):
    model.add(GaussianNoise(noiseLevel))
    model.add(Dropout(dropoutAmount))
    model.add(Dense(width, activation='relu', kernel_regularizer=w_reg))

# Output layer: one unit per column of y_train.
# NOTE(review): 'relu' is kept from the original sketch; a classifier output
# would normally use 'softmax' (single-label) or 'sigmoid' (multi-label) —
# confirm the intended activation before training.
model.add(Dense(y_train.shape[1], activation='relu', kernel_regularizer=w_reg))
%%time
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
# Dummy training set: 10000 samples of 20 random features each, paired with
# one-hot labels for class ids drawn at random from 0..9.
import numpy as np
x_train = np.random.random(size=(10000, 20))
class_ids = np.random.randint(10, size=(10000, 1))
y_train = keras.utils.to_categorical(class_ids, num_classes=10)
%%time
from numpy import newaxis
# Predict one test sample per model.predict call (rather than one batched
# call); each sample gets a leading axis added so it is passed as a batch
# containing a single row.
results = [
    model.predict(x_test[row_index][newaxis, :])
    for row_index in range(len(x_test))
]
%%time
# LSTM for international airline passengers problem with regression framing
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
%%time
# Image file that the cells below classify.
img_path = "superintelligence-book-cover.png"
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.preprocessing import image
from keras.applications.vgg19 import preprocess_input, decode_predictions
import numpy as np
# VGG19 loaded with its pretrained ImageNet weights.
model_vgg19 = VGG19(weights='imagenet')
# Wrap the network from its input up to the layer named 'predictions'.
# The functional API keywords are `inputs=` / `outputs=`; the Keras 1
# `input=` / `output=` spellings are deprecated and rejected by later releases.
model = Model(inputs=model_vgg19.input, outputs=model_vgg19.get_layer('predictions').output)
%%time
# Repeat the load -> preprocess -> predict -> decode pipeline 100 times on the
# same image; the cell is run under %%time, so presumably this is a timing run.
# NOTE(review): the indentation of this snippet was lost, so which of the
# lines below belong to the loop body (print in particular) needs confirming.
for i in range(100):
# Load the image from img_path, resized to 224x224.
img = image.load_img(img_path, target_size=(224, 224))
# Convert to an array, add a leading batch axis, then apply VGG19 preprocessing.
x_i = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
# NOTE(review): despite the name, if `model` is the one cut at the
# 'predictions' layer this holds class predictions, not block4_pool activations.
block4_pool_features = model.predict(x_i)
# Decode the predictions for the first (only) image in the batch.
# NOTE(review): top=0 — confirm what decode_predictions returns for 0; a
# `[-top:]` slice would select every class rather than none.
features=(decode_predictions(block4_pool_features,top=0)[0])
print(features)
@dcshapiro
dcshapiro / part1.py
Created December 18, 2017 04:43
Data collection for mock medical notes and billing code generation
import pandas as pd
from random import shuffle
import random
import names
# Body-part vocabulary for the mock medical notes, kept in alphabetical order.
bodyParts = [
    "ankle", "arch", "arm", "armpit",
    "beard", "breast",
    "calf", "cheek", "chest", "chin",
    "earlobe", "elbow", "eyebrow", "eyelash", "eyelid",
    "face", "finger", "forearm", "forehead",
    "gum",
    "heel", "hip",
    "index finger",
    "jaw",
    "knee", "knuckle",
    "leg", "lip",
    "mouth", "mustache",
    "nail", "neck", "nostril",
    "palm", "pinkie", "pupil",
    "scalp", "shin", "shoulder", "sideburns",
    "thigh", "throat", "thumb", "tongue", "tooth",
    "waist", "wrist",
]
psychDisorders = ['Alcohol Addiction','Drug Addiction','Caffeine Addiction','Cannabis Addiction','Hallucinogen Addiction','Inhalant Addiction','Opioid Addiction','Sedative, Hypnotic, Anxiolytic Addiction','Stimulant Addiction','Tobacco Addiction','Gambling Addiction','Agoraphobia','Generalized Anxiety Disorder','Panic Disorder','Selective Mutism','Separation Anxiety Disorder','Social Anxiety Disorder','Specific Phobias','Bipolar Disorder','Cyclothymia','Other Bipolar Disorders','Major Depress
['Acupuncturist' 'Adult Entertainment Store' 'Animal Clinic/Hospital'
'Animal Services' 'Artist' 'Artist Live/Work Studio' 'Assembly Hall'
'Auctioneer' 'Auto Dealer' 'Auto Detailing' 'Auto Painter & Body Shop'
'Auto Parking Lot/Parkade' 'Auto Repairs' 'Auto Washer' 'Auto Wholesaler'
'Beauty Services' 'Bed and Breakfast' 'Boat Charter Services'
'Booking Agency' 'Boot & Shoe Repairs' 'Business Services'
'Carpet/Upholstery Cleaner' 'Caterer' 'Club' 'Community Association'
'Computer Services' 'Contractor' 'Contractor - Special Trades'
'Cosmetologist' 'Dance Hall' 'Dating Services' 'ESL Instruction'
'Educational' 'Electrical Contractor'