Pascal Potvin TheLoneNut

## new_triplet_loss.py
def triplet_loss(y_true, y_pred, alpha = ALPHA):
    """
    Implementation of the triplet loss function

    Arguments:
    y_true -- true labels, required when you define a loss in Keras, you don't need it in this function.
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor data
            positive -- the encodings for the positive data (similar to anchor)
            negative -- the encodings for the negative data (different from anchor)

## triplet_loss.py
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """
    Implementation of the triplet loss function

    Arguments:
    y_true -- true labels, required when you define a loss in Keras, not used in this function.
    y_pred -- python list containing three objects:
            anchor:   the encodings for the anchor data
            positive: the encodings for the positive data (similar to anchor)
            negative: the encodings for the negative data (different from anchor)

## base_network.py
def create_base_network(in_dims, out_dims):
    """
    Base network to be shared.

    Arguments:
    in_dims -- shape passed as input_shape to the first layer
    out_dims -- not referenced in the visible part of this function
                (NOTE(review): the snippet looks truncated; presumably used further down)
    """
    model = Sequential()

    # Layers are listed in the order they are stacked onto the model.
    stack = (
        BatchNormalization(input_shape=in_dims),
        LSTM(512, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, implementation=2),
        LSTM(512, return_sequences=False, dropout=0.2, recurrent_dropout=0.2, implementation=2),
        BatchNormalization(),
        Dense(512, activation='relu'),
    )
    for layer in stack:
        model.add(layer)

## siamese_network.py
# Dimensions handed to the network builders
in_dims = (N_MINS, n_feat)
out_dims = N_FACTORS

# Network definition
with tf.device(tf_device):

    # Three inputs of identical shape, created in anchor / positive / negative order
    anchor_in, pos_in, neg_in = (Input(shape=in_dims) for _ in range(3))

## training.py
# Fit the model on the training data
model.fit(
    train_data,
    y_dummie,
    batch_size=256,
    epochs=10,
)

## encoding.py
def traffic_to_encoding(x, model):
    """
    Run a single example through the model.

    Arguments:
    x -- one example; it is wrapped in an outer array so the model receives a batch of one
    model -- object exposing predict()

    Returns:
    whatever model.predict returns for that one-element batch
    """
    single_batch = np.array([x])
    return model.predict(single_batch)

## identify.py
def identify_traffic(x, database, model):
    """
    Implements traffic recognition.

    Arguments:
    x -- the traffic to identify
    database -- database containing recognized traffic encodings
    model -- the encoding model

    Returns:

## database.py
# Reference encodings keyed by label; entries are evaluated in the order written
database = {
    'normal': traffic_to_encoding(get_example_label(train_cases_df, df_lens, 0), base_network),
    'error2': traffic_to_encoding(get_example_label(train_cases_df, df_lens, 1), base_network),
}

# Prediction on traffic
identify_traffic(x, database, base_network)

## pmi.py
def pmi(df):
    '''
    Calculate the positive pointwise mutual information score for each entry
    https://en.wikipedia.org/wiki/Pointwise_mutual_information
    We use the log( p(y|x)/p(y) ), y being the column, x being the row

    Arguments:
    df -- pandas DataFrame to score
    '''
    # Get numpy array from pandas df.
    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
    # .values yields the same array and is available on every pandas version.
    arr = df.values

    # p(y|x) probability of each t1 overlap within the row

## distance.py
# Score the frame with pmi(), then pass the result through normalize() with copy=False
# NOTE(review): normalize / BallTree are presumably scikit-learn's -- confirm against the imports
X = pmi(df)
X = normalize(X, copy=False)
# Index the rows of X; p=2 is presumably the Minkowski order (i.e. Euclidean) -- confirm
tree = BallTree(X, p=2)
# Query with k=10 for one row of X; <specific_example> is a placeholder for that row's index.
# return_distance=True is why two values (knn_d, knn_ix) are unpacked.
knn_d, knn_ix = tree.query([X[<specific_example>]], k=10, return_distance=True)
	def triplet_loss(y_true, y_pred, alpha = ALPHA):
	"""
	Implementation of the triplet loss function

	Arguments:
	y_true -- true labels, required when you define a loss in Keras, you don't need it in this function.
	y_pred -- python list containing three objects:
	anchor -- the encodings for the anchor data
	positive -- the encodings for the positive data (similar to anchor)
	negative -- the encodings for the negative data (different from anchor)
	def create_base_network(in_dims, out_dims):
	    """
	    Base network to be shared.

	    Arguments:
	    in_dims -- shape passed as input_shape to the first layer
	    out_dims -- not referenced in the visible part of this function
	                (NOTE(review): the snippet looks truncated; presumably used further down)
	    """
	    # The body is nested under the def again; it had been flattened to the def's own indent.
	    model = Sequential()
	    model.add(BatchNormalization(input_shape=in_dims))
	    model.add(LSTM(512, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, implementation=2))
	    model.add(LSTM(512, return_sequences=False, dropout=0.2, recurrent_dropout=0.2, implementation=2))
	    model.add(BatchNormalization())
	    model.add(Dense(512, activation='relu'))
	# Dimensions handed to the network builders
	in_dims = (N_MINS, n_feat)
	out_dims = N_FACTORS

	# Network definition
	with tf.device(tf_device):

	    # Create the 3 inputs (nested under the with-statement again; the indent had been lost)
	    anchor_in = Input(shape=in_dims)
	    pos_in = Input(shape=in_dims)
	    neg_in = Input(shape=in_dims)
	# Fit the model on the training data
	model.fit(
	    train_data,
	    y_dummie,
	    batch_size=256,
	    epochs=10,
	)
	def traffic_to_encoding(x, model):
	    """
	    Run a single example through the model.

	    Arguments:
	    x -- one example; it is wrapped in an outer array so the model receives a batch of one
	    model -- object exposing predict()

	    Returns:
	    whatever model.predict returns for that one-element batch
	    """
	    # The return is nested under the def again; it had been flattened to the def's own indent.
	    return model.predict(np.array([x]))
	def identify_traffic(x, database, model):
	"""
	Implements traffic recognition.

	Arguments:
	x -- the traffic to identify
	database -- database containing recognized traffic encodings
	model -- the encoding model

	Returns:
	# Reference encodings keyed by label; entries are evaluated in the order written
	database = {
	    'normal': traffic_to_encoding(get_example_label(train_cases_df, df_lens, 0), base_network),
	    'error2': traffic_to_encoding(get_example_label(train_cases_df, df_lens, 1), base_network),
	}

	# Prediction on traffic
	identify_traffic(x, database, base_network)
	def pmi(df):
	    '''
	    Calculate the positive pointwise mutual information score for each entry
	    https://en.wikipedia.org/wiki/Pointwise_mutual_information
	    We use the log( p(y|x)/p(y) ), y being the column, x being the row

	    Arguments:
	    df -- pandas DataFrame to score
	    '''
	    # Get numpy array from pandas df.
	    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
	    # .values yields the same array and is available on every pandas version.
	    arr = df.values

	    # p(y|x) probability of each t1 overlap within the row
	# Score the frame with pmi(), then pass the result through normalize() with copy=False
	# NOTE(review): normalize / BallTree are presumably scikit-learn's -- confirm against the imports
	X = pmi(df)
	X = normalize(X, copy=False)
	# Index the rows of X; p=2 is presumably the Minkowski order (i.e. Euclidean) -- confirm
	tree = BallTree(X, p=2)
	# Query with k=10 for one row of X; <specific_example> is a placeholder for that row's index.
	# return_distance=True is why two values (knn_d, knn_ix) are unpacked.
	knn_d, knn_ix = tree.query([X[<specific_example>]], k=10, return_distance=True)