# techscientist/model_to_release.py

## model_to_release.py
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout, TimeDistributedDense
from keras.layers.recurrent import LSTM, SimpleRNN

def build_model(num_layers=2, num_units=256, maxlen_rnn=50, dim_label=50):
    '''Build and compile a stacked-LSTM sequence model.

    Every LSTM layer returns its full output sequence. A Dropout(0.2) layer
    follows each LSTM except the last one, and a time-distributed sigmoid
    dense layer of width dim_label sits on top (many-to-many output).

    num_layers: number of stacked LSTM layers, in [2, 3]
    num_units: output dimension of each LSTM layer, in [256, 512, 1024]
    maxlen_rnn: first entry of the input shape given to the first LSTM
    dim_label: second entry of the input shape, and the output dimension

    Returns the model compiled with binary cross-entropy loss and Adam.
    '''
    model = Sequential()
    last_idx = num_layers - 1

    for layer_idx in range(num_layers):
        lstm_kwargs = dict(output_dim=num_units, return_sequences=True)
        if layer_idx == 0:
            # only the first layer declares the input shape
            lstm_kwargs['input_shape'] = (maxlen_rnn, dim_label)
        model.add(LSTM(**lstm_kwargs))

        # no dropout after the final LSTM layer
        if layer_idx < last_idx:
            model.add(Dropout(0.2))

    # many-to-many head; a plain Dense layer with the same arguments would be
    # the many-to-one alternative.
    output_layer = TimeDistributedDense(output_dim=dim_label, activation='sigmoid')
    model.add(output_layer)

    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

## my_metrics.py
"""
https://gist.github.com/bwhite/3726239
"""
import numpy as np
from scipy import spatial
from sklearn.metrics.pairwise import pairwise_distances

def compute_similarity(candidate_song_feature, reference_song_feature, function_name):
    '''Compare two feature vectors with the requested function.

    function_name must be one of ['l2', 'cosine', 'dcg']; anything else
    raises RuntimeError.

    'cosine' and 'l2' reshape both inputs to a single row and return the
    result of sklearn's pairwise_distances (cosine / euclidean metric).
    'dcg' returns dcg_wrapper(candidate, reference).
    '''
    if function_name not in ('l2', 'cosine', 'dcg'):
        raise RuntimeError('Wrong similarity function name,%s' % function_name)

    if function_name == 'dcg':
        return dcg_wrapper(candidate_song_feature, reference_song_feature)

    metric = 'cosine' if function_name == 'cosine' else 'euclidean'
    candidate_row = candidate_song_feature.reshape(1, -1)
    reference_row = reference_song_feature.reshape(1, -1)
    return pairwise_distances(candidate_row, reference_row, metric=metric)

def dcg_wrapper(pred,truth):
    ''' Combine two value vectors into one sequence and score it with DCG.

    input: values, *not rank*. Higher values are more relevant.

    Both vectors are flipped with 1 - x so that smaller means more relevant.
    The argsort of the flipped pred is then re-ordered by the argsort of the
    flipped truth, and that sequence is passed to dcg_at_k with k=10.
    '''
    # flip so that ascending sort order == descending original value
    flipped_pred = 1 - pred # max(pred)==1
    flipped_truth = 1 - truth
    # indices of pred, most relevant first
    pred_order = np.argsort(flipped_pred)
    # walk those indices in the order given by the truth ranking
    truth_order = np.argsort(flipped_truth)
    reordered = [pred_order[idx] for idx in truth_order]
    return dcg_at_k(r=reordered, k=10)

def dcg_at_k(r, k, method=0):
    '''Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> float(dcg_at_k(r, 1))
    3.0
    >>> float(dcg_at_k(r, 1, method=1))
    3.0
    >>> float(dcg_at_k(r, 2))
    5.0
    >>> round(float(dcg_at_k(r, 2, method=1)), 4)
    4.2619
    >>> round(float(dcg_at_k(r, 10)), 4)
    9.6051
    >>> round(float(dcg_at_k(r, 11)), 4)
    9.6051
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain; 0. when there are no scores to consider
    Raises:
        ValueError: if method is neither 0 nor 1 (only checked when there
            is at least one score to consider)
    '''
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
    # dtype is the replacement and also works on older NumPy releases.
    r = np.asarray(r, dtype=float)[:k]
    if not r.size:
        return 0.
    if method == 0:
        # first item is undiscounted, item i (1-based, i >= 2) is divided by log2(i)
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    elif method == 1:
        # item i (1-based) is divided by log2(i + 1)
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    else:
        raise ValueError('method must be 0 or 1.')

def average_distance(feature_pairs, function_name):
    ''' return average distance of the pairs in the input.

    feature_pairs: sized sequence of pairs; pair[0] and pair[1] are passed
        to compute_similarity.
    function_name: forwarded unchanged to compute_similarity.

    Returns 0 when feature_pairs is empty (same convention as
    get_internal_distance when there is nothing to compare), otherwise the
    sum of the pairwise results divided by the number of pairs.
    '''
    num_pairs = len(feature_pairs)
    # len() instead of truthiness so that numpy arrays are accepted too;
    # the guard avoids a ZeroDivisionError on empty input.
    if num_pairs == 0:
        return 0

    total = 0
    for pair in feature_pairs:
        total += compute_similarity(pair[0], pair[1], function_name)

    return total / num_pairs

def get_internal_distance(features, function_name):
    ''' return internal average distance of a song

    features: list of features; every unordered pair (i < j) is compared
        once with compute_similarity.
    function_name: forwarded unchanged to compute_similarity.

    Returns 0 when there are fewer than two features, otherwise the sum of
    the pairwise results divided by the number of pairs.
    '''
    total = 0
    num_pair = 0
    for i, feat_i in enumerate(features):
        # only look at later entries so each pair is visited once
        for feat_j in features[i+1:]:
            total += compute_similarity(feat_i, feat_j, function_name)
            # count the pair; the original added 0 here, so the function
            # always fell into the "no pairs" branch and returned 0
            num_pair += 1
    if num_pair == 0:
        return 0
    return total / num_pair

def get_1_vs_all_distance(features, a_feature, function_name):
    ''' return average distance between a_feature and each entry of features.

    Builds one [a_feature, feat] pair per entry of features and hands the
    list to average_distance together with function_name.
    '''
    pairs = []
    for other in features:
        pairs.append([a_feature, other])
    return average_distance(pairs, function_name)
	import keras
	from keras.models import Sequential
	from keras.layers.core import Dense, Activation, Dropout, TimeDistributedDense
	from keras.layers.recurrent import LSTM, SimpleRNN

	def build_model(num_layers=2, num_units=256, maxlen_rnn=50, dim_label=50):
		'''Build and compile a stacked-LSTM sequence model.

		Every LSTM layer returns its full output sequence. A Dropout(0.2)
		layer follows each LSTM except the last one, and a time-distributed
		sigmoid dense layer of width dim_label sits on top (many-to-many).

		num_layers: number of stacked LSTM layers, in [2, 3]
		num_units: output dimension of each LSTM layer, in [256, 512, 1024]
		maxlen_rnn: first entry of the input shape given to the first LSTM
		dim_label: second entry of the input shape, and the output dimension

		Returns the model compiled with binary cross-entropy loss and Adam.
		'''
		model = Sequential()

		for layer_idx in range(num_layers):
			if layer_idx == 0:
				# only the first layer declares the input shape
				model.add(LSTM(output_dim=num_units,
				               return_sequences=True,
				               input_shape=(maxlen_rnn, dim_label)))
			else:
				model.add(LSTM(output_dim=num_units,
				               return_sequences=True))

			# no dropout after the final LSTM layer
			if layer_idx != num_layers-1:
				model.add(Dropout(0.2))

		model.add(TimeDistributedDense(output_dim=dim_label, activation='sigmoid')) # for many-to-many
		# model.add(Dense(output_dim=dim_label, activation='sigmoid')) # for many-to-one

		model.compile(loss='binary_crossentropy', optimizer='adam')
		return model
	"""
	https://gist.github.com/bwhite/3726239
	"""
	import numpy as np
	from scipy import spatial
	from sklearn.metrics.pairwise import pairwise_distances

	def compute_similarity(candidate_song_feature, reference_song_feature, function_name):
		'''Compare two feature vectors with the requested function.

		function name in ['l2', 'cosine', 'dcg']; anything else raises
		RuntimeError.

		'cosine' and 'l2' reshape both inputs to a single row and return the
		result of sklearn's pairwise_distances (cosine / euclidean metric).
		'dcg' returns dcg_wrapper(candidate, reference).
		'''
		if function_name not in ['l2', 'cosine', 'dcg']:
			raise RuntimeError('Wrong similarity function name,%s' % function_name)
		a = candidate_song_feature
		b = reference_song_feature
		if function_name == 'cosine':
			return pairwise_distances(a.reshape(1,-1),b.reshape(1,-1), metric='cosine')
		elif function_name == 'l2':
			return pairwise_distances(a.reshape(1,-1),b.reshape(1,-1), metric='euclidean')
		elif function_name == 'dcg':
			return dcg_wrapper(a,b)

	def dcg_wrapper(pred,truth):
		''' input: values, not rank.
		Higher values are more relevant.
		combine two vectors to make it a single ranking estimation.

		Both vectors are flipped with 1 - x, the argsort of the flipped pred
		is re-ordered by the argsort of the flipped truth, and the resulting
		sequence is passed to dcg_at_k with k=10.
		'''
		# reverse values and pred for easier computation of DCG.
		# from now, the smaller, more relevant
		pred = 1 - pred # max(pred)==1
		truth = 1 - truth
		# make pred as ranking. i.e. higher rank (0) is more relevant.
		pred = np.argsort(pred)
		# now sort it again w.r.t. truth ranking.
		pred_ranking = [pred[i] for i in np.argsort(truth)]
		return dcg_at_k(r=pred_ranking, k=10)

	def dcg_at_k(r, k, method=0):
		'''Score is discounted cumulative gain (dcg)
		Relevance is positive real values. Can use binary
		as the previous methods.
		Example from
		http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
		>>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
		>>> float(dcg_at_k(r, 1))
		3.0
		>>> float(dcg_at_k(r, 1, method=1))
		3.0
		>>> float(dcg_at_k(r, 2))
		5.0
		>>> round(float(dcg_at_k(r, 2, method=1)), 4)
		4.2619
		>>> round(float(dcg_at_k(r, 10)), 4)
		9.6051
		>>> round(float(dcg_at_k(r, 11)), 4)
		9.6051
		Args:
			r: Relevance scores (list or numpy) in rank order
				(first element is the first item)
			k: Number of results to consider
			method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
				If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
		Returns:
			Discounted cumulative gain; 0. when there are no scores to consider
		Raises:
			ValueError: if method is neither 0 nor 1 (only checked when
				there is at least one score to consider)
		'''
		# np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
		# float dtype is the replacement and also works on older releases.
		r = np.asarray(r, dtype=float)[:k]
		if r.size:
			if method == 0:
				# first item undiscounted, item i (1-based, i >= 2) divided by log2(i)
				return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
			elif method == 1:
				# item i (1-based) divided by log2(i + 1)
				return np.sum(r / np.log2(np.arange(2, r.size + 2)))
			else:
				raise ValueError('method must be 0 or 1.')
		return 0.

	def average_distance(feature_pairs, function_name):
		''' return average distance of the pairs in the input.

		feature_pairs: sized sequence of pairs; pair[0] and pair[1] are
			passed to compute_similarity.
		function_name: forwarded unchanged to compute_similarity.

		Returns 0 when feature_pairs is empty, otherwise the sum of the
		pairwise results divided by the number of pairs.
		'''
		# len() instead of truthiness so that numpy arrays are accepted too;
		# the guard avoids a ZeroDivisionError on empty input.
		if len(feature_pairs) == 0:
			return 0

		ret = 0
		for pair in feature_pairs:
			ret += compute_similarity(pair[0], pair[1], function_name)

		return ret / len(feature_pairs)

	def get_internal_distance(features, function_name):
		''' return internal average distance of a song

		features: list of features; every unordered pair (i < j) is compared
			once with compute_similarity.
		function_name: forwarded unchanged to compute_similarity.

		Returns 0 when there are fewer than two features, otherwise the sum
		of the pairwise results divided by the number of pairs.
		'''
		ret = 0
		num_pair = 0
		for i, feat_i in enumerate(features):
			# only look at later entries so each pair is visited once
			for feat_j in features[i+1:]:
				ret += compute_similarity(feat_i, feat_j, function_name)
				# count the pair; adding 0 here made the function always return 0
				num_pair += 1
		if num_pair == 0:
			return 0
		return ret / num_pair

	def get_1_vs_all_distance(features, a_feature, function_name):
		''' return average distance between a_feature and each entry of features.

		Builds one [a_feature, feat] pair per entry of features and hands the
		list to average_distance together with function_name.
		'''
		return average_distance([[a_feature, feat] for feat in features], function_name)