JasonTam/neg_samp.py

## neg_samp.py
"""
Pretty efficient way to sample negatives using binary search.
Imagine `pos_inds` are items a user has interacted with,
and we want a vectorized approach to uniformly sample many negative items.
This is better than (in the case where `n_samp` is large)
sampling from the entire catalog and then checking if it's a positive item.

Ahem... negative sampling for sparse interaction matrices.
"""

import numpy as np
import matplotlib.pyplot as plt

def neg_samp(pos_inds, n_items, n_samp=32):
    """Uniformly sample item indices that are not in `pos_inds`.

    Instead of rejection sampling, draw a rank ``r`` among the negative
    items and map it to the ``r``-th smallest non-positive index with one
    binary search per sample.

    Args:
        pos_inds: Positive item indices (array-like of ints). May be
            unsorted and may contain duplicates; it is sorted and
            de-duplicated internally. Assumed to lie in ``[0, n_items)``.
        n_items: Catalog size; valid indices are ``0 .. n_items - 1``.
        n_samp: Passed to ``np.random.randint`` as ``size`` (number of
            samples to draw).

    Returns:
        Array of sampled negative indices, drawn with replacement.

    Raises:
        ValueError: If there is no negative item left to sample from.
    """
    # The rank-shift trick needs strictly increasing positives, and
    # np.searchsorted needs sorted input; np.unique gives both.
    pos_sorted = np.unique(np.asarray(pos_inds))
    n_neg = n_items - len(pos_sorted)
    if n_neg <= 0:
        raise ValueError(
            "no negative items to sample: n_items=%d, unique positives=%d"
            % (n_items, len(pos_sorted))
        )

    # Rank of each sample among the negatives only.
    ranks = np.random.randint(0, n_neg, size=n_samp)
    # pos_sorted[k] - k == number of negatives smaller than the k-th positive.
    neg_before_pos = pos_sorted - np.arange(len(pos_sorted))
    # side='right' counts every positive that sits before the rank-th
    # negative; adding that count converts a rank into a catalog index.
    return ranks + np.searchsorted(neg_before_pos, ranks, side='right')


# Example: histogram of many draws from a 25-item catalog with three
# positives. The bins for items 3, 9 and 22 should stay empty.
n_items = 25
pos_inds = np.array([3, 9, 22])

plt.hist(
    neg_samp(pos_inds, n_items, n_samp=99999),
    bins=range(n_items + 1),
)
	"""
	Pretty efficient way to sample negatives using binary search.
	Imagine `pos_inds` are items a user has interacted with,
	and we want a vectorized approach to uniformly sample many negative items.
	This is better than (in the case where `n_samp` is large)
	sampling from the entire catalog and then checking if it's a positive item.

	Ahem... negative sampling for sparse interaction matrices.
	"""

	import numpy as np
	import matplotlib.pyplot as plt

	def neg_samp(pos_inds, n_items, n_samp=32):
		"""Uniformly sample item indices that are not in `pos_inds`.

		Instead of rejection sampling, draw a rank ``r`` among the negative
		items and map it to the ``r``-th smallest non-positive index with one
		binary search per sample.

		Args:
			pos_inds: Positive item indices (array-like of ints). May be
				unsorted and may contain duplicates; it is sorted and
				de-duplicated internally. Assumed to lie in ``[0, n_items)``.
			n_items: Catalog size; valid indices are ``0 .. n_items - 1``.
			n_samp: Passed to ``np.random.randint`` as ``size`` (number of
				samples to draw).

		Returns:
			Array of sampled negative indices, drawn with replacement.

		Raises:
			ValueError: If there is no negative item left to sample from.
		"""
		# The rank-shift trick needs strictly increasing positives, and
		# np.searchsorted needs sorted input; np.unique gives both.
		pos_sorted = np.unique(np.asarray(pos_inds))
		n_neg = n_items - len(pos_sorted)
		if n_neg <= 0:
			raise ValueError(
				"no negative items to sample: n_items=%d, unique positives=%d"
				% (n_items, len(pos_sorted))
			)

		# Rank of each sample among the negatives only.
		ranks = np.random.randint(0, n_neg, size=n_samp)
		# pos_sorted[k] - k == number of negatives smaller than the k-th positive.
		neg_before_pos = pos_sorted - np.arange(len(pos_sorted))
		# side='right' counts every positive that sits before the rank-th
		# negative; adding that count converts a rank into a catalog index.
		return ranks + np.searchsorted(neg_before_pos, ranks, side='right')


	# Example: histogram of many draws from a 25-item catalog with three
	# positives. The bins for items 3, 9 and 22 should stay empty.
	n_items = 25
	pos_inds = np.array([3, 9, 22])

	plt.hist(
		neg_samp(pos_inds, n_items, n_samp=99999),
		bins=range(n_items + 1),
	)