Skip to content

Instantly share code, notes, and snippets.

@kamalbanga
Last active May 12, 2016 09:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kamalbanga/7bb00ece0d6cbff6bc6aff868600b966 to your computer and use it in GitHub Desktop.
Save kamalbanga/7bb00ece0d6cbff6bc6aff868600b966 to your computer and use it in GitHub Desktop.
Generate random data to use as input to the CTM algorithm. Run as `python2.7 ctmdata.py`.
from random import randint
from random import uniform, gauss
def gaussian(mu, sigma=0.1):
    """Draw one Gaussian sample around ``mu`` and clamp it to [0, 1].

    Args:
        mu: Mean of the distribution the sample is drawn from.
        sigma: Standard deviation of the distribution (default 0.1).

    Returns:
        0.0 if the sample falls below 0, 1.0 if it falls above 1,
        otherwise the sample rounded to two decimal places.
    """
    prob = gauss(mu, sigma)
    # Samples outside the unit interval are clamped to the nearest bound.
    if prob < 0:
        return 0.0
    if prob > 1:
        return 1.0
    return round(prob, 2)
# Script body: builds a random N x U ratings matrix in which users of the same
# group rate each news item around a shared per-group value, then prints the
# matrix one news row per line.
N = 10  # no. of news
G = 3  # no. of groups of users
K = 5  # no. of users per group
sparse_prob = 0.2  # probability that a given (news, user) entry is left unrated (0)
U = G * K  # total no. of users = no. of groups times no. of users per group

groups = {}  # map of users to groups
# 2-D array (N x G) of ratings of the collective group for each news
group_ratings = [[0 for i in range(G)] for j in range(N)]
# 2-D array (N x U) of ratings of each user for each news; 0 means "not rated"
ratings = [[0 for i in range(U)] for j in range(N)]

# Users are assigned to groups in contiguous runs: group i owns user indices
# i*(U//G) .. (i+1)*(U//G) - 1. Floor division keeps the range bounds integral
# on both Python 2 and Python 3.
for i in range(G):
    for j in range(i * (U // G), (i + 1) * (U // G)):
        groups[j] = i

approxRatings = [0.3, 0.7]  # ratings to be given

# Each group picks one of the approximate ratings, uniformly at random, per news.
for i in range(N):
    for j in range(G):
        group_ratings[i][j] = approxRatings[randint(0, len(approxRatings) - 1)]

# With probability (1 - sparse_prob) the user has rated the news: the rating is
# a noisy copy of the group's rating. Otherwise the entry keeps its initial 0.
for n in range(N):
    for u in range(U):
        if sparse_prob < uniform(0, 1):
            ratings[n][u] = gaussian(group_ratings[n][groups[u]])

# print(r) with a single argument produces the same output on Python 2 and 3.
for r in ratings:
    print(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment