Last active
May 12, 2016 09:43
-
-
Save kamalbanga/7bb00ece0d6cbff6bc6aff868600b966 to your computer and use it in GitHub Desktop.
Generate random data for input to CTM algorithm. Run as "python2.7 ctmdata.py".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import randint
from random import uniform, gauss
def gaussian(mu, sigma=0.1):
    """Draw a normally distributed value around ``mu``, kept inside [0, 1].

    The sample is taken with ``random.gauss(mu, sigma)``.

    Args:
        mu: Mean of the normal distribution to sample from.
        sigma: Standard deviation of the distribution (default 0.1).

    Returns:
        0.0 if the sample is negative, 1.0 if the sample is greater than 1,
        otherwise the sample rounded to two decimal places.
    """
    prob = gauss(mu, sigma)
    # Out-of-range samples are clamped to the nearest bound rather than
    # re-drawn, so the result is always a value in [0, 1].
    if prob < 0:
        return 0.0
    if prob > 1:
        return 1.0
    return round(prob, 2)
# Script body: build a random news-by-user rating matrix and print it one
# row (one news item) per line.
#
# Every group of users gets one "approximate" rating per news item; each
# user's own rating is a noisy copy of their group's rating, and a fraction
# of the entries is left unrated (0).

N = 10  # no. of news
G = 3  # no. of groups of users
K = 5  # no. of users per group
sparse_prob = 0.2  # this times 100% is the percentage sparsity
U = G * K  # total no. of users = no. of groups times no. of users per group

groups = {}  # map of users to groups
# 2-D array of ratings of the collective group for each news
group_ratings = [[0] * G for _ in range(N)]
# 2-D array of ratings of each user for each news; 0 means "not rated"
ratings = [[0] * U for _ in range(N)]

# Users are assigned to groups in contiguous runs of U // G indices.
# Floor division keeps the range() bounds integers on Python 2 and Python 3.
group_size = U // G
for i in range(G):
    for j in range(i * group_size, (i + 1) * group_size):
        groups[j] = i

approxRatings = [0.3, 0.7]  # ratings to be given

# Pick one of the approximate ratings at random for every (news, group) pair.
for i in range(N):
    for j in range(G):
        group_ratings[i][j] = approxRatings[randint(0, len(approxRatings) - 1)]

for n in range(N):
    for u in range(U):
        # The entry stays 0 (unrated) with probability sparse_prob; otherwise
        # the user rates the news with noise around their group's rating.
        if sparse_prob < uniform(0, 1):
            ratings[n][u] = gaussian(group_ratings[n][groups[u]])

# A single parenthesised argument prints identically on Python 2 and 3.
for r in ratings:
    print(r)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment