Skip to content

Instantly share code, notes, and snippets.

@baharefatemi
Created April 19, 2017 23:02
Show Gist options
  • Save baharefatemi/e4338625f00e4f623d236dbe0782b81e to your computer and use it in GitHub Desktop.
Save baharefatemi/e4338625f00e4f623d236dbe0782b81e to your computer and use it in GitHub Desktop.
# Splits the ratings in "u.data" into an earlier ("train") and a later ("test")
# window by timestamp, and logs how often each item was rated in the train window.
#
# Inputs (read from the current working directory):
#   u.user - "|"-separated lines; field 0 is the user id, field 2 is kept as gender.
#   u.data - whitespace-separated lines: user, item, rating, timestamp.
#   (presumably the MovieLens 100K layout -- confirm against the data set in use)
#
# Outputs:
#   log_new.txt - one "item^count" line per item rated in the train window,
#                 ordered by ascending count.
#   train.csv, test.csv - created/truncated, but nothing is written to them here.
require "set"

base_file = File.read("u.data")
tag_file = File.read("u.user")

# NOTE(review): these two handles are opened (which truncates the files) but are
# never written to or closed in this section. They are left open in case later
# code writes to them -- confirm, then close or remove them.
train_file = File.open("train.csv", "w")
test_file = File.open("test.csv", "w")

# Ratings with timestamp <= train_cutoff count as train; ratings in
# (train_cutoff, test_cutoff] count as test; anything later is ignored.
train_cutoff = 880845177
test_cutoff = 884673930

train_users = []
test_users = []
genders = {}
items = []
number_of_times_rated = Hash.new(0)
ratings = {}

# Sets mirroring the arrays above so membership checks are O(1) instead of
# scanning the arrays with include? for every rating.
seen_train_users = Set.new
seen_test_users = Set.new
seen_items = Set.new

tag_file.each_line do |line|
  fields = line.split("|")
  next if fields.length < 3 # blank or truncated line: no gender field to record

  genders[fields[0].to_i] = fields[2]
end

base_file.each_line do |line|
  fields = line.split(" ")
  # Blank/short lines would otherwise parse as user 0, item 0, timestamp 0 and
  # be counted as a train rating.
  next if fields.length < 4

  user, item, rating, time_stamp = fields.first(4).map(&:to_i)
  next if time_stamp > test_cutoff

  in_train = time_stamp <= train_cutoff
  if in_train
    train_users << user if seen_train_users.add?(user)
  else
    test_users << user if seen_test_users.add?(user)
  end
  items << item if seen_items.add?(item)

  # Create the per-user hash only once. Re-creating it whenever a user first
  # shows up in the other window discarded ratings that were already stored.
  ratings[user] ||= {}
  ratings[user][item] = [rating, time_stamp]

  # Only train-window ratings contribute to the per-item counts.
  number_of_times_rated[item] += 1 if in_train
end

# Keep only test users that have no train-window ratings.
test_users -= train_users

# sort_by turns the hash into an array of [item, count] pairs, ascending by count.
number_of_times_rated = number_of_times_rated.sort_by { |_item, count| count }

# Block form closes (and flushes) the log file even if writing raises.
File.open("log_new.txt", "w") do |output_file|
  number_of_times_rated.each do |item, count|
    output_file.puts("#{item}^#{count}")
  end
end
# NOTE(review): this section repeats the steps of the section directly above it
# in this file (same inputs, same outputs). It looks like a duplicated paste --
# confirm whether a second pass is actually needed.
#
# Splits the ratings in "u.data" into an earlier ("train") and a later ("test")
# window by timestamp, and logs how often each item was rated in the train window.
#
# Inputs (read from the current working directory):
#   u.user - "|"-separated lines; field 0 is the user id, field 2 is kept as gender.
#   u.data - whitespace-separated lines: user, item, rating, timestamp.
#   (presumably the MovieLens 100K layout -- confirm against the data set in use)
#
# Outputs:
#   log_new.txt - one "item^count" line per item rated in the train window,
#                 ordered by ascending count.
#   train.csv, test.csv - created/truncated, but nothing is written to them here.
require "set"

base_file = File.read("u.data")
tag_file = File.read("u.user")

# NOTE(review): these two handles are opened (which truncates the files) but are
# never written to or closed in this section. They are left open in case later
# code writes to them -- confirm, then close or remove them.
train_file = File.open("train.csv", "w")
test_file = File.open("test.csv", "w")

# Ratings with timestamp <= train_cutoff count as train; ratings in
# (train_cutoff, test_cutoff] count as test; anything later is ignored.
train_cutoff = 880845177
test_cutoff = 884673930

train_users = []
test_users = []
genders = {}
items = []
number_of_times_rated = Hash.new(0)
ratings = {}

# Sets mirroring the arrays above so membership checks are O(1) instead of
# scanning the arrays with include? for every rating.
seen_train_users = Set.new
seen_test_users = Set.new
seen_items = Set.new

tag_file.each_line do |line|
  fields = line.split("|")
  next if fields.length < 3 # blank or truncated line: no gender field to record

  genders[fields[0].to_i] = fields[2]
end

base_file.each_line do |line|
  fields = line.split(" ")
  # Blank/short lines would otherwise parse as user 0, item 0, timestamp 0 and
  # be counted as a train rating.
  next if fields.length < 4

  user, item, rating, time_stamp = fields.first(4).map(&:to_i)
  next if time_stamp > test_cutoff

  in_train = time_stamp <= train_cutoff
  if in_train
    train_users << user if seen_train_users.add?(user)
  else
    test_users << user if seen_test_users.add?(user)
  end
  items << item if seen_items.add?(item)

  # Create the per-user hash only once. Re-creating it whenever a user first
  # shows up in the other window discarded ratings that were already stored.
  ratings[user] ||= {}
  ratings[user][item] = [rating, time_stamp]

  # Only train-window ratings contribute to the per-item counts.
  number_of_times_rated[item] += 1 if in_train
end

# Keep only test users that have no train-window ratings.
test_users -= train_users

# sort_by turns the hash into an array of [item, count] pairs, ascending by count.
number_of_times_rated = number_of_times_rated.sort_by { |_item, count| count }

# Block form closes (and flushes) the log file even if writing raises.
File.open("log_new.txt", "w") do |output_file|
  number_of_times_rated.each do |item, count|
    output_file.puts("#{item}^#{count}")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment