Skip to content

Instantly share code, notes, and snippets.

@sajjadmurtaza
Last active November 16, 2017 20:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sajjadmurtaza/c1a6c19f1c8d629c8bce71a07f88dc19 to your computer and use it in GitHub Desktop.
Save sajjadmurtaza/c1a6c19f1c8d629c8bce71a07f88dc19 to your computer and use it in GitHub Desktop.
# Filters an ordered list of candidates against the reference entries in +g+.
#
# Candidates are visited in order while two running metrics are tracked:
# precision (matches so far / candidates examined so far) and recall
# (matches so far / number of entries in +g+). A candidate is kept when
# neither metric is lower than it was after the previous candidate.
#
# Recall can never drop as more candidates are examined, so in practice the
# precision check decides. Candidates seen before the first match are kept,
# because both metrics are still zero at that point.
#
# @param list [#each] candidates, in the order they should be evaluated
# @param g [Array] reference entries; a candidate matches when it == an entry
# @return [Array] the retained candidates, in their original order
def find_final_one(list, g)
  gold_total = g.count
  matched = 0
  examined = 0
  last_precision = 0.0
  last_recall = 0.0
  target = []

  list.each do |candidate|
    examined += 1
    matched += g.count { |gold| candidate == gold }

    precision = matched / examined.to_f
    # An empty reference list would otherwise give 0/0 (NaN), which fails
    # every comparison.
    recall = gold_total.zero? ? 0.0 : matched / gold_total.to_f

    target << candidate if precision >= last_precision && recall >= last_recall
    last_precision = precision
    last_recall = recall
  end

  target
end
require './find_final_one'
require 'csv'
# Load both movie tables, treating the first line of each file as headers.
imdb_csv = CSV.read("imdb.csv", headers: true)
rotten_tomatoes_csv = CSV.read("rotten_tomatoes.csv", headers: true)

# Reference imdb-to-rt column pairings; candidate pairings are checked
# against these.
g = [
  ["imdb.Name", "rt.Name"],
  ["imdb.YearRange", "rt.Year"],
  ["imdb.ReleaseDate", "rt.ReleaseDate"],
  ["imdb.Director", "rt.Director"],
  ["imdb.Creator", "rt.Creator"],
  ["imdb.Cast", "rt.Cast"],
  ["imdb.Duration", "rt.Duration"],
  ["imdb.RatingValue", "rt.RatingValue"],
  ["imdb.Genre", "rt.Genre"],
  ["imdb.Description", "rt.Description"]
]

# Convert each table to a plain array of rows (header row first).
imdb = imdb_csv.to_a
rt = rotten_tomatoes_csv.to_a
# Proposes pairings between the columns of two tables by comparing their
# cells, then hands the proposals to find_final_one together with +g+.
#
# Each table is an array of rows whose first row holds the column names.
# For every imdb column the rt columns are visited left to right; a pairing
# is recorded each time an rt column scores strictly higher than the best
# score seen so far for that imdb column and the score is at least half the
# imdb row limit (integer division).
#
# NOTE(review): the number of rows compared is (row count / column count),
# not the full row count. It is unclear whether this is deliberate sampling
# or a mistake; it is kept because changing it alters both the results and
# the run time.
#
# A table with no rows or no header columns yields no proposals instead of
# raising.
#
# @param imdb [Array<Array>] rows of the first table, header row first
# @param rt [Array<Array>] rows of the second table, header row first
# @param g [Array] passed through unchanged to find_final_one
# @return whatever find_final_one returns for the proposed pairings
def get_list(imdb, rt, g)
  imdb_header = imdb[0] || []
  rt_header = rt[0] || []
  return find_final_one([], g) if imdb_header.empty? || rt_header.empty?

  imdb_rows = imdb.count / imdb_header.count
  rt_rows = rt.count / rt_header.count
  # The rt cells do not depend on the imdb column, so extract them once.
  rt_columns = rt_header.each_index.map { |col| sampled_column(rt, col, rt_rows) }

  candidates = []
  imdb_header.each_with_index do |imdb_name, imdb_col|
    imdb_cells = sampled_column(imdb, imdb_col, imdb_rows)
    best = 0

    rt_header.each_with_index do |rt_name, rt_col|
      score = column_similarity(imdb_cells, rt_columns[rt_col])
      next unless score > best && score >= imdb_rows / 2

      candidates << ["imdb.#{imdb_name}", "rt.#{rt_name}"]
      best = score
    end
  end

  find_final_one(candidates, g)
end

# Returns the cells of column +col+ for rows 1 up to (but excluding) +row_limit+.
def sampled_column(table, col, row_limit)
  (1...row_limit).map { |row| table[row][col] }
end

# Sums cell_similarity over every pairing of a left cell with a right cell.
def column_similarity(left_cells, right_cells)
  left_cells.reduce(0) do |total, left|
    total + right_cells.reduce(0) { |subtotal, right| subtotal + cell_similarity(left, right) }
  end
end

# Scores two cells from 0 to 2: one point when they are of the same class and
# one point when their positional distance is at most half the left cell's
# length (integer division). nil counts as a zero-length cell.
def cell_similarity(left, right)
  left_length = left.nil? ? 0 : left.length
  right_length = right.nil? ? 0 : right.length

  # Mismatches over the shared prefix, plus the length difference.
  shared = [left_length, right_length].min
  distance = (0...shared).count { |pos| left[pos] != right[pos] }
  distance += (left_length - right_length).abs

  content_point = distance <= left_length / 2 ? 1 : 0
  type_point = left.class == right.class ? 1 : 0
  content_point + type_point
end
# Print the inspected result of matching the two tables' columns.
puts get_list(imdb, rt, g).inspect
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment