Skip to content

Instantly share code, notes, and snippets.

@sajjadmurtaza
Last active November 15, 2017 18:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sajjadmurtaza/91d01c99e0e2a176e5ae003fe34a4b3c to your computer and use it in GitHub Desktop.
Save sajjadmurtaza/91d01c99e0e2a176e5ae003fe34a4b3c to your computer and use it in GitHub Desktop.
# Positional distance between two column names: the number of differing
# characters over the length both names share, plus the difference in length.
# Each surplus character of the longer name is therefore counted exactly once,
# regardless of which argument is the longer one.
#
# @param left [String]
# @param right [String]
# @return [Integer]
def column_name_distance(left, right)
  shared_length = [left.length, right.length].min
  mismatches = (0...shared_length).count { |k| left[k] != right[k] }
  mismatches + (left.length - right.length).abs
end

# Builds candidate column pairs between two schemas by name similarity and
# hands them to find_final_one together with the ground truth.
#
# For every imdb column the rt columns are scanned in order. A pair is recorded
# when its distance is at most half the imdb name's length (integer division)
# and strictly smaller than the best distance recorded so far for that imdb
# column, so a later rt column can only be added if it is a better match.
#
# @param imdb [Array<String>] column names of the first schema
# @param rt [Array<String>] column names of the second schema
# @param g [Array<Array<String>>] ground-truth pairs, passed through to find_final_one
# @return whatever find_final_one returns for the candidate pairs
def schema_matching(imdb, rt, g)
  candidates = []
  imdb.each do |imdb_column_name|
    max_distance = imdb_column_name.length / 2
    # Best (smallest) accepted distance for this imdb column; only accepted
    # distances need tracking because any rejected one exceeds max_distance.
    best_distance = Float::INFINITY
    rt.each do |rt_column_name|
      distance = column_name_distance(imdb_column_name, rt_column_name)
      next unless distance <= max_distance && distance < best_distance

      candidates << ["imdb.#{imdb_column_name}", "rt.#{rt_column_name}"]
      best_distance = distance
    end
  end
  find_final_one(candidates, g)
end
# Filters candidate pairs against the ground truth, keeping a pair only when
# neither running ratio drops below the value computed for the previous pair.
#
# A running hit count is increased by the number of ground-truth entries equal
# to the current pair. "Precision" is that count divided by the pair's own
# element count, and "recall" is the count divided by 10. Since the hit count
# never decreases, pairs of equal, non-zero size always pass the check.
#
# NOTE(review): dividing by the pair's size and by a literal 10 looks
# unintended (perhaps the number of candidates and g.size were meant) --
# confirm the intended metric before relying on this as a filter.
#
# @param list [Array<Array<String>>] candidate pairs, in evaluation order
# @param g [Array<Array<String>>] ground-truth pairs
# @return [Array<Array<String>>] a new array with the pairs that were kept
def find_final_one(list, g)
  hits = 0
  previous_precision = 0
  previous_recall = 0
  list.each_with_object([]) do |pair, kept|
    hits = (hits + g.select { |expected| pair == expected }.size).to_f
    precision = hits / pair.count.to_f
    recall = hits / 10
    kept << pair if precision >= previous_precision && recall >= previous_recall
    # The baseline moves on even when the pair was rejected.
    previous_precision = precision
    previous_recall = recall
  end
end
#-------------- Calling method ---------------#
#---------------------------------------------#
# Example run: column names of the two schemas, the ground-truth pairs (g),
# and the matcher's result printed with `p`.
imdb = %w[
  Id Name YearRange ReleaseDate Director Creator Cast Duration
  RatingValue ContentRating Genre Url Description
]
rt = %w[
  Id Name Year ReleaseDate Director Creator Actors Cast Language Country
  Duration RatingValue RatingCount ReviewCount Genre FilmingLocations Description
]
g = [
  ["imdb.Name", "rt.Name"],
  ["imdb.YearRange", "rt.Year"],
  ["imdb.ReleaseDate", "rt.ReleaseDate"],
  ["imdb.Director", "rt.Director"],
  ["imdb.Creator", "rt.Creator"],
  ["imdb.Cast", "rt.Cast"],
  ["imdb.Duration", "rt.Duration"],
  ["imdb.RatingValue", "rt.RatingValue"],
  ["imdb.Genre", "rt.Genre"],
  ["imdb.Description", "rt.Description"]
]
p schema_matching(imdb, rt, g)
# Output recorded by the author:
# [["imdb.Id", "rt.Id"], ["imdb.Name", "rt.Name"],
#  ["imdb.ReleaseDate", "rt.ReleaseDate"], ["imdb.Director", "rt.Director"],
#  ["imdb.Creator", "rt.Creator"], ["imdb.Cast", "rt.Cast"],
#  ["imdb.Duration", "rt.Duration"], ["imdb.RatingValue", "rt.RatingValue"],
#  ["imdb.Genre", "rt.Genre"], ["imdb.Description", "rt.Description"]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment