Skip to content

Instantly share code, notes, and snippets.

@D3MZ
Last active December 19, 2015 07:59
Show Gist options
  • Save D3MZ/5922172 to your computer and use it in GitHub Desktop.
Save D3MZ/5922172 to your computer and use it in GitHub Desktop.
#Demetrius Michael
#arrrwalktheplank@gmail.com
require 'require_all'
require_all './lib/'
require 'string_scorer'
require 'active_support/core_ext/string'
require 'pp'
require 'parallel'
# Normalizes a single CSV cell so that names from different files compare
# more reliably.
#
# Strings are title-cased (String#titleize, from ActiveSupport), have every
# "(" and ")" removed, and are trimmed of surrounding whitespace. Any other
# value (nil, numbers, ...) is returned untouched.
#
# @param object [Object] the raw cell value
# @return [Object] the cleaned String, or +object+ itself when it is not a String
def normalize_string(object)
  return object unless object.is_a?(String)

  # Trim only after the parentheses are gone; stripping first would keep
  # whitespace that was enclosed by a leading or trailing parenthesis
  # (e.g. "( old ) name" would come out with a leading space).
  object.titleize.delete('()').strip
end
# Loads a CSV file into an Array of Hashes, one Hash per data row.
#
# The file is read with a header row; header names are converted with the
# :symbol header converter and field values with the :all converters. Every
# field value is then passed through normalize_string.
#
# @param path [String] location of the CSV file
# @return [Array<Hash>] one Hash per row, mapping header => normalized value
def csv_to_hash(path)
  table = CSV.read(path, headers: true, header_converters: :symbol, converters: :all)

  table.map do |row|
    # Plain assignment keeps the last value when a header is repeated.
    row.each_with_object({}) do |(header, value), record|
      record[header] = normalize_string(value)
    end
  end
end
# Tells whether any name on the +hs+ record resembles any name on the
# +government+ record.
#
# Every non-nil combination of (cemetery_name, alt_cemetery_name) with
# (business_name, does_business_as) is scored via String#score (provided by
# the string_scorer library); the records match when at least one score is
# strictly greater than 0.2.
#
# @param hs [Hash] record with :cemetery_name / :alt_cemetery_name
# @param government [Hash] record with :business_name / :does_business_as
# @return [Boolean]
def name_match?(hs, government)
  cemetery_names = [hs[:cemetery_name], hs[:alt_cemetery_name]].compact
  business_names = [government[:business_name], government[:does_business_as]].compact

  # All pairs are scored up front, then checked against the threshold.
  scores = cemetery_names.product(business_names).map do |cemetery_name, business_name|
    cemetery_name.score(business_name)
  end

  scores.any? { |score| score > 0.2 }
end
# Tells whether any place name on the +hs+ record occurs, ignoring case, in
# either address line of the +government+ record.
#
# The place names considered are :township, :town and :current_municipality;
# nil and empty values are skipped. Address lines that are not Strings never
# match (see regex_proper).
#
# @param hs [Hash] record with :township / :town / :current_municipality
# @param government [Hash] record with :bus_address_line1 / :bus_address_line2
# @return [Boolean]
def address_match?(hs, government)
  address_lines = [government[:bus_address_line1], government[:bus_address_line2]]
  places = [hs[:township], hs[:town], hs[:current_municipality]].compact

  # An empty place name would build a pattern that matches every address.
  places.reject { |place| place.to_s.empty? }.any? do |place|
    # Escape the name so characters such as ".", "+" or "[" are matched
    # literally instead of being read as regex syntax (which could also
    # raise RegexpError on unbalanced brackets).
    pattern = /#{Regexp.escape(place.to_s)}/i
    address_lines.any? { |line| regex_proper(line, pattern) }
  end
end
# Nil-safe regex lookup.
#
# @param string [Object] value to search; anything that is not a String is ignored
# @param regex [Regexp] pattern to look for
# @return [String, nil] the matched portion of +string+, or nil when there is
#   no match or +string+ is not a String
def regex_proper(string, regex)
  return unless string.is_a?(String)

  string.slice(regex)
end
# Load both data sets as arrays of normalized row hashes.
gov = csv_to_hash "this is the path to file one"
hs = csv_to_hash "this is the path to file two"

# For every hs record, look up the first gov record whose name AND address
# both match, and merge the two. On key collisions the gov value wins
# (Hash#merge). Records without a match yield nil and are dropped by compact.
merged = Parallel.map(hs) do |cemetery|
  # find stops at the first hit instead of filtering the whole gov list twice;
  # the name check still runs before the address check for each candidate.
  match = gov.find do |business|
    name_match?(cemetery, business) && address_match?(cemetery, business)
  end
  cemetery.merge(match) if match
end.compact

# Report how many records were matched.
pp merged.length

# NOTE(review): to_csv is called with a file path here, which is not the
# stdlib Array#to_csv signature — presumably an extension loaded from ./lib;
# confirm. Dir.home is used so a missing HOME variable does not surface as a
# NoMethodError on nil.
merged.to_csv(File.join(Dir.home, "cem_less_strict.csv"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment