Skip to content

Instantly share code, notes, and snippets.

@malev
Created May 5, 2014 16:36
Show Gist options
  • Save malev/f2e5a2d37f0fea5d4a74 to your computer and use it in GitHub Desktop.
Save malev/f2e5a2d37f0fea5d4a74 to your computer and use it in GitHub Desktop.
Gender detection testing app
require 'csv'
require 'net/http'
require 'json'
require 'beauvoir'
require 'sexmachine'
# Load every row of input.csv into memory as an array of arrays (one inner
# array per CSV row). Later code sorts these rows by their first column and
# appends one guessed gender per detector.
names_with_gender = CSV.read('input.csv')
# Gender lookup backed by the genderize.io HTTP API, or by a local copy of a
# previous API response when one is available.
#
# The data is loaded eagerly in the constructor: from CACHE_FILE when that
# file exists in the current working directory, otherwise by querying the API
# for the given names.
class Genderize
  # JSON file holding a previously saved API response.
  CACHE_FILE = 'cache.json'

  # names_with_gender: array of [name, gender] pairs, in response order.
  # names: requested names plus every name seen in a response, without repeats.
  attr_reader :names_with_gender, :names

  # @param names [Array<String>] names to look up when no cache file exists
  def initialize(names=[])
    # Work on a copy so that #parse never mutates the caller's array.
    @names = names.dup
    @names_with_gender = []
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    if File.exist?(CACHE_FILE)
      call_cache
    else
      call
    end
  end

  # Returns the gender recorded for +name+, or "unknown" when the name does
  # not appear in the loaded data.
  #
  # @param name [String]
  # @return [String]
  def guess(name)
    # Look the pair up by name rather than by position, so a name that was
    # never loaded yields "unknown" instead of raising on a nil index.
    pair = names_with_gender.assoc(name)
    pair ? pair[1] : 'unknown'
  end

  # Queries the API for the current names and stores the parsed answer.
  def call
    parse(Net::HTTP.get(uri))
  end

  # Loads the answer from CACHE_FILE instead of the network.
  def call_cache
    # File.read closes the handle and takes the whole document, so a cache
    # that spans several lines (pretty-printed JSON) is parsed correctly.
    parse(File.read(CACHE_FILE))
  end

  # Parses a JSON array of objects carrying "name" and "gender" keys and
  # records each one.
  #
  # @param res [String] JSON document
  # @return [Array] the parsed entries
  def parse(res)
    entries = JSON.parse(res)
    entries.each do |entry|
      names_with_gender << [entry['name'], clean_gender(entry['gender'])]
    end
    # Merge the returned names into the requested ones without repeating
    # names that were already there (the API echoes back what was asked).
    names.concat(entries.map { |entry| entry['name'] }).uniq!
    entries
  end

  # Maps a missing (nil/false) gender to the string "unknown".
  def clean_gender(gender)
    gender || 'unknown'
  end

  # Builds the request URI with one indexed, URL-encoded "name[i]" parameter
  # per name.
  #
  # @return [URI]
  def uri
    pairs = names.each_with_index.map do |name, index|
      # Encode the value so spaces, "&" and non-ASCII characters are safe.
      "name[#{index}]=#{URI.encode_www_form_component(name)}"
    end
    URI("http://api.genderize.io?" + pairs.join("&"))
  end
end
# Thin adapter over SexMachine::Detector exposing the same #guess(name)
# method as the other detectors used by this script.
class SexMachinizer
  def initialize
    # The detector is configured to answer 'unknown' for names it cannot place.
    @detector = SexMachine::Detector.new(unknown_value: 'unknown')
  end

  # Returns the detector's answer for +name+ as a string, passed through #clean.
  def guess(name)
    raw = @detector.get_gender(name)
    clean(raw.to_s)
  end

  # For a value containing an underscore, returns the segment that follows
  # the first underscore (e.g. "mostly_male" becomes "male"); any other value
  # is returned unchanged.
  def clean(gender)
    return gender unless /\_/ =~ gender

    _prefix, remainder = gender.split('_')
    remainder
  end
end
# Build the three detectors, then write output.csv: the input rows sorted by
# their first column, each extended with one guess per detector in the order
# Beauvoir, SexMachine, Genderize.
beauvoir = Beauvoir::Categorizer.new
sex_machinizer = SexMachinizer.new
# NOTE(review): no names are passed here, so Genderize appears to rely on
# cache.json already holding the answers for every input name - confirm.
genderize = Genderize.new
detectors = [beauvoir, sex_machinizer, genderize]

CSV.open('output.csv', 'w') do |out|
  names_with_gender.sort_by(&:first).each do |record|
    name = record[0]
    # Rows are extended in place, one column per detector.
    detectors.each { |detector| record << detector.guess(name) }
    out << record
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment