Skip to content

Instantly share code, notes, and snippets.

@robmckinnon
Created January 1, 2009 04:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robmckinnon/42184 to your computer and use it in GitHub Desktop.
Save robmckinnon/42184 to your computer and use it in GitHub Desktop.
grabs soulmate profiles
require 'rubygems'
require 'pottery'
require 'mechanize'
require 'hpricot'
require 'open-uri'
require 'timeout'
require 'active_support'
# A dating profile scraped from dating.guardian.co.uk.
#
# Profiles are looked up with +restore+ and stored with +save+; neither is
# defined in this class (presumably both come from the Pottery mixin --
# confirm). Profile attributes are not declared up front: they are created
# at load time by +morph+ from the labels found on the profile page.
class SoulMate
  include Pottery

  # Matches a search-result link to a profile; capture 1 is the profile id.
  PROFILE_LINK = %r{/s/view/(\d+)/s/(\d+)}

  # Matches a height such as "180cm"; capture 1 is the number.
  HEIGHT_CM = /(\d+)cm/

  class << self
    # Searches the site for profiles of the given age near the given
    # postcode and returns them as SoulMate instances.
    #
    # Profiles already stored are restored; the rest are fetched, parsed and
    # saved. A profile that fails to load is reported on stdout and left out
    # of the result.
    #
    # age      - age to search for (used as both minimum and maximum).
    # postcode - postcode to search around.
    #
    # Returns an Array of SoulMate.
    def find(age = '29', postcode = 'N1')
      find_ids(age.to_s, postcode).map { |id| restore_or_load(id) }.compact
    end

    # Returns every stored profile whose gender is 'Female'.
    def women
      saved_by_gender 'Female'
    end

    # Returns every stored profile whose gender is 'Male'.
    def men
      saved_by_gender 'Male'
    end

    private

    # Returns the stored profile for +id+, or fetches it when none is stored.
    def restore_or_load(id)
      restore(id.to_s) || load_and_save(id)
    end

    # Fetches, parses and saves the profile +id+.
    #
    # Returns the new SoulMate, or nil when loading or saving failed, so that
    # half-populated, unsaved objects never reach the caller of +find+.
    def load_and_save(id)
      soul_mate = SoulMate.new
      soul_mate.load_data id
      soul_mate.save
      soul_mate
    rescue StandardError, Timeout::Error => e
      # Timeout::Error is named explicitly because it is not a StandardError
      # on Ruby 1.8. Interrupts and exit requests are deliberately not
      # rescued.
      puts e.to_s
      nil
    end

    # Restores every stored profile whose :gender attribute equals +gender+.
    def saved_by_gender(gender)
      snips = Soup.sieve(:gender, "= '#{gender}'")
      snips.map { |snip| restore(snip.name) }
    end

    # Submits the site's search form and walks the result pages.
    #
    # Returns the sorted Array of profile ids (Strings) found.
    def find_ids(age, postcode)
      ids = {}
      WWW::Mechanize.new.get('http://dating.guardian.co.uk/s/') do |page|
        search = page.form_with(:action => '/s/find/search.php') do |f|
          f.gender = '2'
          f.mGender = '1'
          f.mAgeMin = age
          f.mAgeMax = age
          f.mPostCode = postcode
          f.mRangeID = '1'
        end
        results = search.click_button
        handle_links ids, results.links
      end
      ids.keys.sort
    end

    # Records every profile link in +links+ into +ids+ (profile id => href)
    # and follows the first "Next" link, recursing into the following page.
    def handle_links(ids, links)
      done_next = false
      links.each do |link|
        # Anchors without an href yield nil; to_s keeps them from raising.
        uri = link.href.to_s.strip
        profile_id = uri[PROFILE_LINK, 1]
        if profile_id
          ids[profile_id] = uri
        elsif !done_next && link.text.to_s[/Next/i]
          handle_links ids, link.click.links
          # Only the first "Next" link on a page is followed.
          done_next = true
        end
      end
    end
  end

  public

  # Returns the profile's height in centimetres, or 0 when +height+ is
  # blank or holds no "<digits>cm" value.
  def height_cm
    centimetres height
  end

  # Returns the desired match's height in centimetres, or 0 when
  # +height_match+ is blank or holds no "<digits>cm" value.
  def height_match_cm
    centimetres height_match
  end

  # Fetches the profile page for +id+ and populates this object from it.
  #
  # Prints "trouble loading: <id>" and re-raises when the fetched page cannot
  # be parsed. Errors from the fetch itself propagate without that message.
  def load_data(id)
    doc = Hpricot open("http://dating.guardian.co.uk/s/view/#{id}")
    begin
      load_profile doc
      morph(:id_name, id)
    rescue Exception
      # Safe to rescue broadly here: the exception is always re-raised.
      puts "trouble loading: #{id}"
      raise
    end
  end

  private

  # Returns +text+ as a String with surrounding whitespace removed.
  def s(text)
    text.to_s.strip
  end

  # Extracts the number from the first "<digits>cm" in +text+; 0 when absent.
  def centimetres(text)
    text.to_s[HEIGHT_CM, 1].to_i
  end

  # Turns a table-cell label into an attribute label: trimmed, with commas
  # and apostrophes removed and a trailing colon dropped.
  def clean_label(node)
    node.to_s.strip.delete(",'").chomp(':')
  end

  # Populates this object from the parsed profile page +doc+: the header
  # fields first, then the three attribute tables.
  def load_profile(doc)
    info = doc.at('#profileInfo')
    morph :name => s(info.at('h2/text()')),
          :gender => s(info.at('h2/img')['title']),
          :headline => s(info.at('#headline/text()')),
          :last_modify => info.at('#lastModify/span/text()').to_s.chomp('?').strip,
          :last_login => s(info.at('#lastLogin/span/text()')),
          :about_self => s(doc.at('#selfBox/p')),
          :about_match => s(doc.at('#matchBox/p')),
          :image => doc.at('#primaryPhoto/a/img')['src']
    load_table doc, '#aboutTable'
    load_table doc, '#match', 'match'
    load_table doc, '#depth'
  end

  # Reads the table selected by +id+ as consecutive label/value cell pairs
  # and morphs each pair into an attribute named "<label> <suffix>".
  #
  # For the '#match' table, rows containing a <span> additionally produce a
  # "<label>_importance" attribute holding the span text without parentheses.
  def load_table(doc, id, suffix = '')
    table = doc.at(id)
    (table / 'td/text()').in_groups_of(2).each do |label_node, value_node|
      value = value_node.to_s.gsub('?', ' ').strip
      morph("#{clean_label(label_node)} #{suffix}", value)
    end
    return unless id == '#match'

    rows_with_span = (table / 'tr').select { |row| row.at('td/span') }
    rows_with_span.each do |row|
      importance = row.at('td/span/text()').to_s.delete('()')
      morph("#{clean_label(row.at('td/text()'))}_importance", importance)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment