Created
February 23, 2017 13:02
-
-
Save florianmski/0511c19a79909900e923978bea453137 to your computer and use it in GitHub Desktop.
A script to automate what you already do by hand on WG-Gesucht anyway (i.e., clicking on the ads and sending your saved message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'mechanize' | |
# When true, `log` prints progress messages to standard output.
DEBUG = true
# Line printed before and after a message by `pretty_log`.
SEPARATOR = '========'.freeze
# Root of the site; the search URL below is built from it.
BASE_URL = 'https://www.wg-gesucht.de'.freeze
# Language segment of the URL.
LANG = 'en'.freeze
# City segment of the search URL.
CITY = 'berlin'.freeze
# this is what you find in the url when tweaking the filtering of the results
FILTER = 'a6349c9d478d85ff9590d585f4dd1aa689187c5117790d1f43'.freeze
# Search-results URL that the script opens first.
URL = "#{BASE_URL}/#{LANG}/wg-zimmer-in-#{CITY}.8.0.0.0.html?filter=#{FILTER}".freeze
# Prints +text+ to standard output, but only while DEBUG is truthy.
#
# @param text [Object] the value handed to +puts+
# @return [nil]
def log(text)
  puts text if DEBUG
end
# Logs +text+ framed by a SEPARATOR line above and below it.
#
# @param text [Object] the message to log between the two separator lines
# @return [Object] whatever the final +log+ call returns
def pretty_log(text)
  framed = [SEPARATOR, text, SEPARATOR]
  framed.map { |line| log(line) }.last
end
# Fills in the form whose id is 'login_form' on +page+ and submits it.
#
# @param page [#form_with] the page expected to contain the login form
# @param email [String] value written to the form's rsLoginEmail field
# @param password [String] value written to the form's rsLoginPasswort field
# @return [Object] the result of submitting the form, or +page+ itself when
#   no login form is found, so the caller can still run its own
#   logged-in check instead of hitting a NoMethodError on nil
def login(page, email = 'YOUR_EMAIL', password = 'YOUR_PASSWORD')
  form = page.form_with(id: 'login_form')
  return page if form.nil?

  form.rsLoginEmail = email
  form.rsLoginPasswort = password
  form.submit
end
# Tells whether +page+ contains a link whose text is 'Sign Out'.
#
# @param page [#link_with] the page to inspect
# @return [Boolean] true when such a link is found, false otherwise
def logged_in?(page)
  sign_out_link = page.link_with(text: 'Sign Out')
  !sign_out_link.nil?
end
# Follows the '»' pagination link of +page+, if there is an enabled one.
#
# @param agent [#click] object used to follow the link
# @param page [#at] page searched for the pagination link
# @return [Object, nil] the result of +agent.click+, or nil when the link is
#   missing or its parent element carries the 'disabled' class
def next_page(agent, page)
  next_page_link = page.at("//ul[@class='pagination pagination-sm']/li/a[strong='»']")
  return nil if next_page_link.nil?

  # Compare class tokens rather than the whole attribute so that a parent with
  # several classes (e.g. "disabled foo") is still recognised as disabled.
  parent_classes = next_page_link.parent['class'].to_s.split
  return nil if parent_classes.include?('disabled')

  agent.click(next_page_link)
end
# Extracts the ad id from a link: the text between the last '.' that precedes
# '.html' and the '.html' suffix itself.
#
# @param link [#href] link whose href is inspected
# @return [String, nil] the captured id, or nil when the href is nil or does
#   not contain a '.<id>.html' segment
def get_id(link)
  # The dot before "html" is escaped so that only a literal ".html" matches;
  # to_s keeps a nil href from raising.
  link.href.to_s[/.*\.(.*)\.html/, 1]
end
# Sleeps for a random duration drawn from the range low..high.
#
# @param low [Numeric] lower bound of the range passed to +rand+
# @param high [Numeric] upper bound of the range passed to +rand+
# @return [Integer] the value returned by +sleep+
def random_sleep(low, high)
  pause = rand(low..high)
  sleep(pause)
end
# Filters +links+ in place: drops links with a duplicate href, links whose
# href is nil, and links whose ad id is listed in +contacted_ad_ids+.
#
# @param links [Array<#href>] links to filter; this array is modified
# @param contacted_ad_ids [#include?] ids of ads that were already contacted
# @return [Array<#href>] the same +links+ array after filtering
def clean_links(links, contacted_ad_ids)
  links.uniq!(&:href)
  links.reject! do |candidate|
    candidate.href.nil? || contacted_ad_ids.include?(get_id(candidate))
  end
  links
end
# Main flow: log in, collect the ids of ads that were already contacted, walk
# every page of filtered results to gather the remaining ads, then open each
# ad and submit the message form.
agent = Mechanize.new
agent.get(URL) do |page|
  log 'Login...'
  random_sleep 0, 2
  home_page = login page
  unless logged_in? home_page
    log 'Error with login, aborting'
    exit
  end
  log 'Logged in!'

  log 'Retreving already contacted ads...'
  random_sleep 0, 2
  # get urls of already contacted ads
  # TODO: with a high number of contacted ads, check whether this page is
  # paginated; only the page returned by the click below is read.
  contacted_ads_link = home_page.link_with(text: 'Contacted Ads')
  if contacted_ads_link.nil?
    # Without this list every ad would be treated as not yet contacted,
    # so stop instead of continuing.
    log "Could not find the 'Contacted Ads' link, aborting"
    exit
  end
  contacted_ads_page = agent.click(contacted_ads_link)
  contacted_ad_links = contacted_ads_page.links_with(class: 'fav_klein', href: /^((?!javascript).)*$/)
  contacted_ad_ids = contacted_ad_links.map { |link| get_id(link) }

  # get links for every ads that match the filter (need to loop through pages)
  page_nb = 1
  links = []
  while home_page
    pretty_log "Page #{page_nb}"
    random_sleep 2, 10
    page_links = home_page.links_with(class: /ansicht/)
    filtered_links = clean_links(page_links, contacted_ad_ids)
    log "#{filtered_links.size} new ads..."
    links.concat(filtered_links)
    # next_page returns nil once there is no further page, ending the loop
    home_page = next_page agent, home_page
    page_nb += 1
  end

  # A current URL containing "cuba" is treated as the captcha page.
  current_url = agent.page.uri.to_s
  if current_url.include? 'cuba'
    # The original evaluated this string without printing it.
    log "WG-Gesucht busted us! You need to go to #{current_url} and enter a captcha"
    exit
  end

  log "#{links.size} messages to send..."
  random_sleep 0, 2

  # for each ads
  links.each do |link|
    # go the details
    detail_page = agent.click(link)

    # capture name, falling back to "there" when the element is missing or
    # its text is blank
    element = detail_page.at("//div[@class='contact2Column' and @style='text-transform: capitalize']/b")
    first_name = element && element.text.split.first
    name = first_name || 'there'

    # go to send message
    message_link = detail_page.link_with(class: 'btn btn-block btn-md btn-orange')
    if message_link.nil?
      log "No message link found for #{get_id(link)}, skipping"
      next
    end
    message_page = agent.click(message_link)

    message_form = message_page.form_with(name: 'msg_form')
    if message_form.nil?
      log "No message form found for #{get_id(link)}, skipping"
      next
    end

    # replace name
    message_form.nachricht = "Hey #{name},\nYOUR_MESSAGE"
    # send message
    message_form.submit

    log "Message to #{name} for #{get_id(link)} sent!"
    random_sleep 2, 10
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried this again not so long ago and unfortunately it doesn't work anymore, as it relies on very specific DOM elements. It should still be a good base for creating a functional one.