Created
March 18, 2011 10:28
-
-
Save jarvist/875867 to your computer and use it in GitHub Desktop.
Beautiful Soup / Python code to monitor LFGSS message board and spawn a chrome tab on new adverts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import subprocess
import sys
import time
import urllib2

from BeautifulSoup import BeautifulSoup
# Monitor the LFGSS "For Sale" search results and open every newly listed
# advert in Google Chrome.
#
# NOTE: this script depends on Python 2 modules (urllib2, BeautifulSoup 3).
# The statements below are nevertheless written so they parse under both
# Python 2 and Python 3 (single-argument print calls, "except ... as ...").

# The search request is sent with a browser-like User-Agent header rather
# than urllib2's default one.
user_agent = 'Mozilla/5 (Solaris 10) Gecko'
headers = {'User-Agent': user_agent}

SITE_ROOT = "http://www.lfgss.com/"
SEARCH_URL = "http://www.lfgss.com/search.php?do=process&prefixchoice[]=ForSale&excludeclosed=1&nocache=1"

POLL_SECONDS = 60    # pause between two polls of the search page
FETCH_TIMEOUT = 30   # seconds before a stalled HTTP request is abandoned

# Anchors whose id attribute matches this pattern are the thread titles.
# Compiled once here instead of on every poll.
THREAD_ID_PATTERN = re.compile("thread_title")


def fetch_threads():
    """Download the search page and return its thread-title anchors.

    Returns:
        The BeautifulSoup result list of ``<a>`` tags whose ``id`` matches
        ``THREAD_ID_PATTERN``, in page order.

    Raises:
        IOError: on network failure (urllib2.URLError and socket.error are
            both IOError subclasses on Python 2.6+).
    """
    request = urllib2.Request(SEARCH_URL, None, headers)
    page = urllib2.urlopen(request, timeout=FETCH_TIMEOUT)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Always release the HTTP response, even if parsing fails.
        page.close()
    return soup("a", id=THREAD_ID_PATTERN)


def thread_url(href):
    """Return the absolute URL for a site-relative thread *href*."""
    return SITE_ROOT + href


def unseen_threads(threads, known):
    """Return the threads whose id is not yet in *known*.

    Args:
        threads: iterable of objects supporting ``thread['id']``.
        known: set of ids already seen; it is updated in place with the id
            of every thread returned.

    Returns:
        A list of the new threads, in input order. A thread id repeated
        within *threads* is reported only once.
    """
    fresh = []
    for thread in threads:
        thread_id = thread['id']
        if thread_id not in known:
            known.add(thread_id)
            fresh.append(thread)
    return fresh


def open_in_browser(url):
    """Open *url* with the ``google-chrome`` executable.

    The URL is passed as a separate argv entry, not through a shell, so
    characters taken from the remote page (quotes, backticks, ``$(...)``)
    cannot be interpreted as shell syntax.
    """
    try:
        subprocess.call(["google-chrome", url])
    except OSError as error:
        # A missing browser must not stop the monitor; report and carry on.
        print("Could not launch google-chrome: %s" % error)


def main():
    """Seed the known-thread set, then poll forever for new adverts."""
    threads = fetch_threads()
    print("Right, to crunch")
    seeded = []
    for thread in threads:
        print(thread)
        seeded.append(thread['id'])
    # A set gives constant-time membership tests; the list is kept only so
    # the seed ids are printed in page order.
    known = set(seeded)
    print("Seeded dictionary with: ")
    for thread_id in seeded:
        print(thread_id)

    while True:
        time.sleep(POLL_SECONDS)
        sys.stdout.write(". ")  # scrolling progress marker, one per poll
        sys.stdout.flush()      # flush so the marker shows up immediately
        try:
            threads = fetch_threads()
        except IOError as error:
            # Transient network trouble: skip this round and try again later.
            print("fetch failed, will retry: %s" % error)
            continue
        for thread in unseen_threads(threads, known):
            print(thread)
            open_in_browser(thread_url(thread['href']))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated 2012-08-16 to spoof 'user-agent' so that LFGSS will play ball, and changed to google-chrome for the webpages.