Skip to content

Instantly share code, notes, and snippets.

@vrootic
Created July 25, 2014 11:08
Show Gist options
  • Save vrootic/f4a47bd85906453a055f to your computer and use it in GitHub Desktop.
Save vrootic/f4a47bd85906453a055f to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import pycurl
import io
def get_content(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled server.

    Returns:
        The response body as bytes (``Response.content``).

    Raises:
        requests.exceptions.RequestException: On connection failure or
            when the timeout expires.
    """
    res = requests.get(url, timeout=timeout)
    return res.content
def html_parser(url):
    """Collect the non-zero ``value`` attributes of every <option> on a page.

    The page at *url* is downloaded with ``get_content`` and parsed; each
    ``<option>`` element whose ``value`` converts to a non-zero integer
    contributes that value (as the original string) to the result.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of ``value`` attribute strings, in document order.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    soup = BeautifulSoup(get_content(url), 'html.parser')
    all_area = []
    for option in soup.find_all('option'):
        raw_value = option.get('value')
        try:
            code = int(raw_value)
        except (TypeError, ValueError):
            # An <option> without a value, or with a non-numeric one, is
            # not a usable code; skip it instead of aborting the scan.
            continue
        if code != 0:
            all_area.append(raw_value)
    return all_area
def curl_for(area_code):
    """POST a search for *area_code* and return the raw response.

    The request goes to the ``/waste/search`` endpoint with ``village`` and
    ``road`` fixed to 0, an empty ``words`` field and ``key=1``.

    Args:
        area_code: Value sent as the ``area`` form field; it is converted
            with ``str`` before being placed in the request body.

    Returns:
        An ``io.BytesIO`` holding the response body; read it with
        ``getvalue()``.

    Raises:
        pycurl.error: If the transfer fails.
    """
    result = io.BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, "http://esmd.ksepb.gov.tw/waste/search")
        c.setopt(c.POSTFIELDS, 'area=' + str(area_code) + '&village=0&road=0&words=&key=1')
        # NOTE(review): hard-coded ci_session cookie, presumably captured
        # from a browser session -- confirm the server still accepts it.
        c.setopt(c.COOKIE, 'ci_session=a%3A4%3A%7Bs%3A10%3A%22session_id%22%3Bs%3A32%3A%22768be533ff2b7bb647419f41e09b8b95%22%3Bs%3A10%3A%22ip_address%22%3Bs%3A13%3A%22172.16.10.254%22%3Bs%3A10%3A%22user_agent%22%3Bs%3A50%3A%22Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_9_4%29+App%22%3Bs%3A13%3A%22last_activity%22%3Bs%3A10%3A%221405866958%22%3B%7Dce77320ced2d1e2e683a3bb71990a301')
        c.setopt(c.WRITEFUNCTION, result.write)
        c.perform()
    finally:
        # Release the curl handle even when perform() raises.
        c.close()
    return result
if __name__ == '__main__':
    # Area codes come from the <option> values on the landing page.
    all_area = html_parser('http://esmd.ksepb.gov.tw/waste/')

    # NOTE(review): exactly one search is issued, always for the hard-coded
    # area "8", and only when at least one area code was found. The slice
    # below is tuned to that single response, so switching to a loop over
    # every code in all_area would need the slice revisited -- confirm intent.
    result = []
    if all_area:
        page = curl_for("8").getvalue().decode('utf-8')
        # Explicit parser keeps the row order independent of which optional
        # parsers are installed.
        result = BeautifulSoup(page, 'html.parser').find_all("tr")

    # Keep only table rows 18..33 of the response.
    result = result[18:34]
    for row in result:
        link = row.find("a")
        # find() returns None for a row without an anchor; skip such rows
        # instead of failing with AttributeError.
        if link is not None:
            print(link.get("href"))
@jack77121
Copy link

This is awesome!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment