Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# coding=utf8
import urllib
import re
from bs4 import BeautifulSoup as BS
url = 'http://www.dgpa.gov.tw/'

# Strings beginning with "105" are reported as titles.
# NOTE(review): "105" is presumably a year prefix -- confirm with the author.
TITLE_PATTERN = re.compile(r'^105')
# Strings containing this city name are reported together with the string
# that follows them on the page.
CITY_NAME = u'臺北市'


def find_entries(texts):
    """Return the (label, text) pairs to report, in page order.

    Args:
        texts: sequence of unicode strings, in the order they appear on
            the page.

    Returns:
        A list of ``(label, text)`` tuples where ``label`` is ``'title'``
        for a string starting with "105", ``'city'`` for a string that
        contains ``CITY_NAME``, and ``'info'`` for the string immediately
        after a city match.  A city match on the very last string yields
        no ``'info'`` entry.
    """
    # NOTE(review): the original indentation was lost; the city check is
    # assumed to be a sibling of the title check, not nested inside it.
    entries = []
    for index, text in enumerate(texts):
        if TITLE_PATTERN.search(text):
            entries.append(('title', text))
        if CITY_NAME in text:
            entries.append(('city', text))
            # Use the position of *this* match: looking the text up again
            # with list.index() returns the first duplicate instead, and
            # blindly adding 1 overruns the list on the final string.
            if index + 1 < len(texts):
                entries.append(('info', texts[index + 1]))
    return entries


def main():
    """Fetch the page at ``url`` and print its matching entries."""
    # Python 2's urlopen() result is not a context manager, so wrap it to
    # make sure the connection is closed even if read() fails.
    from contextlib import closing

    with closing(urllib.urlopen(url)) as response:
        html_content = response.read()
    soup_obj = BS(html_content, 'html.parser')
    texts = list(soup_obj.stripped_strings)  # every visible string on the page
    for label, text in find_entries(texts):
        # Encode once, at the output boundary; matching is done on unicode.
        print('%s:  %s' % (label, text.encode('utf8')))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.