Skip to content

Instantly share code, notes, and snippets.

@zonble
Last active August 29, 2015 13:56
Show Gist options
  • Select an option

  • Save zonble/9156232 to your computer and use it in GitHub Desktop.

Select an option

Save zonble/9156232 to your computer and use it in GitHub Desktop.
斧頭幫第三關 http://axe.g0v.tw/level/3
#!/usr/bin/env python
# encoding: utf-8
# Scraper for level 3 of the g0v "Axe" crawling challenge: http://axe.g0v.tw/level/3
import urllib, urllib2, re
import cookielib
# Accumulates one JSON object literal (as a string) per scraped table row.
lines = []

# Cookies must persist between requests so that the "?page=next" URL can
# advance through the pages.  The jar is only ever used in memory (nothing in
# this script loads or saves it), so a plain CookieJar is the right class;
# FileCookieJar is the base for file-backed jars and cannot save by itself.
jar = cookielib.CookieJar()

# Every request goes through this opener so they all share the same cookies.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
def parse_page(index):
    """Fetch one page of the level-3 table and append its rows to ``lines``.

    Page 1 is requested from the base URL; any other ``index`` requests
    ``?page=next``.  That URL carries no page number, so the page returned
    presumably depends on the cookies held by ``opener`` -- pages should be
    requested in order, through the same opener (TODO confirm against server).

    Args:
        index: 1-based page number.  Only whether it equals 1 is used.

    Returns:
        None.  Each matched row (except the first, which is skipped as the
        header) is appended to the module-level ``lines`` list as a JSON
        object literal with the keys ``town``, ``village`` and ``name``.

    Errors raised by ``opener.open`` or ``read`` are not caught here.
    """
    if index == 1:
        url = "http://axe-level-1.herokuapp.com/lv3/"
    else:
        url = "http://axe-level-1.herokuapp.com/lv3/?page=next"

    response = opener.open(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # Non-greedy groups keep each capture inside a single <td>, even when
    # several cells or rows share one line of markup.
    pattern = r"<tr>\s*<td>(.*?)</td>\s*<td>(.*?)</td>\s*<td>(.*?)</td>\s*</tr>"
    # The first match is the table's header row, not data.
    rows = re.findall(pattern, html)[1:]

    # NOTE(review): cell text is interpolated verbatim; a cell containing a
    # double quote or backslash would produce invalid JSON.
    line = '{"town": "%s", "village": "%s", "name" : "%s"}'
    lines.extend(line % tuple(row) for row in rows)
# Request pages 1 through 76 in order; each call appends that page's rows
# to ``lines``.
for page_number in range(1, 77):
    parse_page(page_number)

# Join the per-row object literals into one JSON array, one row per line,
# and write it out.
joined_rows = ",\n".join(lines)
json_text = "[%s]" % joined_rows
with open("test.txt", "w") as output_file:
    output_file.write(json_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment