Skip to content

Instantly share code, notes, and snippets.

@shrkw
Last active November 6, 2016 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shrkw/4c3a8ba7fb8bed47bde47353f2dd9225 to your computer and use it in GitHub Desktop.
Save shrkw/4c3a8ba7fb8bed47bde47353f2dd9225 to your computer and use it in GitHub Desktop.
字画の良い名前候補 http://enamae.net/
from bs4 import BeautifulSoup
import urllib2
import urllib
import codecs
def candidate(url):
    """Download ``url`` and return its ``<li>`` elements that mention "yomi".

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of the ``li`` tags parsed from the page whose serialised
        markup contains the substring ``"yomi"``.

    Errors raised by ``urllib2.urlopen`` are not caught here and propagate
    to the caller.
    """
    req = urllib2.Request(url)
    # A browser-like agent string is sent instead of urllib2's default
    # (presumably because the site rejects the default one -- unverified).
    req.add_header("User-agent", "Mozilla/5.0")
    response = urllib2.urlopen(req)
    try:
        # BeautifulSoup reads the whole response while constructing the
        # tree, so the connection can be released right afterwards.
        soup = BeautifulSoup(response, "html.parser")
    finally:
        # Python 2 response objects are not context managers; close
        # explicitly so the socket is not leaked on success or failure.
        response.close()
    return [li for li in soup.find_all("li") if "yomi" in str(li)]
def main(last_name, last_page=19):
    """Collect the candidates for ``last_name`` and write them to a file.

    The first listing page plus pages ``2`` .. ``last_page`` (selected with
    the ``?p=`` query parameter) are passed to ``candidate``; the text of
    every returned element is written, one per line, to
    ``jikaku_<percent-encoded last_name>.txt`` encoded as UTF-8.

    Args:
        last_name: Unicode string; it is UTF-8 encoded and percent-quoted
            before being appended to the listing URL and the file name.
        last_page: Highest page number to request. Defaults to 19, the
            limit that was previously hard-coded. Values below 2 fetch
            only the first page.
    """
    q = urllib.quote(last_name.encode('utf-8'))
    base = "http://enamae.net/meilist/m/" + q
    # The first page has no "?p=" parameter; numbered pages start at 2.
    urls = [base] + [base + "?p=" + str(i) for i in range(2, last_page + 1)]
    with codecs.open('jikaku_' + q + '.txt', 'w', 'utf-8') as f:
        for url in urls:
            for row in candidate(url):
                f.write(row.text)
                f.write("\n")
# Script entry: run the scraper for one family name.  The name is spelled
# with \u escapes (U+767D U+5DDD, presumably read "Shirakawa") because this
# file has no PEP 263 encoding declaration, and Python 2 rejects raw
# non-ASCII bytes in such a source file with a SyntaxError.
main(u'\u767d\u5ddd')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment