Created
December 3, 2013 22:08
-
-
Save NorimasaNabeta/7778407 to your computer and use it in GitHub Desktop.
艦コレWiki に登録されている「艦娘」の台詞をデータベースに落とす。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- mode: python; coding: utf-8 -*-
#
# Time-stamp: <2013-12-04 07:19:58 NorimasaNabeta>
#
import re
import sqlite3
import urllib2
from contextlib import closing

from bs4 import BeautifulSoup
# SETUP DB
#
# Open (or create) the SQLite file and make sure the voice table exists.
#
db_filename = "KanColleDb.sqlite"
conn = sqlite3.connect(db_filename)
# Under Python 2 this makes sqlite3 hand TEXT values back as byte strings,
# matching the UTF-8 encoded bytes this script inserts.
conn.text_factory = str
sql = u"""create table if not exists KanMusuVoice(cbid INTEGER, kid text, event text, voice text)"""
conn.execute(sql)
# NOTE(review): the table as created above has no PRIMARY KEY / UNIQUE
# constraint, so "or replace" never hits a conflict and behaves like a plain
# insert -- re-running the script appends duplicate rows.  Fixing that needs a
# schema decision (which columns identify a row), so it is only flagged here.
sql = u"""insert or replace into KanMusuVoice values(?,?,?,?)"""

# RETRIEVE WIKI
#
# Walk every link found in the "style_table" tables of the index page, fetch
# the linked page, and store each two-cell table row whose first cell is not
# purely numeric as (cbid, kid, event, voice).
#
url = 'http://wikiwiki.jp/kancolle/?%B4%CF%CC%BC%A5%AB%A1%BC%A5%C9%B0%EC%CD%F7'
header = {'User-Agent': 'Mozilla/5.0'}


def _fetch_soup(page_url):
    """Download *page_url* with the script's headers and return the parsed tree."""
    request = urllib2.Request(page_url, headers=header)
    # urllib2 responses are not context managers; closing() guarantees the
    # connection is released once the document has been parsed.
    with closing(urllib2.urlopen(request)) as response:
        return BeautifulSoup(response)


def _cell_text(cell):
    """Return the first text node of *cell* as UTF-8 bytes, or "" if it has none."""
    text = cell.find(text=True)
    if text is None:
        return ""
    return text.encode('utf8')


try:
    soup = _fetch_soup(url)
    # Running counter over the links that are actually processed; stored as
    # the cbid column of every row taken from that link's page.
    cbid = 0
    for table in soup.findAll("table", {"class": "style_table"}):
        for entity in table.findAll('a'):
            link_title = entity.get('title')
            link_href = entity.get('href')
            # Anchors without a title or target cannot be scraped; skip them
            # instead of dying with a KeyError halfway through the run.
            if link_title is None or link_href is None:
                continue
            cbid = cbid + 1
            kid = link_title.encode('utf8')
            soup2 = _fetch_soup(link_href)
            for table2 in soup2.findAll("table", {"class": "style_table"}):
                for row in table2.findAll("tr"):
                    cells = row.findAll("td")
                    # Only two-column rows are treated as (event, voice) pairs.
                    if len(cells) != 2:
                        continue
                    title = _cell_text(cells[0])
                    value = _cell_text(cells[1])
                    # Rows whose first cell is purely numeric are not stored.
                    if title.isdigit():
                        continue
                    # Parenthesised single argument: prints the same text
                    # under the Python 2 print statement.
                    print("%(cbid)d,%(kid)s,%(title)s : %(value)s" % {
                        'cbid': cbid, 'kid': kid, 'title': title, 'value': value})
                    conn.execute(sql, (cbid, kid, title, value))
            # Commit once per linked page so rows already scraped survive a
            # failure on a later page.
            conn.commit()
finally:
    # Always release the database handle, even when a request or parse fails.
    conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment