Skip to content

Instantly share code, notes, and snippets.

@NorimasaNabeta
Created December 3, 2013 22:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NorimasaNabeta/7778407 to your computer and use it in GitHub Desktop.
Save NorimasaNabeta/7778407 to your computer and use it in GitHub Desktop.
艦コレWiki に登録されている「艦娘」の台詞をデータベースに落とす。
# -*- mode: python; coding: utf-8 -*-
#
# Time-stamp: <2013-12-04 07:19:58 NorimasaNabeta>
#
import urllib2
import re
import sqlite3;
from bs4 import BeautifulSoup
# --- Database setup ---------------------------------------------------
#
# Opens (creating it on first use) the SQLite file that receives the
# scraped lines, makes sure the KanMusuVoice table exists, and prepares
# the parameterised INSERT statement used for every stored row.
#
# NOTE: KanMusuVoice declares no PRIMARY KEY or UNIQUE constraint, so the
# "or replace" clause below never finds a conflicting row; every execution
# appends a new row.
db_filename = "KanColleDb.sqlite"
conn = sqlite3.connect(db_filename)
# TEXT values read back through this connection are produced by `str`.
conn.text_factory = str
conn.execute(
    u"""create table if not exists KanMusuVoice(cbid INTEGER, kid text, event text, voice text)"""
)
# Placeholders, in order: cbid, kid, event, voice.
sql = u"""insert or replace into KanMusuVoice values(?,?,?,?)"""
# --- Retrieve the wiki and store the voice lines ----------------------
#
# Fetches the index page at `url`, follows every titled link found inside
# its "style_table" tables, and stores each two-cell table row of the
# linked page as (cbid, kid, event, voice) in KanMusuVoice.
#
url = 'http://wikiwiki.jp/kancolle/?%B4%CF%CC%BC%A5%AB%A1%BC%A5%C9%B0%EC%CD%F7'
header = {'User-Agent': 'Mozilla/5.0'}


def _fetch_soup(page_url):
    """Download *page_url* with the shared headers and return the parsed tree.

    The HTTP response is always closed, including when parsing raises.
    """
    response = urllib2.urlopen(urllib2.Request(page_url, headers=header))
    try:
        return BeautifulSoup(response)
    finally:
        response.close()


def _cell_text(cell):
    """Return the first text node of *cell* as UTF-8 bytes, or "" if none."""
    first = cell.find(text=True)
    if first is None:
        return ""
    return first.encode('utf8')


try:
    soup = _fetch_soup(url)
    cbid = 0  # running number assigned to each followed link, starting at 1
    for table in soup.findAll("table", {"class": "style_table"}):
        for entity in table.findAll('a'):
            href = entity.get('href')
            name = entity.get('title')
            # Anchors lacking a target or a title cannot be followed or
            # labelled; skip them instead of aborting the run with KeyError.
            # They do not consume a cbid.
            if not href or name is None:
                continue
            cbid += 1
            kid = name.encode('utf8')
            soup2 = _fetch_soup(href)
            for table2 in soup2.findAll("table", {"class": "style_table"}):
                for row in table2.findAll("tr"):
                    cells = row.findAll("td")
                    # Only rows made of exactly two <td> cells are stored.
                    if len(cells) != 2:
                        continue
                    title = _cell_text(cells[0])
                    value = _cell_text(cells[1])
                    # Rows whose first cell is purely numeric are not stored.
                    if title.isdigit():
                        continue
                    print("%(cbid)d,%(kid)s,%(title)s : %(value)s" % {
                        'cbid': cbid, 'kid': kid, 'title': title, 'value': value})
                    conn.execute(sql, (cbid, kid, title, value))
    # Single commit after a complete crawl: an interrupted run stores nothing,
    # so re-running it does not pile up rows from a partial earlier attempt.
    conn.commit()
finally:
    # Release the database handle even when a fetch or parse step fails.
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment