Skip to content

Instantly share code, notes, and snippets.

@rb-roomba
Created January 16, 2016 06:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rb-roomba/09f74db27f461f428053 to your computer and use it in GitHub Desktop.
Save rb-roomba/09f74db27f461f428053 to your computer and use it in GitHub Desktop.
test
#! /usr/bin/python
# -*- coding: utf-8 -*-
from HTMLParser import HTMLParser
import urllib2
class _RankingParser(HTMLParser):
    """HTML parser that collects IDs from book ``<div>`` start tags.

    A tag is treated as a book entry when it is a ``div`` whose FIRST
    attribute is ``class`` with the exact value ``BOOK_CLASS``.  The ID is
    taken from the value of the SECOND attribute (whatever its name):
    everything after the first underscore, or the whole value when it
    contains no underscore.
    """

    # Exact ``class`` attribute value that marks a book entry.
    BOOK_CLASS = "book book_of_hondana"

    def __init__(self):
        # Call the base initializer explicitly instead of via super():
        # the Python 2 ``HTMLParser`` class is old-style, where super()
        # does not work.
        HTMLParser.__init__(self)
        self.id_list = []

    def handle_starttag(self, tag, attrs):
        """Record the ID of a book ``div``; ignore every other tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``; ``value`` is ``None`` for a valueless attribute.
        """
        # A book entry needs at least two attributes: class marker + ID.
        if tag != 'div' or len(attrs) < 2:
            return
        class_name, class_value = attrs[0]
        if class_name != "class" or class_value != self.BOOK_CLASS:
            return
        raw_id = attrs[1][1]
        if raw_id is None:
            # Valueless second attribute: nothing to extract.
            return
        # Keep the part after the first '_' (the whole value if none).
        self.id_list.append(raw_id.split('_', 1)[-1])


def parse_ranking(ranking_html):
    """Extract book IDs from the HTML of a ranking page.

    Args:
        ranking_html: HTML source of the page, as a string.

    Returns:
        A list of ID strings in document order, one per ``div`` start tag
        whose first attribute is ``class="book book_of_hondana"`` and which
        has a second attribute with a value.  Tags that do not match this
        shape are skipped silently.
    """
    parser = _RankingParser()
    parser.feed(ranking_html)
    parser.close()
    return parser.id_list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment