Skip to content

Instantly share code, notes, and snippets.

@poochin
Created March 25, 2011 04:18
Show Gist options
  • Save poochin/886355 to your computer and use it in GitHub Desktop.
Save poochin/886355 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# scraping newer manga of club sunday.
import re
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
# Page listing the newer Club Sunday manga; fetched and scanned by main().
url = 'http://club.shogakukan.co.jp/kod/new.html'
def _extract_viewer_links(markup):
    """Return the viewer URLs found in ``openViewer('...')`` calls.

    Each link is the text between ``openViewer('`` and the first following
    ``&amp;otk``, with HTML-escaped ampersands turned back into ``&``.

    The match is non-greedy so that several ``openViewer`` calls sitting on
    the same line of markup yield one link each instead of a single span
    running from the first call to the last ``&amp;otk`` on that line.
    """
    rawlinks = re.findall(r"(?<=openViewer\(').+?(?=&amp;otk)", markup)
    return [link.replace('&amp;', '&') for link in rawlinks]


def main():
    """Fetch the listing page at ``url`` and print the viewer links in it.

    Only the ``<div id="mainWrapper">`` element is scanned. If that element
    is absent an empty list is printed.
    """
    response = urlopen(url)
    try:
        rawhtml = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    soup = BeautifulSoup(rawhtml)
    container = soup.find('div', id='mainWrapper')
    if container is None:
        # Nothing to scan; make the empty result explicit rather than
        # relying on str(None) happening to contain no matches.
        links = []
    else:
        links = _extract_viewer_links(str(container))
    # A single parenthesised argument prints the same under the Python 2
    # print statement as the bare form did.
    print(links)
if __name__ == '__main__':
    # Run the scraper only when executed as a script, not when imported.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment