Skip to content

Instantly share code, notes, and snippets.

@AyumuKasuga
Created June 21, 2012 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AyumuKasuga/2965964 to your computer and use it in GitHub Desktop.
Save AyumuKasuga/2965964 to your computer and use it in GitHub Desktop.
save images from 2ch.so
#!/usr/bin/env python2
#coding: utf-8
import gevent.monkey
gevent.monkey.patch_socket()
import sys
import os
from urlparse import urlparse
import urllib
from lxml.html import document_fromstring
import gevent
from gevent.queue import Queue
def parser_2ch(src):
    """Extract the thread title and image links from a 2ch.so thread page.

    Args:
        src: HTML source of the thread page.

    Returns:
        A dict with the keys:
          'images'     -- list of URLs, each built by prefixing
                          'http://2ch.so' to the href of the parent element
                          of an <img class="img"> that sits directly inside
                          an <a>;
          'threadname' -- text of the first <span class="filetitle">, or
                          None when the page has no such element;
          'threaname'  -- the same value under the original (misspelled)
                          key, kept so existing callers keep working.
    """
    doc = document_fromstring(src)

    listimages = []
    for image in doc.xpath('//a/img[@class="img"]'):
        # .get() instead of attrib[...]: an anchor without href is skipped
        # rather than aborting the whole parse with a KeyError.
        href = image.getparent().get('href')
        if href:
            listimages.append('http://2ch.so' + href)

    # A page without a title span used to raise IndexError on [0].
    titles = doc.xpath('//span[@class="filetitle"]')
    threadname = titles[0].text if titles else None

    return {
        'threaname': threadname,
        'threadname': threadname,
        'images': listimages,
    }
def download_and_save(folder):
    """Worker loop: take image links from the shared queue and save them.

    Reads two module-level globals: ``q`` (the queue of image URLs) and
    ``allimglen`` (the total number of images, used for the progress line).

    Each link is saved into *folder* under the last '/'-separated component
    of the link. A file that already exists at that path is not downloaded
    again. After every processed link a one-line progress counter is
    rewritten in place on stdout.

    Args:
        folder: directory the images are written to.
    """
    while not q.empty():
        link = q.get()
        path = os.path.join(folder, link.split('/')[-1])
        if not os.path.exists(path):
            try:
                response = urllib.urlopen(link)
                try:
                    src = response.read()
                finally:
                    # Release the connection even if read() fails.
                    response.close()
                # Image data is binary: text mode ('w') would mangle bytes
                # on platforms that translate line endings.
                with open(path, 'wb') as f:
                    f.write(src)
            except IOError as e:
                # One bad link must not kill this worker; report and go on
                # with the rest of the queue.
                sys.stderr.write('\nfailed: %s (%s)\n' % (link, e))
        status_str = 'download: %s/%s' % (allimglen - q.qsize(), allimglen)
        # '\r' returns to the start of the line so the counter overwrites
        # the previous one instead of scrolling.
        sys.stdout.write('\r')
        sys.stdout.write(status_str)
        sys.stdout.flush()
def folder_name(url):
    """Build a default folder name from a thread URL.

    The name is "<4th-from-last>_<3rd-from-last>_<last>" of the
    '/'-separated URL parts, with a trailing '.html' removed from the last
    part. For 'http://2ch.so/b/res/12345.html' this gives '2ch.so_b_12345'.

    A '#fragment' or '?query' suffix is ignored, so links to different
    posts of the same thread map to the same folder.

    Args:
        url: thread URL.

    Returns:
        The folder name as a string.

    Raises:
        IndexError: if the URL has fewer than four '/'-separated parts.
    """
    # Drop the fragment first, then the query string.
    bare = url.split('#', 1)[0].split('?', 1)[0]
    parts = bare.split('/')
    page = parts[-1]
    # Strip '.html' only as a suffix, not anywhere inside the name.
    if page.endswith('.html'):
        page = page[:-len('.html')]
    return "%s_%s_%s" % (parts[-4], parts[-3], page)
# Script entry: saveimg.py url [folder]
#
# The names q and allimglen must stay module-level globals because
# download_and_save() reads them.
if len(sys.argv) < 2:
    print('use: saveimg.py url [folder]')
    # Non-zero status so shell callers can detect the usage error.
    sys.exit(1)
srcurl = sys.argv[1]
# Optional second argument overrides the folder derived from the URL.
userfolder = sys.argv[2] if len(sys.argv) > 2 else False

# Maps a host name to the function that parses its thread pages.
chans = {
    '2ch.so': parser_2ch,
}
# Number of concurrent download greenlets.
threads = 5

host = urlparse(srcurl).netloc
if host not in chans:
    # Previously an unsupported host was silently ignored.
    sys.stderr.write('unsupported host: %s\n' % host)
    sys.exit(1)

folder = userfolder or folder_name(srcurl)
try:
    os.mkdir(folder)
except OSError:
    # An already existing folder is fine; anything else (permissions,
    # missing parent directory, ...) is a real error and must surface.
    if not os.path.isdir(folder):
        raise

src = urllib.urlopen(srcurl).read()
img = chans[host](src)
allimglen = len(img['images'])
q = Queue()
for link in img['images']:
    q.put_nowait(link)
# Spawn the workers and wait until all of them have finished.
gevent.joinall([gevent.spawn(download_and_save, folder) for _ in range(threads)])
print('\ndone!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment