A script that batch-saves the images when a single tweet on Twitter has two or more pictures attached.
# twitter multi pictures downloader for Python 2.7
# 2015.07.25
# @Carotene
# How to Use:
#  1. Copy the URL of a tweet that has two or more pictures to the clipboard.
#  2. Run this script, passing the save directory with -d/--directory
#     (see the example invocation below).
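# Example invocation (the script name and directory are hypothetical; the
# tweet URL is assumed to already be on the clipboard):
#   python twitter_multi_dl.py -d C:\pics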
from bs4 import BeautifulSoup
from argparse import ArgumentParser
import os, sys, urllib, urllib2, win32clipboard
def _reporthook(numblocks, blocksize, filesize, url=None):
    #print "reporthook(%s, %s, %s)" % (numblocks, blocksize, filesize)
    base = os.path.basename(url)
    # filesize can be -1 (or 0) when the server sends no Content-Length.
    if filesize > 0:
        percent = min((numblocks * blocksize * 100) / filesize, 100)
    else:
        percent = 100
    if numblocks != 0:
        sys.stdout.write("\b" * 70)
    sys.stdout.write("%-66s%3d%%" % (base, percent))
def download(url, dst):
    print "get url '%s' to '%s'" % (url, dst)
    if sys.stdout.isatty():
        urllib.urlretrieve(url, dst, lambda nb, bs, fs, url=url: _reporthook(nb, bs, fs, url))
        sys.stdout.write('\n')
    else:
        urllib.urlretrieve(url, dst)
def clipboard():
    win32clipboard.OpenClipboard()
    cb = win32clipboard.GetClipboardData()
    win32clipboard.CloseClipboard()
    return cb
def soup(url):
    # Fetch the page and parse it with the stdlib HTML parser.
    sockfp = urllib2.urlopen(url)
    html = sockfp.read()
    return BeautifulSoup(html, 'html.parser')
def main():
    parser = ArgumentParser()
    parser.add_argument("-d", "--directory", dest="savedir", help="save directory", type=str, required=True)
    args = parser.parse_args()
    url = clipboard()
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    if '//twitter.com/' not in url:
        print 'Error: url is not a twitter page!'
        return
    print 'page url:', url
    # Tweet URLs look like https://twitter.com/<userid>/status/<pageid>
    urldirs = url.split('/')
    userid = urldirs[3]
    pageid = urldirs[5]
    s = soup(url)
    card = s.find("div", class_="cards-base cards-multimedia")
    if card is None:
        print 'Error: no multi-picture card found on this page!'
        return
    for d in card.findAll('div'):
        p = d.attrs.get('data-url', None)
        if p is None:
            continue
        filename = os.path.basename(p)
        filepath = os.path.join(args.savedir, '%s_%s_%s' % (userid, pageid, filename))
        download(p, filepath)
if __name__ == '__main__':
    main()
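# Saved files follow the pattern <savedir>/<userid>_<pageid>_<image name>,
# e.g. (hypothetical values) C:\pics\someuser_123456789012345678_ABcdEFgh.jpg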