Skip to content

Instantly share code, notes, and snippets.

@gitcrtn
Last active September 11, 2015 08:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gitcrtn/964cb90120a8ceeaf551 to your computer and use it in GitHub Desktop.
Save gitcrtn/964cb90120a8ceeaf551 to your computer and use it in GitHub Desktop.
twitterで1ツイートで2つ以上画像が貼られてるときに一括保存するやつ
# twitter multi pictures downloader for Python 2.7
# 2015.07.25
# @Carotene
# How to Use:
# 1. Copy the URL of a tweet that has two or more attached pictures to the clipboard.
# 2. Run this script, passing the save directory with -d/--directory.
from bs4 import BeautifulSoup
from argparse import ArgumentParser
import os, sys, urllib, urllib2, datetime, win32clipboard
def _reporthook(numblocks, blocksize, filesize, url=None):
    """Write a single-line download progress indicator to stdout.

    The signature matches the ``reporthook`` callback of
    ``urllib.urlretrieve`` plus an extra ``url`` used for the label.

    Args:
        numblocks: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        filesize: Total size in bytes.  A zero or negative value (e.g. the
            -1 reported when the size is unknown) cannot be turned into a
            ratio, so 100 is displayed in that case.
        url: URL being fetched; only its basename is displayed.  May be
            None, in which case the label is left blank.
    """
    base = os.path.basename(url) if url else ""
    if filesize > 0:
        # Floor division keeps the result an int on both Python 2 and 3.
        percent = min((numblocks * blocksize * 100) // filesize, 100)
    else:
        # Empty file or unknown size: no meaningful percentage exists.
        percent = 100
    if numblocks != 0:
        # Rewind over the previously written 70-character line.
        sys.stdout.write("\b" * 70)
    sys.stdout.write("%-66s%3d%%" % (base, percent))
    # No newline is written, so flush to make the update visible right away.
    sys.stdout.flush()
def download(url, dst):
    """Retrieve ``url`` and store it at the local path ``dst``.

    A one-line notice is printed first.  When stdout is a terminal the
    transfer is reported through ``_reporthook`` and finished with a
    newline; otherwise the file is fetched without progress output.
    """
    print("get url '%s' to '%s'" % (url, dst))

    if not sys.stdout.isatty():
        # Output is redirected: skip the progress display.
        urllib.urlretrieve(url, dst)
        return

    def show_progress(block_count, block_bytes, total_bytes):
        # Forward the urlretrieve callback, adding the URL for the label.
        _reporthook(block_count, block_bytes, total_bytes, url)

    urllib.urlretrieve(url, dst, show_progress)
    sys.stdout.write('\n')
def clipboard():
    """Return the current clipboard contents via ``win32clipboard``.

    The data is read with ``GetClipboardData()`` using its default format.
    The clipboard is always closed again, even if reading raises, so a
    failed read does not leave it held open by this process.
    """
    win32clipboard.OpenClipboard()
    try:
        return win32clipboard.GetClipboardData()
    finally:
        win32clipboard.CloseClipboard()
def soup(url):
    """Fetch ``url`` and return its body parsed as a ``BeautifulSoup`` tree.

    The HTTP response is closed as soon as the body has been read, and the
    parser is named explicitly so the result does not depend on which
    optional parsers happen to be installed.
    """
    sockfp = urllib2.urlopen(url)
    try:
        html = sockfp.read()
    finally:
        # Release the connection even when read() fails.
        sockfp.close()
    return BeautifulSoup(html, "html.parser")
def main():
    """Download every picture attached to the tweet whose URL is on the clipboard.

    The save directory comes from ``-d/--directory`` and defaults to the
    current directory when the option is omitted.  Each picture is stored
    as ``<userid>_<pageid>_<original file name>`` inside that directory.
    An error message is printed and the function returns early when the
    URL is not a twitter.com tweet page or no picture container is found.
    """
    parser = ArgumentParser()
    parser.add_argument("-d", "--directory", dest="savedir", help="save directory", type=str)
    args = parser.parse_args()
    # Without -d argparse yields None, which would build the path 'None/...'.
    savedir = args.savedir or os.curdir

    url = clipboard().strip()
    # str.startswith accepts a tuple of prefixes; testing a tuple with
    # ``in`` against a string raises TypeError.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    if '//twitter.com/' not in url:
        print('Error: url is not twitter!')
        return
    print('page url: %s' % url)

    # Split form: ['https:', '', 'twitter.com', <user>, 'status', <id>, ...]
    urldirs = url.split('/')
    if len(urldirs) < 6:
        print('Error: url is not a tweet page!')
        return
    userid = urldirs[3]
    # Drop any query string so it does not end up in the file name.
    pageid = urldirs[5].split('?', 1)[0]

    container = soup(url).find("div", class_="cards-base cards-multimedia")
    if container is None:
        print('Error: no pictures found in the page!')
        return

    for d in container.findAll('div'):
        p = d.attrs.get('data-url')
        if p is None:
            continue
        filename = os.path.basename(p)
        filepath = os.path.join(savedir, '%s_%s_%s' % (userid, pageid, filename))
        download(p, filepath)
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment