Skip to content

Instantly share code, notes, and snippets.

@ymotongpoo
Created February 1, 2011 17:03
Show Gist options
  • Save ymotongpoo/806160 to your computer and use it in GitHub Desktop.
Save ymotongpoo/806160 to your computer and use it in GitHub Desktop.
Download script for specific files hosted on a mod_uploader site
#!/bin/bash
# Extract every .zip archive found under the download directory, then replace
# each extracted archive with a zero-byte file of the same name.
download_dir=./download

# Stop if the directory is missing; otherwise the loop below would operate on
# whatever directory the script happened to be started from.
cd "$download_dir" || exit 1

# Collect the archive list before touching anything, using NUL-delimited names
# so paths containing spaces or glob characters stay intact.
archives=()
while IFS= read -r -d '' file; do
    archives+=("$file")
done < <(find . -name "*.zip" -print0)

for file in "${archives[@]}"; do
    # Zero-byte files are the placeholders this script leaves behind; there is
    # nothing to extract from them.
    [ -s "$file" ] || continue

    # Only discard the archive once it has been extracted successfully.
    if unzip "$file"; then
        rm "$file"
        touch "$file"
    else
        echo "failed to extract: $file" >&2
    fi
done
# -*- coding: utf-8 -*-
import re
import string
import urllib
import urllib2
import cookielib
import os.path
# Base URL of the mod_uploader instance and the character encoding of its pages.
#mod_uploader = ur"http://upload0.dyndns.org/up/2/_/"
mod_uploader = u"http://up02.ayame.jp/up/"
encoding = "euc-jp"

# Fields posted to the download form.
form_dict = {u'download_pass': u"junk",  # hard-coded password
             u'code_pat': u"京",
             u'submit': u"ダウンロード"}

# Regex template for one download link on an index page; the "%s" is filled
# with an entry of `files`.  The base URL is escaped so that characters such
# as "." in the host name match literally, and regex backslashes are written
# out explicitly instead of relying on unrecognised string escapes.
link_pattern = (u'<a\\ href="(?P<url>' + re.escape(mod_uploader)
                + u'jump/\\d+\\.zip/attatch)">(?P<name>%s)</a>')

# Number of index pages to scan by default.
max_page = 32

# File names to download; each entry is a regular expression.
files = [u".*馬鹿力.*cut.*\\.zip",
         u".*爆笑\\ ?cut.*\\.zip",
         u".*バナナ.*\\.zip"]

# Number of bytes requested per read while downloading.
seek_size = 1024 * 512

# Boundary token used for multipart/form-data bodies.
boundary = u"--------python"

# Directory the downloaded files are written to.
download_dir = "./download"
def build_opener():
    """Create an opener that keeps cookies and sends browser-like headers.

    Returns:
        A urllib2 opener with a fresh in-memory cookie jar and redirect
        handling, whose default headers carry a custom User-Agent.
    """
    jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar),
                                  urllib2.HTTPRedirectHandler())
    # Request an uncompressed body: the responses obtained through this opener
    # are decoded or written to disk as-is, and urllib2 does not undo a
    # gzip/deflate Content-Encoding on its own.
    opener.addheaders = [("User-Agent", "Mozilla/5.0 (compatible; python)"),
                         ("Connection", "keep-alive"),
                         ("Accept-Encoding", "identity")]
    return opener
def multipart_formdata(form_dict):
    """Serialise *form_dict* into a multipart/form-data request body.

    Every key/value pair becomes one part introduced by the module-level
    ``boundary``; the parts are followed by the closing delimiter and a
    trailing CRLF.

    Returns:
        The body encoded with the module-level ``encoding``.
    """
    delimiter = u'--' + boundary
    parts = []
    for field, content in form_dict.items():
        parts.extend([delimiter,
                      u'Content-Disposition: form-data; name="%s"' % field,
                      u'',
                      content])
    # Closing delimiter, then an empty element so the body ends with CRLF.
    parts.extend([delimiter + u"--", u''])
    return u"\r\n".join(parts).encode(encoding)
def extract_file_url(opener, files, page=max_page):
    """Scan index pages 1..page and collect the links whose names match.

    Args:
        opener: object whose ``open(url)`` returns a readable response.
        files: regular expressions for the link text; each one is
            substituted into ``link_pattern``.
        page: number of the last index page to visit (inclusive).

    Returns:
        A list of dicts with the keys ``'url'`` and ``'name'`` (the named
        groups of ``link_pattern``), ordered by page, then by pattern, then
        by position in the page.  Each URL is reported once, even when its
        name matches several patterns.
    """
    # The expressions do not change between pages, so build them only once.
    matchers = [re.compile(link_pattern % f, re.UNICODE) for f in files]
    patterns = []
    seen_urls = set()
    for i in range(1, page + 1):
        url = mod_uploader + "index/" + str(i)
        print("===> parsing %s" % url)
        fp = opener.open(url)
        try:
            data = fp.read().decode(encoding)
        finally:
            # Release the connection even when reading or decoding fails.
            fp.close()
        for matcher in matchers:
            for m in matcher.finditer(data):
                found = m.groupdict()
                if found['url'] in seen_urls:
                    continue
                seen_urls.add(found['url'])
                patterns.append(found)
    return patterns
def download_file(opener, patterns):
    """Download every file described by *patterns* into ``download_dir``.

    For each entry the 'jump' URL is turned into the matching 'download' URL
    and the password form is posted to it.  Responses smaller than 5 MB are
    treated as failures and skipped, as are files that already exist locally
    with at least the announced size.  A smaller existing file is downloaded
    again from the beginning (there is no partial resume).

    Args:
        opener: object whose ``open(request, data)`` returns a response
            offering ``info()``, ``read(n)`` and ``close()``.
        patterns: iterable of dicts with the keys ``'url'`` and ``'name'``.
    """
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)
    # The form body is identical for every request.
    body = multipart_formdata(form_dict)
    for p in patterns:
        name = p['name']
        # Plain byte strings for the URL and headers: Python 2's httplib can
        # fail with UnicodeDecodeError when a unicode request head is joined
        # with a non-ASCII byte body such as this form.
        referer = p['url'].encode('ascii')
        url = p['url'].replace(u'jump', u'download').encode('ascii')
        req = urllib2.Request(url)
        req.add_header("Referer", referer)
        req.add_header("Content-Type",
                       "multipart/form-data; boundary=%s" % boundary)
        conn = opener.open(req, body)
        try:
            try:
                length = int(conn.info().get('Content-Length', ''))
            except ValueError:
                # Without a size the "too small" check below cannot be made.
                print("### %s : no usable Content-Length, skipped" % name)
                continue
            # Anything below 5 MB is considered a failed download.
            if length < 1024 * 1024 * 5:
                print("### %s : size too small !!! -> %d byte"
                      % (name, length))
                continue
            # The name comes from the remote page; keep only its final path
            # component so it cannot point outside download_dir.
            save_path = os.path.join(download_dir, os.path.basename(name))
            if os.path.exists(save_path):
                if os.path.getsize(save_path) >= length:
                    print("### %s is already downloaded" % name)
                    continue
                print("### '%s' : re-download" % name)
            print("### '%s' : start download !!!" % name)
            _save_stream(conn, save_path, name, length)
        finally:
            conn.close()


def _save_stream(conn, save_path, name, length):
    """Copy the body of *conn* to *save_path*, printing the bytes left."""
    remaining = length
    with open(save_path, 'wb') as out:
        while True:
            print(" === downloading '%s' : %d bytes left" % (name, remaining))
            chunk = conn.read(seek_size)
            if not chunk:
                break
            out.write(chunk)
            # Count what actually arrived; a read may return fewer bytes
            # than were requested.
            remaining = max(remaining - len(chunk), 0)
def main():
    """Find the matching uploads on the index pages and download them."""
    session = build_opener()
    download_file(session, extract_file_url(session, files))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment