Skip to content

Instantly share code, notes, and snippets.

@wyyqyl
Last active November 1, 2016 14:20
Show Gist options
  • Save wyyqyl/9a5ea9b5ffe675514585 to your computer and use it in GitHub Desktop.
Save wyyqyl/9a5ea9b5ffe675514585 to your computer and use it in GitHub Desktop.
Download subtitles from shooter
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import hashlib
import sys
import math
import json
import urllib2
import urllib
try:
import chardet
except ImportError:
print 'Install chardet via `pip install chardet`'
sys.exit(1)
def download(url, headers):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request.
        headers: Dict of HTTP headers sent with the request.

    Returns:
        The response body exactly as read from the connection.

    Raises:
        Whatever ``urllib2.urlopen`` raises on network or HTTP errors;
        nothing is caught here.
    """
    # Supplying a data argument (even an empty one) makes urllib2 issue a
    # POST instead of a GET.
    req = urllib2.Request(url, '', headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()
def get_hash(name):
    """Return MD5 hex digests of four sample blocks of a file.

    Up to 4096 bytes are read at each of these offsets, in this order:

    1. byte 4096 (or the end of the file, if it is shorter),
    2. two thirds of the file size,
    3. one third of the file size,
    4. 8192 bytes before the end (or the start, if the file is shorter).

    A block that would run past the end of the file is truncated, so very
    small files yield digests of short (possibly empty) blocks.
    ``sub_downloader`` joins the four digests into the ``filehash`` query
    parameter.

    Args:
        name: Path of the file to fingerprint.

    Returns:
        A list of four hexadecimal MD5 digest strings.
    """
    block_size = 4096
    digests = []
    with open(name, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        # Floor division keeps every offset an exact integer regardless of
        # whether "/" performs integer or true division.
        offsets = (
            min(size, 4096),
            size // 3 * 2,
            size // 3,
            max(0, size - 8192),
        )
        for start in offsets:
            end = min(start + block_size, size)
            f.seek(start)
            block = f.read(end - start)
            digests.append(hashlib.md5(block).hexdigest())
    return digests
def _detect_encoding(content):
    """Return chardet's encoding guess for ``content``, or None if it has none."""
    # If the plain bytes are not recognised, retry with a UTF-16 little-endian
    # and then big-endian byte-order mark prepended, in case the payload is
    # UTF-16 that was saved without a BOM.
    for bom in (b'', b'\xFF\xFE', b'\xFE\xFF'):
        encoding = chardet.detect(bom + content).get('encoding')
        if encoding is not None:
            return encoding
    return None


def sub_downloader(path, lang):
    """Download the subtitles shooter.cn lists for one video file.

    Files whose extension is not .mkv, .avi or .mp4 (compared
    case-insensitively) are ignored. Each downloaded subtitle is re-encoded
    to UTF-8 and written next to the video as
    ``<video name>.<lang>.<index>.<ext>``. Progress and errors are printed.

    Args:
        path: Path of the video file.
        lang: Language code placed in the ``lang`` query parameter and in
            the subtitle file names.

    Returns:
        None.
    """
    dirname = os.path.dirname(path)
    filename = os.path.basename(path)
    name, ext = os.path.splitext(filename)
    # Lower-case the extension so files such as "MOVIE.MKV" are not skipped.
    if ext.lower() not in ('.mkv', '.avi', '.mp4'):
        return

    digests = get_hash(path)
    headers = {'User-Agent': 'wyyqyl'}
    # '%3B' is a URL-encoded ';' separating the block digests.
    filehash = '%3B'.join(digests)
    url = ('http://www.shooter.cn/api/subapi.php?filehash=' + filehash +
           '&format=json&pathinfo=' + urllib.pathname2url(filename) +
           '&lang=' + lang)
    print(url)
    response = download(url, headers)
    # A body starting with byte 0xff is treated as "no subtitles"; an empty
    # body is handled the same way instead of failing on the first-byte check.
    if not response or response[:1] == b'\xff':
        print('❌ [{}] It doesn\'t exist'.format(lang))
        return
    print('📢 [{}] It exists, downloading'.format(lang))
    try:
        sub_infos = json.loads(response)
    except ValueError as e:
        # A non-JSON body should not abort processing of the remaining files.
        print(e)
        print('❌ [{}] Error occured'.format(lang))
        return

    # idx counts the entries for which at least one file was saved; all files
    # of the same entry share one index.
    idx = 0
    for sub_info in sub_infos:
        if 'Files' not in sub_info:
            continue
        success = False
        for file_info in sub_info['Files']:
            try:
                url = file_info['Link'].replace('\u0026', '&')
                content = download(url, headers)
                encoding = _detect_encoding(content)
                if encoding is None:
                    # Encoding unknown: skip this subtitle file.
                    continue
                content = content.decode(encoding, 'ignore').encode('utf-8')
                subtitle_name = name + '.' + lang + "." + str(idx) + "." + file_info['Ext']
                with open(os.path.join(dirname, subtitle_name), 'wb') as subtitle:
                    subtitle.write(content)
                success = True
            except Exception as e:
                # Best effort: report the failure and try the next file.
                print(e)
                print('❌ [{}] Error occured'.format(lang))
        if success:
            idx += 1
    print('✅ [{}] {} subtitles downloaded'.format(lang, idx))
def main():
    """Walk the directory given as the first command-line argument and try
    to download Chinese ('chn') and English ('eng') subtitles for every file
    found beneath it.

    Exits with status 1 and a usage message when no directory is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: {} <directory>\n'.format(sys.argv[0]))
        sys.exit(1)
    for root, _, files in os.walk(sys.argv[1]):
        for filename in files:
            path = os.path.join(root, filename)
            print('💪 Processing {}'.format(path))
            for lang in ['chn', 'eng']:
                sub_downloader(path, lang)
            print('\n')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment