Last active
August 10, 2017 17:25
-
-
Save ChronoMonochrome/979d921d8161c56316f1f572ad040cf3 to your computer and use it in GitHub Desktop.
Video grabber
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tabulate==0.7.7
bs4==0.0.1
httplib2==0.10.3
requests==2.18.3
mechanize==0.3.5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os, sys, errno, time | |
class Logger(object):
    """Tee-style writer: mirrors every message to stdout and to a log file.

    The ``sys.stdout`` / ``sys.stderr`` objects that are current when the
    instance is created are remembered, so the logger keeps a handle on
    the real streams even if it is later installed in their place.
    """

    def __init__(self, log):
        """Open the log file *log* for writing, truncating any old content."""
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.log = open(log, "wb")
        self.log_file = log

    @staticmethod
    def _to_bytes(message):
        """Return *message* as bytes suitable for the binary log file."""
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        # Encoding explicitly avoids the implicit ASCII conversion that
        # fails on non-ASCII text when writing to a file opened in "wb".
        if isinstance(message, type(u"")):
            return message.encode("utf-8")
        return message

    def write(self, message):
        """Write *message* to the saved stdout and append it to the log."""
        self.stdout.write(message)
        self.log.write(self._to_bytes(message))

    def flush(self):
        """Flush both the saved stdout and the log file."""
        self.stdout.flush()
        self.log.flush()

    def catch(self, _raise = False):
        """Report the exception currently being handled.

        Must be called from inside an ``except`` block.  A one-line summary
        (calling function, exception type, line number, message) is
        printed and the full traceback is appended to the log file.

        _raise -- when true, the summary and a pointer to the log file are
                  also written to the saved stderr and the exception is
                  re-raised.
        """
        import traceback  # local import keeps this class self-contained

        exc_type, exc_obj, exc_tb = sys.exc_info()
        if exc_type is None:
            # Not called from an exception handler: nothing to report.
            return

        # Name of the function that called catch(), i.e. where the
        # exception was handled.
        caller_name = sys._getframe().f_back.f_code.co_name
        message = u"%s: %s on line %d: %s\n" % (caller_name, exc_type, exc_tb.tb_lineno, exc_obj)
        print(message)

        # Actually persist the traceback: the stderr text below promises
        # the user that it can be found in the log file.
        self.log.write(self._to_bytes(traceback.format_exc()))
        self.log.flush()

        if _raise:
            self.stderr.write(message)
            self.stderr.write("The full backtrace was saved in %s.\n" % self.log_file)
            raise
def mkdir_p(path):
    """Create directory *path* and any missing parents, like ``mkdir -p``.

    An already existing directory is not an error.  Any other ``OSError``
    (including *path* existing as a non-directory) is propagated.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
# The log directory has to exist before Logger opens its file inside it.
mkdir_p("logs")
logger = Logger("logs/downloader_log.txt")
# Uncomment to route everything the script prints through the logger.
#sys.stdout = logger
#sys.stderr = logger

# Clipboard support is optional: when the module cannot be imported, ``cp``
# is set to None so later code can test for it instead of hitting a
# NameError.
try:
    import win32clipboard as cp
except ImportError:
    cp = None

# BeautifulSoup is mandatory: log the failure and re-raise it.
try:
    from bs4 import BeautifulSoup
except ImportError:
    logger.catch(_raise = True)

import httplib2
import urllib2
import urlparse
import requests
import requests.utils
import cookielib
import pickle

# File in which the site cookies are cached between runs.
COOKIE_FILE = ".cookie.txt"
def save_cookies(cookiejar, path):
    """Pickle the cookies in *cookiejar* to *path* as a plain dict.

    The jar is flattened with ``requests.utils.dict_from_cookiejar`` before
    pickling.  The file is opened in binary mode, as pickle requires.
    """
    with open(path, "wb") as f:
        pickle.dump(requests.utils.dict_from_cookiejar(cookiejar), f)


def load_cookies(path):
    """Load a cookie file written by ``save_cookies`` and return a cookie jar.

    SECURITY: unpickling executes whatever the file tells it to.  Only load
    cookie files created by this script, never one from an untrusted
    source.
    """
    with open(path, "rb") as f:
        raw = f.read()
    try:
        stored = pickle.loads(raw)
    except (pickle.UnpicklingError, ValueError):
        # Earlier versions of this script wrote the pickle in text mode,
        # which on Windows rewrites "\n" as "\r\n".  Retry with the line
        # endings normalised so such legacy files stay readable.
        stored = pickle.loads(raw.replace(b"\r\n", b"\n"))
    return requests.utils.cookiejar_from_dict(stored)
# One-off bootstrap: when no cookie cache exists yet, visit the site once
# with a browser-like client and store whatever cookies it hands out.
if not os.path.exists(COOKIE_FILE):
    print("creating cookie file")
    try:
        # mechanize is only needed for this bootstrap, so it is imported here.
        import mechanize
        br = mechanize.Browser()
        try:
            cj = cookielib.LWPCookieJar()
            br.set_cookiejar(cj)

            br.set_handle_equiv(True)
            br.set_handle_gzip(True)
            br.set_handle_redirect(True)
            br.set_handle_referer(True)
            br.set_handle_robots(False)
            br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

            br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.1144')]

            # The response body is not needed; the request is made only
            # for the cookies it leaves in the jar.
            br.open('https://shikimori.org/animes/33926-quan-zhi-gao-shou')
            # ``cj`` is the jar installed on the browser above.
            save_cookies(cj, COOKIE_FILE)
        finally:
            # Release the browser even when the request or the save fails.
            br.close()
    except Exception:
        logger.catch(_raise = True)

print("load cookies")
shiki_cookies = load_cookies(COOKIE_FILE)
#from urllib import unquote | |
def html_parser(html):
    """Parse *html* with BeautifulSoup using the "html.parser" backend."""
    return BeautifulSoup(html, "html.parser")


def url_reader_def(url):
    """Fetch *url* with ``urllib2.urlopen`` and return the response body."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def url_reader_with_opener(url, opener=None):
    """Fetch *url* through a urllib2 opener and return the response body.

    opener -- object with an ``open(request)`` method; a default opener
              from ``urllib2.build_opener()`` is used when omitted.  (The
              previous version read ``self.opener``, which does not exist
              at module level, so every call raised NameError.)
    """
    if opener is None:
        opener = urllib2.build_opener()
    response = opener.open(urllib2.Request(url))
    try:
        return response.read()
    finally:
        response.close()


def url_reader_http(url):
    """Fetch *url* with httplib2 and return the content part of the result.

    The previous version used an undefined global ``http``; a fresh
    ``httplib2.Http`` client is created per call instead.
    """
    return httplib2.Http().request(url)[1]


# Not referenced by any code visible in this file; presumably a retry count
# and the delay in seconds between attempts -- TODO confirm before use.
CONN_ATTEMPTS = 20
CONN_INTERVAL = 0.5
class VideoGrabber(object):
    """Resolve direct video URLs for a page that embeds a player iframe.

    ``get_mp4`` inspects the player URL and dispatches on the substrings
    "sibnet" and "smotret-anime"; any other player goes through the VK
    extractor.  Pages are read through ``url_reader`` (backed by a
    ``requests`` session here), while the sibnet and VK extractors read the
    player page with ``urllib2.urlopen`` directly.

    Instances can be used as context managers; leaving the ``with`` block
    closes the session.
    """

    url = ""
    response = ""
    use_iframe_url = None
    use_session = False
    session = None
    url_reader = None
    headers = None
    # Cookie jar installed on new sessions; subclasses/callers may set it.
    cookies = None

    def __init__(self, url, use_iframe_url=True, use_session=False, session_url="",
                 headers=None, cookie_tag=""):
        """Remember the target page and open the HTTP session.

        URL example:
        https://play.shikimori.org/animes/31933-jojo-no-kimyou-na-bouken-diamond-wa-kudakenai/video_online/10/1272721

        url            -- page URL (or player URL, see ``use_iframe_url``).
        use_iframe_url -- when true, ``_get_sibnet`` looks the player up in
                          the page's iframe instead of using the URL as is.
        use_session    -- stored on the instance; a session is opened in
                          either case.
        session_url, cookie_tag -- forwarded to ``session_start``, which
                          currently ignores them.
        headers        -- optional headers sent with every session request.
        """
        self.url = url
        self.use_iframe_url = use_iframe_url
        self.use_session = use_session
        self.headers = headers
        self.session_start(session_url, cookie_tag)
        self.url_reader = lambda url: self.session_read(url).text

    def __enter__(self):
        """Return the grabber itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Close the session; exceptions from the block are not suppressed."""
        # Closed regardless of use_session: __init__ always opens one, and
        # session_close() is a no-op when there is nothing to close.
        self.session_close()

    def __del__(self):
        """Close the session when the grabber is garbage collected."""
        self.session_close()

    def __repr__(self):
        return 'VideoGrabber(url="%s", use_iframe_url="%s")' % (self.url, self.use_iframe_url)

    def url_read(self, url):
        """Return the content of *url* as produced by ``self.url_reader``."""
        return self.url_reader(url)

    def session_read(self, url):
        """GET *url* through the session; store and return the response."""
        # An empty/missing headers value means "send no extra headers".
        self.response = self.session.get(url, headers=self.headers or None)
        return self.response

    def session_start(self, session_url="", cookie_tag=""):
        """Create the requests session if there is none yet.

        ``self.cookies`` is installed as the session's cookie jar when it
        is set.  *session_url* and *cookie_tag* are accepted because
        ``__init__`` passes them, but are not used.
        """
        if not self.session:
            self.session = requests.session()
            # Never replace the session's jar with None: requests needs a
            # real jar object there.
            if self.cookies is not None:
                self.session.cookies = self.cookies

    def session_close(self):
        """Close and forget the session; safe to call repeatedly."""
        if self.session:
            self.session.close()
            self.session = None

    def _get_sibnet(self, url=""):
        """Return the video URL for a sibnet player.

        With ``use_iframe_url`` set, *url* (default ``self.url``) is a page
        whose iframe points at the player; otherwise it is the player URL
        itself, e.g. "http://video.sibnet.ru/shell.php?videoid=2601859".
        """
        url = url if url else self.url
        if self.use_iframe_url:
            player_iframe_url = self.get_iframe(url)
        else:
            player_iframe_url = url
        return self._sibnet_from_player(player_iframe_url)

    def _sibnet_from_player(self, player_iframe_url):
        """Extract the video URL from an already resolved sibnet player URL.

        Returns "" when the player page has no text or no "m3u" marker.
        """
        player = html_parser(urllib2.urlopen(player_iframe_url).read()).text
        if player == "":
            return ""
        m3u = player.find("m3u")
        if m3u == -1:
            return ""
        # The 44 characters in front of "m3u" are taken as the playlist
        # path -- presumably a fixed-length path; TODO confirm.  max()
        # keeps the slice from wrapping around when the marker is near the
        # start of the text.
        mp4 = "http://video.sibnet.ru/" + player[max(0, m3u - 44): m3u] + "m3u8"
        try:
            # First non-comment line of the playlist, with the segment
            # extension swapped for ".mp4".
            entry = [i for i in self.url_reader(mp4).split("\n") if not i.startswith("#")][0]
            return entry.replace(".ts?", ".mp4?")
        except Exception:
            # Best effort: fall back to the playlist URL itself.
            return mp4

    def get_iframe(self, url=""):
        """Return the player iframe URL found on the page *url*.

        *url* defaults to ``self.url``.  The iframe is looked up inside the
        ``div`` with class "player-area".
        """
        page_url = url if url else self.url
        html = html_parser(self.url_read(page_url))
        src = html.find("div", attrs={"class": "player-area"}).find("iframe").get("src")
        # "http:" is prepended as before (presumably src is protocol-
        # relative), but a src that already carries a scheme is left alone.
        if "://" in src:
            return src
        return "http:" + src

    def _get_vk(self, url=""):
        """Return the source URLs (query string stripped) of a VK player.

        *url* is the player URL; when empty it is resolved with
        ``get_iframe``.  Returns [] when no video element is found.
        """
        player_iframe_url = url if url else self.get_iframe()
        html = html_parser(urllib2.urlopen(player_iframe_url).read())
        video = html.find("video", attrs={"id": "video_player"})
        if video is None:
            return []
        return [i.get("src").split("?")[0] for i in video.findAll("source") if i.get("src")]

    def _get_smotret_anime(self, url=""):
        """Return the first URL of each alternative source of the player.

        *url* is the player URL; when empty it is resolved with
        ``get_iframe``.  Returns [] when the video element or its
        "data-alternative-sources" attribute is missing.
        """
        import ast  # local import keeps this class self-contained

        player_iframe_url = url if url else self.get_iframe()
        html = html_parser(self.url_read(player_iframe_url))
        video = html.find("video")
        if video is None:
            return []
        raw_sources = video.get("data-alternative-sources")
        if not raw_sources:
            return []
        # SECURITY: this attribute comes from a remote page.  It used to be
        # passed to eval(), which would execute arbitrary code; literal_eval
        # accepts the same literal data but nothing executable.
        sources = ast.literal_eval(raw_sources)
        return [i["urls"][0].replace("\\", "") for i in sources]

    def get_mp4(self, url=""):
        """Return one direct video URL for the player, or "" if none found.

        *url* is the player URL; when empty it is resolved with
        ``get_iframe``.  For VK players the second source is preferred and
        the first one is used when there is only one.
        """
        player_iframe_url = url if url else self.get_iframe()
        if "sibnet" in player_iframe_url:
            # Reuse the URL resolved above instead of fetching the page again.
            return self._sibnet_from_player(player_iframe_url)
        if "smotret-anime" in player_iframe_url:
            candidates = self._get_smotret_anime(player_iframe_url)
            return candidates[0] if candidates else ""
        candidates = self._get_vk(player_iframe_url)
        if len(candidates) > 1:
            return candidates[1]
        return candidates[0] if candidates else ""
class ShikiGrabber(VideoGrabber):
    """VideoGrabber variant whose page reader can be chosen by the caller."""

    url = ""
    response = ""
    use_iframe_url = False
    use_session = False
    session = None
    url_reader = None
    data_id = None
    headers = None

    def __init__(self, url, url_reader = None, use_iframe_url = True, use_session = False, cookies = None):
        """Set up the grabber without calling ``VideoGrabber.__init__``.

        URL example: https://play.shikimori.org/animes/31933-jojo-no-kimyou-na-bouken-diamond-wa-kudakenai/video_online/10

        The page reader is picked in this order: the *url_reader* callable
        if one is given, else the session (started here) when
        *use_session* is true, else a plain ``urllib2.urlopen``.
        """
        self.url = url
        self.use_iframe_url = use_iframe_url
        self.use_session = use_session
        self.cookies = cookies

        if url_reader:
            reader = url_reader
        elif use_session:
            print("session start")
            self.session_start()
            print("session started")
            reader = lambda target: self.session_read(target).text
        else:
            reader = lambda target: urllib2.urlopen(target).read()
        self.url_reader = reader

    def get_data_id(self, url):
        """Return the "data-id" of the first video variant on the page *url*.

        The variant is the first ``div`` of class "b-video_variant" inside
        the first ``div`` of class "video-variant-group".
        """
        page = html_parser(self.url_read(url))
        variant_group = page.find("div", attrs = {"class": "video-variant-group"})
        first_variant = variant_group.find("div", attrs = {"class": "b-video_variant"})
        return first_variant.get("data-id")
def _copy_to_clipboard(text):
    """Best-effort copy of *text* to the clipboard; return True on success.

    Does nothing when the optional clipboard module ``cp`` is missing.
    """
    # Looked up dynamically so a missing module (name undefined or None)
    # simply disables the feature.
    clipboard = globals().get("cp")
    if clipboard is None:
        return False
    try:
        clipboard.OpenClipboard()
        try:
            clipboard.SetClipboardText(text)
            # Pause kept from the original code; presumably it gives the
            # clipboard time to settle -- TODO confirm it is needed.
            time.sleep(0.3)
        finally:
            clipboard.CloseClipboard()
    except Exception:
        # The clipboard is a convenience only; the URLs were printed already.
        return False
    return True


def _interactive_mode():
    """Prompt for page URLs until EOF/Ctrl-C and print each video URL."""
    print("entering interactive mode")
    while True:
        try:
            url = raw_input(">>> enter URL: ")
        except (EOFError, KeyboardInterrupt):
            break
        if not url.strip():
            continue
        try:
            sg = ShikiGrabber(url, url_reader = url_reader_def)
            print(sg.get_mp4())
        except Exception:
            # Report the failure and keep prompting for the next URL.
            logger.catch()


def main(argv):
    """Resolve every URL in ``argv[1:]``; go interactive when none was tried.

    Each resolved video URL is printed, and all of them together are
    copied to the clipboard when clipboard support is available.
    """
    try:
        url = None
        try:
            if len(argv) > 1:
                resolved = []
                print("start")
                sg = ShikiGrabber(argv[1], url_reader = None, use_session = True, cookies = shiki_cookies)
                print(sg)
                for url in argv[1:]:
                    sg.url = url
                    mp4_url = sg.get_mp4()
                    print(mp4_url)
                    resolved.append(mp4_url)
                _copy_to_clipboard("".join(item + "\n" for item in resolved))
        except Exception:
            logger.catch()

        # ``url`` is still None when no URL was given, or when setting up
        # the grabber failed before the first URL was attempted.
        if not url:
            _interactive_mode()
    except Exception:
        print(argv)
        logger.catch()


cond = (__name__ == "__main__")
if cond:
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment