Skip to content

Instantly share code, notes, and snippets.

@ChronoMonochrome
Last active August 10, 2017 17:25
Show Gist options
  • Save ChronoMonochrome/979d921d8161c56316f1f572ad040cf3 to your computer and use it in GitHub Desktop.
Save ChronoMonochrome/979d921d8161c56316f1f572ad040cf3 to your computer and use it in GitHub Desktop.
Video grabber
tabulate==0.7.7
bs4==0.0.1
httplib2==0.10.3
requests==2.18.3
mechanize==0.3.5
#!/usr/bin/python
import os, sys, errno, time
class Logger(object):
    """Tee-style stream: mirrors every write to the real stdout and a log file.

    Intended to be installed as sys.stdout/sys.stderr so console output is
    also captured in *log*.
    """

    def __init__(self, log):
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        # BUG FIX: was opened "wb" (binary) but write() receives text
        # messages; text mode keeps str writes working on Py2 and Py3.
        self.log = open(log, "w")
        self.log_file = log

    def write(self, message):
        """Write *message* to both the original stdout and the log file."""
        self.stdout.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both sinks (file-like contract expected by print/logging)."""
        # BUG FIX: the original flushed only the log file, so console output
        # could lag behind when this object replaces sys.stdout.
        self.stdout.flush()
        self.log.flush()

    def catch(self, _raise=False):
        """Report the current exception; re-raise it when *_raise* is true.

        Must be called from inside an ``except`` block.
        """
        caller = sys._getframe().f_back.f_code.co_name
        exc_type, exc_obj, exc_tb = sys.exc_info()
        # BUG FIX: guard against being called with no active exception,
        # which would crash on exc_tb.tb_lineno (AttributeError on None).
        if exc_type is None:
            return
        message = u"%s: %s on line %d: %s\n" % (caller, exc_type, exc_tb.tb_lineno, exc_obj)
        print(message)
        if _raise:
            self.stderr.write(message)
            self.stderr.write("The full backtrace was saved in %s.\n" % self.log_file)
            raise
def mkdir_p(path):
    """Create *path* and any missing parents; silently succeed if it exists.

    Equivalent to the shell's ``mkdir -p``.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only swallow "already exists as a directory"; re-raise anything else.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
# Set up logging: everything written through `logger` is mirrored into
# logs/downloader_log.txt.
mkdir_p("logs")
logger = Logger("logs/downloader_log.txt")
#sys.stdout = logger
#sys.stderr = logger

# Clipboard support is Windows-only; degrade gracefully elsewhere.
# BUG FIX: narrowed the bare except to ImportError so unrelated errors
# inside win32clipboard are not silently swallowed.
try:
    import win32clipboard as cp
except ImportError:
    pass
#import cgi, pickle
#sys.path.append("/home/chrono/python/site-packages")
try:
from bs4 import BeautifulSoup
except:
logger.catch(_raise = True)
import httplib2
import urllib2, urlparse, requests, requests.utils, cookielib
import pickle
# Path of the pickled cookie cache created by the first-run bootstrap below.
COOKIE_FILE = ".cookie.txt"
def save_cookies(cookiejar, path):
    """Serialize *cookiejar* to *path* as a pickled plain dict.

    BUG FIX: the file is now opened in binary mode, which pickle requires
    on Python 3 and for any non-ASCII pickle protocol; on Python 2 this is
    also safer on Windows (no newline translation corrupting the stream).
    """
    with open(path, "wb") as f:
        pickle.dump(requests.utils.dict_from_cookiejar(cookiejar), f)
def load_cookies(path):
    """Load the pickled dict written by save_cookies() and rebuild a jar.

    BUG FIX: opened in binary mode to match save_cookies() and to work on
    Python 3 (pickle streams are bytes, not text).
    """
    with open(path, "rb") as f:
        cookies = requests.utils.cookiejar_from_dict(pickle.load(f))
    return cookies
# First-run bootstrap: drive a real browser session with mechanize so
# shikimori.org hands out valid session cookies, then cache them on disk.
if not os.path.exists(COOKIE_FILE):
    print "creating cookie file"
    try:
        # create cookie jar
        import mechanize
        br = mechanize.Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        br.set_handle_equiv(True)
        br.set_handle_gzip(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        # Ignore robots.txt so the fetch is not refused.
        br.set_handle_robots(False)
        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
        # Impersonate a desktop browser; presumably the site rejects the
        # default mechanize UA — TODO confirm.
        br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.1144')]
        #r = br.open('https://shikimori.org')
        r = br.open('https://shikimori.org/animes/33926-quan-zhi-gao-shou')
        #r = br.open('https://play.shikimori.org/animes/33926-quan-zhi-gao-shou/video_online/1')
        # NOTE(review): confirm this mechanize version exposes `br.cookiejar`;
        # the jar installed above is the local `cj`.
        save_cookies(br.cookiejar, COOKIE_FILE)
        br.close()
    except:
        logger.catch(_raise = True)
print "load cookies"
shiki_cookies = load_cookies(COOKIE_FILE)
#from urllib import unquote

# Interchangeable URL-fetching strategies; all return the page body.
html_parser = lambda html: BeautifulSoup(html, "html.parser")
url_reader_def = lambda url: urllib2.urlopen(url).read()
# BUG FIX: the original referenced an undefined `self.opener` at module
# level (NameError on every call); the opener is now an explicit argument
# with a backward-compatible default.
url_reader_with_opener = lambda url, opener=None: (opener or urllib2.build_opener()).open(urllib2.Request(url)).read()
# BUG FIX: the original referenced an undefined name `http`; build the
# httplib2 client at call time instead.
url_reader_http = lambda url: httplib2.Http().request(url)[1]

# Retry policy shared by the download helpers.
CONN_ATTEMPTS = 20
CONN_INTERVAL = 0.5
class VideoGrabber:
    """Resolve a shikimori player page into direct video URLs.

    Supported hosters: sibnet, smotret-anime and vk (dispatched by
    get_mp4). The page at ``url`` is parsed for the player ``<iframe>``,
    then a hoster-specific scraper extracts the .mp4 / .m3u8 links.
    """
    # Class-level defaults; instances overwrite these in __init__.
    url = ""
    response = ""
    use_iframe_url = None
    use_session = False
    session = None
    url_reader = None
    headers = None

    def __init__(self, url, use_iframe_url=True, use_session=False, session_url="",
                 headers=None, cookie_tag=""):
        # URL example:
        # https://play.shikimori.org/animes/31933-jojo-no-kimyou-na-bouken-diamond-wa-kudakenai/video_online/10/1272721
        self.url = url
        self.use_iframe_url = use_iframe_url
        self.use_session = use_session
        self.headers = headers
        # BUG FIX: session_start() took no parameters but was called with two
        # positional arguments here (TypeError); it now accepts them.
        self.session_start(session_url, cookie_tag)
        self.url_reader = lambda u: self.session_read(u).text

    def __exit__(self, *exc_info):
        # BUG FIX: context-manager __exit__ must accept (exc_type, exc_val,
        # exc_tb); the original zero-argument form raised TypeError when the
        # object was used in a `with` statement.
        if self.use_session:
            self.session_close()

    def __del__(self):
        if self.use_session:
            self.session_close()

    def __repr__(self):
        return 'VideoGrabber(url="%s", use_iframe_url="%s")' % (self.url, self.use_iframe_url)

    def url_read(self, url):
        """Fetch *url* through whatever reader this instance was built with."""
        return self.url_reader(url)

    def session_read(self, url):
        """GET *url* via the shared session; the response is also cached."""
        if self.headers:
            self.response = self.session.get(url, headers=self.headers)
        else:
            self.response = self.session.get(url)
        return self.response

    def session_start(self, session_url="", cookie_tag=""):
        """Create the requests session on first use.

        *session_url* and *cookie_tag* are accepted for caller
        compatibility but are currently unused.
        """
        if not self.session:
            self.session = requests.session()
            # BUG FIX: plain VideoGrabber instances never define
            # self.cookies (only ShikiGrabber sets it), so reading it
            # unconditionally raised AttributeError.
            cookies = getattr(self, "cookies", None)
            if cookies is not None:
                self.session.cookies = cookies

    def session_close(self):
        """Close and forget the session (idempotent)."""
        if self.session:
            self.session.close()
            self.session = None

    def _get_sibnet(self, url=""):
        """Resolve a sibnet player page to a direct .mp4 (or .m3u8) URL."""
        url = url if url else self.url
        if self.use_iframe_url:
            player_iframe_url = self.get_iframe()
        else:
            # url example "http://video.sibnet.ru/shell.php?videoid=2601859"
            player_iframe_url = url
        player = html_parser(urllib2.urlopen(player_iframe_url).read()).text
        if player == "":
            return ""
        # The playlist path is embedded in the player JS in the 44 chars
        # preceding the literal "m3u" — fragile, but matches the page layout.
        m3u = player.find("m3u")
        mp4 = "http://video.sibnet.ru/" + player[m3u - 44: m3u] + "m3u8"
        try:
            # First non-comment playlist entry is the media segment URL.
            m3u = [i for i in self.url_reader(mp4).split("\n") if not i.startswith("#")][0]
            return m3u.replace(".ts?", ".mp4?")
        except:
            # Best effort: fall back to the playlist URL itself.
            return mp4

    def get_iframe(self):
        """Return the player iframe URL found on the shikimori page."""
        html = html_parser(self.url_read(self.url))
        player_iframe_url = "http:" + html.find("div", attrs={"class": "player-area"}).find("iframe").get("src")
        return player_iframe_url

    def _get_vk(self, url=""):
        """Return direct stream URLs (all qualities) from a vk player iframe."""
        player_iframe_url = url if url else self.get_iframe()
        html = html_parser(urllib2.urlopen(player_iframe_url).read())
        src = html.find("video", attrs={"id": "video_player"})
        return [i.get("src").split("?")[0] for i in src.findAll("source")]

    def _get_smotret_anime(self, url=""):
        """Return stream URLs from a smotret-anime player iframe."""
        player_iframe_url = url if url else self.get_iframe()
        html = html_parser(self.url_read(player_iframe_url))
        video = html.find("video")
        # SECURITY: eval() executes remote, attacker-controllable markup as
        # Python. The attribute looks JSON-shaped, so json.loads should be
        # used instead — kept as-is pending verification of the format.
        sources = eval(video.get("data-alternative-sources"))
        return [i["urls"][0].replace("\\", "") for i in sources]

    def get_mp4(self, url=""):
        """Dispatch to the hoster-specific scraper for the player iframe."""
        player_iframe_url = url if url else self.get_iframe()
        if player_iframe_url.find("sibnet") != -1:
            return self._get_sibnet()
        elif player_iframe_url.find("smotret-anime") != -1:
            return self._get_smotret_anime(player_iframe_url)[0]
        else:
            return self._get_vk(player_iframe_url)[1]
class ShikiGrabber(VideoGrabber):
    """VideoGrabber specialised for shikimori episode pages.

    Adds a pluggable *url_reader*, optional pre-loaded *cookies*, and the
    data-id lookup for video variants.
    """
    # Class-level defaults; instances overwrite these in __init__.
    url = ""
    response = ""
    use_iframe_url = False
    use_session = False
    session = None
    url_reader = None
    data_id = None
    headers = None

    def __init__(self, url, url_reader=None, use_iframe_url=True, use_session=False, cookies=None):
        "URL example: https://play.shikimori.org/animes/31933-jojo-no-kimyou-na-bouken-diamond-wa-kudakenai/video_online/10"
        self.url = url
        self.use_iframe_url = use_iframe_url
        self.use_session = use_session
        self.cookies = cookies
        # Reader priority: explicit url_reader > cookie-aware session > urllib2.
        if url_reader:
            self.url_reader = url_reader
        elif use_session:
            # COMPAT FIX: prints parenthesized so the module also parses on
            # Python 3 (identical output on Python 2).
            print("session start")
            self.session_start()
            print("session started")
            self.url_reader = lambda u: self.session_read(u).text
        else:
            self.url_reader = lambda u: urllib2.urlopen(u).read()

    def get_data_id(self, url):
        """Return the data-id of the first video variant listed on *url*."""
        html = html_parser(self.url_read(url))
        return html.find("div", attrs={"class": "video-variant-group"}).find("div", attrs={"class": "b-video_variant"}).get("data-id")
cond = (__name__ == "__main__")
if cond:
    # CLI mode: resolve every URL given on the command line to a direct
    # video link; with no usable argument, fall back to an interactive loop.
    try:
        url = None
        try:
            s = ""
            print("start")
            sg = ShikiGrabber(sys.argv[1], url_reader=None, use_session=True, cookies=shiki_cookies)
            print(sg)
            for url in sys.argv[1:]:
                #print(url)
                sg.url = url
                mp4_url = sg.get_mp4()
                print(mp4_url)
                s += mp4_url + "\n"
            # Best-effort: copy the collected links to the Windows clipboard.
            try:
                cp.OpenClipboard()
                cp.SetClipboardText(s)
                time.sleep(0.3)
                cp.CloseClipboard()
            except Exception:
                pass
        except Exception:
            # BUG FIX: the original called the undefined module-level name
            # `catch()`; the reporter lives on the logger instance.
            logger.catch()
        if not url:
            print("entering interactive mode")
            while 1:
                url = raw_input(">>> enter URL: ")
                sg = ShikiGrabber(url, url_reader=url_reader_def)
                # BUG FIX: sg.get_vk() does not exist; the method is _get_vk.
                print(sg._get_vk()[1])
    except Exception:
        print(sys.argv)
        logger.catch()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment