Skip to content

Instantly share code, notes, and snippets.

@hhsprings
Last active May 18, 2022 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hhsprings/7bad5a4a07f89f7d818b4810504dae52 to your computer and use it in GitHub Desktop.
Save hhsprings/7bad5a4a07f89f7d818b4810504dae52 to your computer and use it in GitHub Desktop.
Import a YouTube playlist and turn it into a local HTML page.
# -*- coding: utf-8 -*-
import io
import json
import re
import sys
import os
from urllib.request import urlretrieve
from urllib.request import unquote, quote
import bs4
# JavaScript statement that defines the playlist data; ``{medialist}`` receives
# the JSON text of the media list.
_MEDIALISTSCRIPTTMPL = "var medialist = {medialist};"

# Whole player page, filled in with ``str.format``.  Placeholders:
#   {pagetitle}        - text placed in <title>
#   {medialist_script} - a complete <script> element that defines ``medialist``
# Because the text goes through ``str.format``, every literal brace of the
# embedded CSS / JavaScript is doubled ("{{" and "}}").
_HTMLTMPL = """\
<html lang="ja">
<head>
<meta charset="UTF-8">
<title>{pagetitle}</title>
<link href="https://cdnjs.cloudflare.com/ajax/libs/tabulator/5.2.2/css/tabulator_site.min.css"
rel="stylesheet">
<script
type="text/javascript"
src="https://cdnjs.cloudflare.com/ajax/libs/tabulator/5.2.2/js/tabulator.min.js">
</script>
<script
type="text/javascript"
src="https://code.jquery.com/jquery-3.6.0.min.js">
</script>
<script>
var play_on_select = false;
new URL(window.location.href).search.slice(1).split("&").forEach(function (p) {{
let kv = p.split("=");
if (kv[0] == "play_on_select" && kv[1] == "1") {{
play_on_select = true;
}}
}});
</script>
<style>
body {{
margin: 1pt !important;
}}
.tabulator-header {{
font-size: 0.8em !important;
}}
.tabulator-cell {{
font-size: 0.8em !important;
padding: 1px 4px !important;
}}
.tabulator-footer {{
padding: 0 !important;
}}
.tabulator-paginator {{
padding: 0 !important;
}}
.tabulator-page {{
font-size: xx-small !important;
}}
.button_playersize {{
font-size: small !important;
padding: 1px 1.5px !important;
min-width: 6.5em;
}}
</style>
</head>
<body>
<table border=0 style="border-spacing: 0">
<tr>
<td style='height: "100%"; vertical-align: top; min-width: 960'>
<div id="player"></div>
<!-- ### -->
<table border=0 style="border-spacing: 0">
<tr>
<td>
<div style="margin-top: 1pt; line-height: 1.0;">
<input id="s90" type="button" value="160 x 90" class="button_playersize"/>
<input id="s180" type="button" value="320 x 180" class="button_playersize"/>
<input id="s270" type="button" value="480 x 270" class="button_playersize"/>
<input id="s360" type="button" value="640 x 360" class="button_playersize"/>
<input id="s450" type="button" value="800 x 450" class="button_playersize"/>
<input id="s540" type="button" value="960 x 540" class="button_playersize"/>
<br/>
<input id="s630" type="button" value="1120 x 630" class="button_playersize"/>
<input id="s720" type="button" value="1280 x 720" class="button_playersize"/>
<input id="s810" type="button" value="1440 x 810" class="button_playersize"/>
<input id="s900" type="button" value="1600 x 900" class="button_playersize"/>
<input id="s990" type="button" value="1760 x 990" class="button_playersize"/>
<input id="s1080" type="button" value="1920 x 1080" class="button_playersize"/>
</div>
<div id="disp_stvid" style="margin-top: 4pt; line-height: 1.2 !important;"></div>
</td>
<td style="width: 0.5em;"></td>
<td>
<span id="clk1cont"></span>
</td>
<td style="min-width: 4em;">
<span id="etoc_val" style="font-size: x-small"></span>
</td>
<td>
<span id="clk2cont"></span>
</td>
<td style="width: 0.5em;"></td>
<td style="line-height: 1.1 !important; vertical-align: bottom;">
<!-- f, j, k, l, m, and 0-9 -->
<input id="pbr_value"
type="number" step="0.25"
min="0" max="2.0" value="1.0"
maxlength="4"
style="width: 5em"
/>
<input
id="pbr_btn" type="button"
value="&uarr;&darr;"
title="setPlaybackRate"
accesskey="r"
/>
<br/>
<input
id="step_btn_bw" type="button"
value="&lt;."
accesskey="a"
/>
<input id="step_value_sec"
type="number" step="0.1"
min="0" value="0.5"
maxlength="6"
style="width: 5em"/>
<input
id="step_btn_fw" type="button"
value=".&gt;"
/>
</td>
</tr>
</table>
<!-- ### -->
</td>
<td style="width: 0.1em;" valign="top">
<input id='toggle_display_playlist' type="button" value="-" style="height: 9em;"/>
</td>
<td id='playlist_container' style='height: "100%"; vertical-align: top;'>
<div id="playlist"></div>
</td>
</tr>
</table>
{medialist_script}
<script>
$(document).ready(function(){{
$('input[type="number"]').on('keyup', function () {{
// Clamp with parseFloat so that fractional limits and values are honoured.
let v = parseFloat($(this).val());
try {{
let min = parseFloat($(this).attr('min'));
if (v < min) {{
v = min;
$(this).val(v);
}}
}} catch (ex) {{
}}
try {{
let max = parseFloat($(this).attr('max'));
if (v > max){{
v = max;
$(this).val(v);
}}
}} catch (ex) {{
}}
}})
}})
$('#toggle_display_playlist').on('click', function (ev) {{
let disp = $('#playlist_container').css('display');
if (disp === 'none') {{
$('#playlist_container').css('display', 'block');
$('#toggle_display_playlist').val('-');
}} else {{
$('#playlist_container').css('display', 'none');
$('#toggle_display_playlist').val('+');
}}
}});
function zeroPad(num, places) {{
var zero = places - num.toString().length + 1;
return Array(+(zero > 0 && zero)).join("0") + num;
}}
function tsToTss(ts, frac) {{
ts = parseFloat(ts).toFixed(frac);
let spl = ("" + ts).split(/\./);
// Only strictly negative values get a sign; zero must not print as "-0:00:00".
let sign = ts < 0 ? "-" : "";
let fp = spl[1];
if (fp) {{
fp = "." + fp;
}} else {{
fp = "";
}}
let d = parseInt(spl[0]);
d = Math.abs(d);
let ss_h = parseInt(d / 3600);
d -= ss_h * 3600;
let ss_m = parseInt(d / 60);
d -= ss_m * 60;
let ss_s = parseInt(d);
return (sign + ss_h + ":" + zeroPad(ss_m, 2) + ":" + zeroPad(ss_s, 2)) + fp;
}}
function drawClock(cntid, dt) {{
let cid = cntid.replace("cont", "");
let prv = document.getElementById(cid);
if (prv) {{
prv.remove();
}}
let canvas = document.createElement('canvas');
canvas.id = cid;
canvas.width = "60";
canvas.height = "60";
let cntn = document.getElementById(cntid);
cntn.parentNode.insertBefore(canvas, cntn);
let ctx = canvas.getContext("2d");
let radius = canvas.height / 2;
ctx.translate(radius, radius);
radius = radius * 0.90;
function _drawClock(ctx, radius, hour, minute, second) {{
(function () {{
ctx.beginPath();
ctx.arc(0, 0, radius, 0, 2 * Math.PI);
ctx.fillStyle = 'white';
ctx.fill();
let grad = ctx.createRadialGradient(0, 0, radius * 0.95, 0, 0, radius * 1.05);
grad.addColorStop(0, '#333');
grad.addColorStop(0.5, 'white');
grad.addColorStop(1, '#333');
ctx.strokeStyle = grad;
ctx.lineWidth = radius * 0.1;
ctx.stroke();
ctx.beginPath();
ctx.arc(0, 0, radius * 0.1, 0, 2 * Math.PI);
ctx.fillStyle = '#333';
ctx.fill();
}})();
(function () {{
function drawHand(pos, length, width) {{
ctx.beginPath();
ctx.lineWidth = width;
ctx.lineCap = "round";
ctx.moveTo(0, 0);
ctx.rotate(pos);
ctx.lineTo(0, -length);
ctx.stroke();
ctx.rotate(-pos);
}}
hour = hour % 12;
hour = (hour * Math.PI / 6) + (minute * Math.PI / (6 * 60)) + (second * Math.PI / (360 * 60));
drawHand(hour, radius * 0.5, radius * 0.07);
minute = (minute * Math.PI / 30) + (second * Math.PI / (30 * 60));
drawHand(minute, radius * 0.8, radius * 0.07);
/*second = (second * Math.PI / 30);*/
/*drawHand(second, radius * 0.9, radius * 0.02);*/
}})();
(function () {{
ctx.font = radius * 2 * 0.15 + "px arial";
ctx.textBaseline = "middle";
ctx.textAlign = "center";
for (let num = 1; num <= 12; ++num){{
let ang = num * Math.PI / 6;
ctx.rotate(ang);
ctx.translate(0, -radius * 0.75);
ctx.rotate(-ang);
ctx.fillText(num.toString(), 0, 0);
ctx.rotate(ang);
ctx.translate(0, radius * 0.75);
ctx.rotate(-ang);
}}
}})();
ctx.translate(0, 0);
ctx.rotate(0);
}}
_drawClock(ctx, radius, dt.getHours(), dt.getMinutes(), dt.getSeconds());
}}
// To understand youtube's IFrame Player API,
// see "https://developers.google.com/youtube/iframe_api_reference",
// and "https://developers.google.com/youtube/player_parameters".
var tag = document.createElement('script');
tag.src = "https://www.youtube.com/iframe_api";
var firstScriptTag = document.getElementsByTagName('script')[0];
firstScriptTag.parentNode.insertBefore(tag, firstScriptTag);
/*
* player: global player instance, which will be created on
* onYouTubeIframeAPIReady.
*/
var player;
var dispstTimerid = null;
function dispPlayerStatus(upd) {{
let st = player.getPlayerState();
let stt = "";
stt += "STATUS: ";
if (st == 3 || st == -1) {{
stt += '<span style="color: #F00; font-weight: bold">';
}}
stt += '<span style="display: inline-block; width: 1.5em; text-align: right">' + st + "</span>";
if (st == 3 || st == -1) {{
stt += '</span>';
}}
stt += ", ";
stt += "QUALITY: " + player.getPlaybackQuality() + ", ";
let pbr = player.getPlaybackRate();
if (upd) {{
$('#pbr_value').val(pbr);
}}
let ct = player.getCurrentTime();
let dur = player.getDuration();
let pospct = (100 * ct / dur);
stt += "POS: " + tsToTss(ct, 0) + "(" + pospct.toFixed(1) + "%), ";
let restsec = (dur - ct);
stt += "REST: " + tsToTss(restsec, 0);
let now = new Date();
let etoc = (new Date(now.getTime() + (restsec / pbr) * 1000));
drawClock("clk1cont", now);
drawClock("clk2cont", etoc);
$('#etoc_val').html(
"<span style='color: #aaa'>+&nbsp;" + tsToTss(restsec / pbr, 0) + "</span>" +
"<br/>&nbsp;&rarr;&nbsp;" + etoc.toLocaleTimeString());
stt = stt.replace(
/([A-Z_]+:)/g,
'<span style="font-size: x-small; color: #C0C0C0; text-decoration: underline">$1</span>')
stt = stt.replace(
/(\[.+\])/,
'<span style="color: #C0C0C0">$1</span>')
$('#disp_stvid').html(
'<span style="font-size: small;">' + stt + '</span>');
if (dispstTimerid != null) {{
clearInterval(dispstTimerid);
dispstTimerid = null;
}}
let intv = 60000;
if (st == 1 || st == 3) {{
intv = 5000 / pbr;
}}
dispstTimerid = setInterval(function () {{
dispPlayerStatus(false);
}}, intv);
}}
function onYouTubeIframeAPIReady() {{
player = new YT.Player('player', {{
width: "960",
height: "540",
playerVars: {{
rel: 0,
}},
// videoId will be set in setupPlaylist().
events: {{
"onReady": function (ev) {{
setupPlaylist();
}},
"onStateChange": function (ev) {{
dispPlayerStatus(true);
}},
"onPlaybackQualityChange": function (ev) {{
dispPlayerStatus(true);
}},
"onPlaybackRateChange": function (ev) {{
dispPlayerStatus(true);
}},
}}
}});
}}
//
function setupPlaylist() {{
var table = new Tabulator("#playlist", {{
"columnDefaults": {{
"tooltip": function (e, cell, onRendered) {{
// e - mouseover event
// cell - cell component
// onRendered - onRendered callback registration function
var el = document.createElement("div");
let rowdat = cell.getData();
// Build the tooltip from DOM nodes so that titles are shown as plain
// text and are never parsed as HTML.
let thumb = document.createElement("img");
thumb.src = rowdat["thumb"];
thumb.width = 368;
thumb.height = 207;
el.appendChild(thumb);
el.appendChild(document.createTextNode("[" + rowdat["duration"] + "]"));
let tit = document.createElement("div");
tit.style.maxWidth = "36em";
tit.textContent = rowdat["title"];
el.appendChild(tit);
return el;
}},
}},
/*"layout": "fitDataStretch",*/
"selectable": 1,
"pagination": "local",
"paginationSize": 28,
"paginationButtonCount": 10,
"headerFilterLiveFilterDelay": 1800,
"height": "920px",
"index": "videoid",
"columns": [
{{
"field": "title",
/*"frozen": true, */
"title": "title",
"headerFilter": "input",
"headerFilterFunc": "regex",
"width": "67px",
"headerSortTristate": true,
}},
{{
"field": "thumb",
"formatter": "image",
"formatterParams": {{"width": 48, "height": 27}},
"headerSortTristate": true,
}},
{{
"field": "duration",
"title": "duration",
"headerSortTristate": true,
}}
],
}});
var first_select = true;
table.on("rowSelected", function(row) {{
var videoid = row.getData().videoid;
if (play_on_select && !first_select) {{
player.loadVideoById(videoid);
}} else {{
player.cueVideoById(videoid);
}}
}});
table.on("tableBuilt", function () {{
table.setData(medialist);
table.selectRow(medialist[0]["videoid"]);
first_select = false;
}});
function _setiframesize(s) {{
player.setSize(parseInt(s[0]), parseInt(s[1]));
}};
$('#s90').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s180').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s270').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s360').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s450').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s540').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s630').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s720').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s810').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s900').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s990').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#s1080').on("click", function(ev) {{
_setiframesize(ev.target.value.split(" x "));
}});
$('#pbr_btn').on("click", function(ev) {{
let pbr = $('#pbr_value').val();
player.setPlaybackRate(parseFloat(pbr));
}});
$('#step_btn_bw').on("click", function(ev) {{
let sv = $('#step_value_sec').val();
let t = player.getCurrentTime() - parseFloat(sv);
player.seekTo(t);
}});
$('#step_btn_fw').on("click", function(ev) {{
let sv = $('#step_value_sec').val();
let t = player.getCurrentTime() + parseFloat(sv);
player.seekTo(t);
}});
}}
</script>
</body>
</html>"""
# ``youtubesearchpython`` is an optional dependency.  Without it,
# ``_getplitems_ytsp`` is ``None``.
try:
    from youtubesearchpython import VideosSearch, Playlist, Search
except ImportError:
    _getplitems_ytsp = None
else:
    def _getplitems_ytsp(dejav, url):
        """Yield ``(item, False)`` tuples for the videos found at *url*.

        *dejav* is a set of video ids that were already yielded; ids found
        here are added to it and ids already present are skipped.  A URL
        containing ``list=`` is read as a playlist, a URL containing
        ``search_query=`` as a video search for the unquoted query, and
        anything else is handed to ``Search`` unchanged.
        """
        def _unseen(entries):
            # Turn library entries into this script's item dicts, dropping
            # every video id that has been seen before.
            for entry in entries:
                video_id = entry["id"]
                if video_id in dejav:
                    continue
                dejav.add(video_id)
                yield ({
                    "title": entry["title"],
                    "duration": entry["duration"],
                    "videoId": video_id,
                    "thumbnail": entry["thumbnails"][0]["url"]
                }, False)

        if "list=" in url:
            yield from _unseen(Playlist.get(url)["videos"])
            return

        if "search_query=" in url:
            query = url.partition("search_query=")[-1]
            search = VideosSearch(unquote(query))
        else:
            search = Search(url)
        while True:
            page = search.result()['result']
            if not page:
                break
            # Search results can hold non-video entries; keep videos only.
            yield from _unseen(
                entry for entry in page if entry["type"] == "video")
            search.next()
def _getplitems(dejav, url):
    """Yield the videos found at *url* that have not been seen before.

    Two sources are tried in turn: the optional ``_getplitems_ytsp``
    helper (when it is available), then the ``ytInitialData`` JSON embedded
    in the page downloaded from *url*.

    Args:
        dejav: set of video ids already yielded; updated in place.
        url: page to read.

    Yields:
        ``(item, False)`` tuples from the helper and ``[item, pa]`` lists
        from the page.  ``item`` is a dict with the keys ``title``,
        ``duration``, ``videoId`` and ``thumbnail``; ``pa`` is true when the
        video was found below a ``shelfRenderer`` node.
    """
    try:
        if _getplitems_ytsp:
            yield from _getplitems_ytsp(dejav, url)
    except Exception as e:
        # Best effort: report the problem and fall through to the page scrape.
        print(e, file=sys.stderr)

    from urllib.request import urlcleanup

    fn, _ = urlretrieve(url)
    try:
        with io.open(fn, encoding="utf-8") as fi:
            htcont = fi.read()
    finally:
        # Drop the temporary download left behind by urlretrieve().
        urlcleanup()
    soup = bs4.BeautifulSoup(htcont, features="html.parser")

    def _vritem(pvr):
        # Convert one renderer dict into an item dict, or return None when
        # it has no title, no readable duration overlay, or a known id.
        if "title" not in pvr:
            return None
        tit = pvr["title"]
        if "runs" in tit:
            tit = tit["runs"][0]["text"]
        else:
            tit = tit["simpleText"]
        dur = ""
        if "lengthText" in pvr:
            dur = pvr["lengthText"]["simpleText"]
        elif "thumbnailOverlays" in pvr:
            try:
                dur = pvr["thumbnailOverlays"][0]["thumbnailOverlayTimeStatusRenderer"]
                dur = dur["text"]["simpleText"]
            except Exception:
                # An overlay without a time status is not an entry we want.
                return None
        vid = pvr["videoId"]
        if vid in dejav:
            return None
        dejav.add(vid)
        thb = ""
        if "thumbnail" in pvr:
            thb = pvr["thumbnail"]["thumbnails"][0]["url"]
        return {
            "title": tit,
            "duration": dur,
            "videoId": vid,
            "thumbnail": thb
        }

    def _getvideoitemelem(d, pa):
        # Walk the JSON tree and yield every dict that carries "videoId",
        # together with whether it sits below a "shelfRenderer" key.
        if isinstance(d, list):
            for c in d:
                yield from _getvideoitemelem(c, pa)
        elif isinstance(d, dict):
            if "videoId" in d:
                yield d, pa
            else:
                for k, v in d.items():
                    # Computed per child so that the flag does not leak to
                    # the sibling keys that follow a "shelfRenderer" key.
                    yield from _getvideoitemelem(
                        v, pa or k == "shelfRenderer")

    ndp = "var ytInitialData = "
    decoder = json.JSONDecoder()
    for scr in soup.find_all("script"):
        c = list(scr.children)
        if not c:
            continue
        s = c[0]
        pos = s.find(ndp)
        if pos < 0:
            continue
        # raw_decode() stops at the end of the JSON value, so the trailing
        # ";" (or anything else after the value) does not matter.
        d, _ = decoder.raw_decode(s[pos + len(ndp):])
        for c, pa in _getvideoitemelem(d, False):
            item = _vritem(c)
            if item:
                yield [item, pa]
def _load_origdata(ofnhtml, ofnjs):
    """Load the media list written by an earlier run.

    The separate data file *ofnjs* is preferred.  When it does not exist,
    the list embedded in *ofnhtml* is used instead.

    Args:
        ofnhtml: path of the generated HTML page.
        ofnjs: path of the generated ``var medialist = ...;`` data file.

    Returns:
        The decoded list, or ``[]`` when neither file exists or the HTML
        page has no embedded list.

    Raises:
        ValueError: the stored list is malformed (missing terminator or
            invalid JSON).
    """
    cont = None
    if os.path.exists(ofnjs):
        with io.open(ofnjs, encoding="utf-8") as fi:
            cont = fi.read()
        # Strip the "var medialist = " prefix and the final ";".
        cont = cont[len("var medialist = "):cont.rindex(";")]
    elif os.path.exists(ofnhtml):
        with io.open(ofnhtml, encoding="utf-8") as fi:
            temp = fi.read()
        m = re.search('<script type="text/javascript">\nvar medialist =', temp)
        if m:
            cont = temp[m.end():]
            end = re.search(";\r?\n</script>", cont)
            if end is None:
                raise ValueError(
                    "unterminated medialist script in {!r}".format(ofnhtml))
            cont = cont[:end.start()]
    if cont:
        return json.loads(cont)
    return []
def _dump(
        pagetitle, title_pattern, exclude_title_patterns,
        res, ofnbase,
        separate_datafile, update_datafile):
    """Write the player page (and optionally its data file) to disk.

    Args:
        pagetitle: text for the page title.  ``&index=N`` parts are removed
            and everything after the first ``?`` is URL-unquoted.
        title_pattern: compiled regex; an item is kept only when it matches
            the start of the item's title.
        exclude_title_patterns: compiled regexes; an item is dropped when
            any of them is found in its title.
        res: iterable of ``(item, pa)`` pairs; pairs with a true ``pa`` are
            dropped.
        ofnbase: output name, with or without the ``.html`` suffix.
        separate_datafile: when true, write the list to a sibling ``.js``
            file and reference it from the page instead of inlining it.
        update_datafile: when true, start from the list stored by an
            earlier run and append only the videos that are new.
    """
    from html import escape as _html_escape

    pagetitle = re.sub(r"\&index=\d+", "", pagetitle)
    head, sep, query = pagetitle.partition("?")
    pagetitle = head + sep + unquote(query)
    if ofnbase.endswith(".html"):
        ofnhtml = ofnbase
        ofnjs = os.path.splitext(ofnbase)[0] + ".js"
    else:
        ofnhtml = ofnbase + ".html"
        ofnjs = ofnbase + ".js"

    medialist = []
    if update_datafile:
        medialist.extend(_load_origdata(ofnhtml, ofnjs))
    # De-duplicate on the video id: the page uses "videoid" as the table
    # index, and comparing whole entries would let a changed thumbnail URL
    # or title add the same video twice.
    seen = {entry.get("videoid") for entry in medialist}
    for item, pa in res:
        if pa:
            continue
        title = item["title"]
        if not title_pattern.match(title):
            continue
        if any(excl.search(title) for excl in exclude_title_patterns):
            continue
        videoid = item["videoId"]
        if videoid in seen:
            continue
        seen.add(videoid)
        medialist.append(dict(
            videoid=videoid,
            title=title,
            duration=item["duration"],
            thumb=item["thumbnail"]))

    # "<" is written as \u003c so that a title containing "</script>" or
    # "<!--" cannot end or corrupt the inline script.  The text is still
    # valid JSON and valid JavaScript.
    medialist_json = json.dumps(
        medialist,
        ensure_ascii=False, indent=2).replace("<", "\\u003c")
    medialist_script_cont = _MEDIALISTSCRIPTTMPL.format(
        medialist=medialist_json)
    #
    if not separate_datafile:
        medialist_script = """<script type="text/javascript">
{medialist_script}
</script>
""".format(medialist_script=medialist_script_cont)
    else:
        # The data file is written next to the page, so the page must refer
        # to it by its bare file name, not by the output path.
        medialist_script = '<script type="text/javascript" src="{}"></script>'.format(
            _html_escape(os.path.basename(ofnjs), quote=True))
        with io.open(ofnjs, "w", encoding="utf-8", newline="\n") as fo:
            print(medialist_script_cont, file=fo)
    #
    with io.open(ofnhtml, "w", encoding="utf-8", newline="\n") as fo:
        print(_HTMLTMPL.format(
            pagetitle=_html_escape(pagetitle),
            medialist_script=medialist_script), file=fo)
def _urlmap(s):
    """Return *s* as a URL.

    A value without a URL scheme is taken to be a playlist id and is
    expanded to the matching playlist URL; anything else is returned as is.
    """
    from urllib.parse import urlsplit

    has_scheme = bool(urlsplit(s).scheme)
    return s if has_scheme else "https://www.youtube.com/playlist?list=" + s
def _allplaylists(url):
    """Yield one playlist URL per playlist id found in the page at *url*.

    The ids are taken from every dict carrying a ``playlistId`` key in the
    ``ytInitialData`` JSON embedded in the page.  Each id is yielded once,
    in order of first appearance, so a playlist that is referenced several
    times in the page is not fetched again by the caller.
    """
    from urllib.request import urlcleanup

    base = "https://www.youtube.com/playlist?list="
    fn, _ = urlretrieve(url)
    try:
        with io.open(fn, encoding="utf-8") as fi:
            htcont = fi.read()
    finally:
        # Drop the temporary download left behind by urlretrieve().
        urlcleanup()
    soup = bs4.BeautifulSoup(htcont, features="html.parser")

    def _getplistitemelem(d):
        # Walk the JSON tree and yield every dict that has "playlistId".
        if isinstance(d, list):
            for c in d:
                yield from _getplistitemelem(c)
        elif isinstance(d, dict):
            if "playlistId" in d:
                yield d
            else:
                for v in d.values():
                    yield from _getplistitemelem(v)

    ndp = "var ytInitialData = "
    decoder = json.JSONDecoder()
    seen = set()
    for scr in soup.find_all("script"):
        c = list(scr.children)
        if not c:
            continue
        s = c[0]
        pos = s.find(ndp)
        if pos < 0:
            continue
        # raw_decode() stops at the end of the JSON value, so the trailing
        # ";" (or anything else after the value) does not matter.
        d, _ = decoder.raw_decode(s[pos + len(ndp):])
        for r in _getplistitemelem(d):
            plid = r.get("playlistId")
            if plid in seen:
                continue
            seen.add(plid)
            yield base + plid
def _main(argv=None):
    """Command-line entry point: collect videos and write the player page.

    Positional arguments are playlist ids or URLs, or search words when
    ``--argtype search_words`` is given.  A URL ending in ``/playlists`` is
    expanded to the playlists it lists.  Title filters come either from the
    command line (``--title_pattern`` / ``--exclude_title_pattern``) or from
    a JSON file (``--pattern_config``), never from both.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--argtype",
        choices=["", "search_words"],
        default="")
    ap.add_argument("arg", nargs="+")
    ap.add_argument(
        "--outfilebase",
        default=os.path.splitext(os.path.basename(sys.argv[0]))[0])
    ap.add_argument(
        "--separate_datafile",
        action="store_true")
    ap.add_argument(
        "--update_datafile",
        action="store_true")
    # argparse does not support argument groups inside a mutually exclusive
    # group, so the two sets of options are plain groups (for --help) and
    # their exclusivity is checked by hand after parsing.
    gr1 = ap.add_argument_group("pattern_from_cmdline")
    gr2 = ap.add_argument_group("pattern_from_file")
    gr1.add_argument(
        "--title_pattern")
    gr1.add_argument(
        "--exclude_title_pattern",
        action="append")
    # ---
    # {"match": ".*", "excludes": ["aaa", "bbb"]}
    # ---
    gr2.add_argument("--pattern_config")
    args = ap.parse_args(argv)
    if args.pattern_config and (
            args.title_pattern or args.exclude_title_pattern):
        ap.error(
            "--pattern_config cannot be combined with "
            "--title_pattern/--exclude_title_pattern")

    tit = []
    result = []
    title_pattern = ".*"
    exclude_title_patterns = []
    if args.pattern_config:
        with io.open(args.pattern_config, encoding="utf-8") as fi:
            cfg = json.load(fi)
        title_pattern = cfg.get("match", ".*")
        for p in cfg.get("excludes", []):
            exclude_title_patterns.append(re.compile(p, flags=re.I))
    else:
        if args.title_pattern:
            title_pattern = args.title_pattern
        for p in args.exclude_title_pattern or []:
            exclude_title_patterns.append(re.compile(p, flags=re.I))
    title_pattern = re.compile(title_pattern, flags=re.I)

    dejav = set()
    if args.argtype == "search_words":
        # All words of all arguments are joined into one search query.
        b = "https://www.youtube.com/results?search_query="
        words = " ".join(args.arg).split()
        rawurls = [b + "+".join(quote(c.encode("utf-8")) for c in words)]
    else:
        rawurls = [_urlmap(a) for a in args.arg]
    urls = []
    for url in rawurls:
        if url.endswith("/playlists"):
            urls.extend(_allplaylists(url))
        else:
            urls.append(url)
    for url in urls:
        # The last path component of every URL becomes part of the title.
        tit.append(url.rpartition("/")[-1])
        result.extend(filter(None, _getplitems(dejav, url)))
    _dump(
        ", ".join(tit),
        title_pattern,
        exclude_title_patterns,
        result,
        args.outfilebase,
        args.separate_datafile,
        args.update_datafile)


if __name__ == '__main__':
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment