Skip to content

Instantly share code, notes, and snippets.

@DarrenSem
Last active March 26, 2024 20:28
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DarrenSem/dd66d5b1b133e35775c75abedd52fc95 to your computer and use it in GitHub Desktop.
Save DarrenSem/dd66d5b1b133e35775c75abedd52fc95 to your computer and use it in GitHub Desktop.
ytSubs.js - YouTube subtitles - English (auto-generated) CC (closed captions) - Usage: node ytSubs.js videoIdOrUrl, or Web browser BOOKMARKLET into a new window
// ytSubs.js (SEE DISCLAIMER) - via @DarrenSem https://gist.github.com/DarrenSem (22Mar2024)
// YouTube subtitles - English (auto-generated) CC (closed captions)
// Usage: node ytSubs.js videoIdOrUrl
// or Web browser BOOKMARKLET (contents open in a new window)
// ES6 bookmarklet = 4045 chars
// ---------------------------------------------------------------------------------------------
// Reader's guide to the minified code below (identifiers are the minified single-letter names).
// The code is deliberately left minified: it is meant to be pasted as a `javascript:` bookmark
// URL. When run as a script, the leading `javascript:` parses as an ordinary statement label.
//
//   a                String.fromCharCode alias (used by g).
//   b                "" - fallback for the video id/URL argument when process.argv[2] is absent.
//   c(url)           async. Returns the response body as text. Uses fetch() when globalThis.fetch
//                    exists; otherwise require("https" | "http").get(), rejecting with an Error
//                    when the status code is outside 200-299.
//                    NOTE(review): the fallback joins the raw "data" chunks with join("") and never
//                    calls setEncoding - a multi-byte character split across chunks may be garbled.
//   d(html)          Regex-extracts the `"captionTracks":[...]` JSON from the page text and returns
//                    an array of [trackName, baseUrl + "&fmt=json3"] pairs. trackName comes from
//                    name.simpleText or name.runs[0].text, passed through g. Any exception is
//                    swallowed by the empty catch, in which case the result is undefined.
//   e(date)          Formats a date as day + month + year using the pieces of Date#toString()
//                    (e.g. "22Mar2024"); an unparsable value falls back to the current date.
//   f(urlOrId)       Returns the video id: the `v` query parameter if present, otherwise the path
//                    segment following /shorts|video|watch|live|stream|audio/. A bare id is first
//                    expanded to an https://youtu.be/<id> URL.
//   g(text)          Un-escapes \uXXXX, \xXX and other backslash-escaped characters.
//   h(url)           async. Downloads the page via c and regex-extracts title, ownerChannelName and
//                    uploadDate from "videoDetails". Returns { id, title, urls, channel, date },
//                    where urls = d(pageText), channel = l(g(...)), date = e(...). " 1:00" is
//                    appended to uploadDate when it contains no ":".
//   i(details)       async. Chooses a caption track from details.urls: first name ending with
//                    "English (auto-generated)", else first starting with "English (", else first
//                    starting with "English ", else the first track. Fetches that URL and returns
//                    the parsed JSON (null when nothing was fetched).
//   j(json3)         Builds the transcript text from events[].segs[].utf8: collapses whitespace,
//                    rewrites "[ __ ]" to "[_]", removes the markers matched by the inner regex
//                    (♪, [Music], [Applause], ...), joins events with spaces and inserts a blank
//                    line after every third non-empty event.
//   k(title, text)   Output. When globalThis.process exists: console.info(text). Otherwise opens
//                    the text as a text/plain Blob URL in a new window and sets that window's
//                    document.title on load.
//   l(text)          Normalises a string: strips zero-width/soft-hyphen characters and the
//                    U+1F600-U+1F64F range, maps various typographic punctuation to ASCII,
//                    "@" -> " at ", "%" -> " percent", "&" -> " and ", replaces / \ : * ? " < > |
//                    with "_" and collapses whitespace.
//                    NOTE(review): looks intended to make titles filename-safe - confirm.
//   m(idOrUrl, base) async entry point. base defaults to "https://www.youtube.com/watch?v=".
//                    With no argument it uses location.href when globalThis.document exists, and
//                    it switches to the //m.youtube. host when the current page is on it.
//                    Produces "<title> (<channel> <date> <id>) - transcript", the video link and
//                    the transcript, passes that to k and returns it. Returns "" when no channel
//                    name could be parsed; on an exception it logs the error and returns its
//                    message.
//
// The final block calls m with process.argv[2] when globalThis.process exists, otherwise with b.
// ---------------------------------------------------------------------------------------------
javascript:void function(){"use strict";var a=String.fromCharCode;const b="",c=async a=>{try{if(globalThis.fetch){const b=await fetch(a),c=await b.text();return c}return new Promise((b,c)=>{const d=require(/^https/.test(a)?"https":"http").get(a,d=>{if(200>d.statusCode||299<d.statusCode)return c(Error(`${d.statusCode} ${d.statusMessage} ${a}`));const e=[];d.on("data",a=>e.push(a)),d.on("end",()=>b(e.join("")))});d.on("error",a=>c(a))})}catch(a){throw a}},d=a=>{try{a=(a||"")+"";const b=a.match(/"captionTracks":.*"isTranslatable"\:.*?}]/),c=JSON.parse(`{${(b||[""])[0]}}`).captionTracks||[],d=c.map(a=>{const b=a.name;return[g(b.simpleText||b.runs&&b.runs[0].text),a.baseUrl+"&fmt=json3"]});return d}catch(a){}},e=(a,b,c,d)=>{a=new Date(a),[,b,c,d]=(+a?a:new Date).toString().split(" ");const e=`${c}${b}${d}`;return e},f=a=>{a="string"!=typeof a&&a?new URL(a):{href:a||""};const b=a&&a.href.trim()||"",c=b.replace(/(https?:\/\/)?\/*(.+?)\/?$/,"https://$2"),d=c.replace(/(https:\/\/)([-\w]+)$/,"$1youtu.be/$2"),e=d.replace(/(https:\/\/(?:youtu\.be|(?:\w+\.)?youtube\.\w+)\/)([-\w]+)$/,"$1$2/?&v=$2");try{b&&(a=new URL(e))}catch(a){}const{pathname:f,searchParams:g}=a,h=g&&g.get("v")||f&&(f.match(/^\/?(shorts|video|watch|live|stream|audio)\/([^?&\/]+)/)||[])[2];return h},g=b=>((b||"")+"").replace(/\\(?:u([a-zA-Z\d]{4})|x([a-zA-Z\d]{2})|(.))/g,(b,c,d,e)=>c?a(parseInt(c,16)):d?a(parseInt(d,16)):`${e}`),h=async a=>{const b=a&&new URL(a),h=f(b);let i="";h&&(i=await c(b));const j=new RegExp(`,"videoDetails":{"videoId":"${h}","title":"(.+?)","lengthSeconds":.+?"publishDate":"(.+?)","ownerChannelName":"([^"]+)".+?"uploadDate":"(.+?)"`),k=i.match(j),[,m,,n,o]=k||[],p=/:/.test(o)?o:`${o} 1:00`,q={id:h,title:g(m),urls:d(i),channel:l(g(n)),date:e(p)};return q},i=async a=>{const{urls:b}=a,d=b&&b[(b.findIndex(b=>b[0].endsWith(`English (auto-generated)`))+1||b.findIndex(b=>b[0].startsWith(`English (`))+1||b.findIndex(b=>b[0].startsWith(`English `))+1||1)-1];let e,f="";return d&&(f=await 
c(d[1])),e=JSON.parse((f||null)+""),e},j=a=>{let b=0,c="";const d=/[\s↵\x00-\x20\x7f-\x9f]+/g,e=/(?:♪|\[(INAUDIBLE|Applause|Laughter|Music|Man|Woman)\]|\((?:distorted singing|voice echoing)\))\s*/g,f=/\[\s_+\s\]/g,{events:g=[]}=a||{};return g.forEach(a=>{const{segs:g=[]}=a;let h="";if(g.forEach(a=>{const{utf8:b=""}=a,c=b.replace(/\s+/g," ").trim(),g=c.replace(d," ").replace(f,"[_]").replace(e,"");""!==g&&(h+=" "+g)}),h=h.trim(),h.length){const a=b?" ":"",d=b+1<3?"":"\n\n";c+=a+h+d,h="",b=d?0:b+1}}),c.trim()},k=(a,b)=>{if(globalThis.process)return console.info(b);const c=open(URL.createObjectURL(new Blob([b],{type:"text/plain;charset=utf-8"})),"_blank");c&&!c.closed&&(c.onload=()=>{a&&(c.document.title=a)})},l=a=>{a=null==a?"":a+"";const b=a.replace(/[\u200b\u00ad\ufffc]/g,""),c=b.replace(/[\xa0\ufeff]/g," "),d=c.replace(/[\u{1F600}-\u{1F64F}]/gu,""),e=d.replace(/\s*[¸,]/g,","),f=e.replace(/[…]/g,"..."),g=f.replace(/\s*[.\uf03a\u2024]/g,"."),h=g.replace(/¡/g,"!"),i=h.replace(/⋕/g,"#"),j=i.replace(/[@]/g," at "),k=j.replace(/[~˜^‸ˆ⋍•]+/g,"-"),l=k.replace(/(\s*[¦:–—|‹›·«»¯⋅⋆⋇⋖⋗]+\s*)|\s+(-{2,})\s*|\s*[-]+\s+/g," - "),m=l.replace(/\s*[\x25‱]/g," percent"),n=m.replace(/["″‴“”‘’′´`‵‶‷¨]|&(quot|#34|apos|#39);/g,"'"),o=n.replace(/[/\\:*?"<>|]|&lt;|&gt;/g,"_"),p=o.replace(/(\s*(&amp;)+\s*)+/g," and "),q=p.replace(/(?:\s*&(\w+?);+(\s)*)+/g,"_$1;$2"),r=q.replace(/(\s*[&]+\s*)/g," and "),s=r.replace(/\s+/g," ").trim();return s},m=async(a,b)=>{try{b=((b||"https://www.youtube.com/watch?v=")+"").trim(),a=((a||"")+"").trim();const c=globalThis.document&&location.href||"",d=a?`${/[^-\w]/.test(a)?"":b}${a}`:c,e="//m.youtube.",m=c.includes(e)?b.replace("//www.youtube.",e):b;a=f(d);const n=a&&`${m}${a}`,o=await h(n),p=await i(o),{title:q,channel:r,date:s,id:t}=o;if(!r)return"";const u=`${b}${t}&ab_channel=${r.replace(/\s/g,"")}`,v=j(p),w=l(g(q)),x=`${w} (${r} ${s} ${t}) - transcript`+`\n\n...\n\n${u}\n\n`+v;return k(q,x+"\n\n"),x}catch(a){return console.error(a),a.message}};{const 
a=globalThis.process&&process.argv[2]||b&&b;void m(a)}}();
// DISCLAIMER: **For educational and code technique / demonstrative purposes only, not responsible or liable for any use by anyone.**
@DarrenSem
Copy link
Author

DarrenSem commented Mar 26, 2024

// https://www.youtube.com/watch?v=0vGLXkQV3cg&ab_channel=FoxNews

tracks

0: {lang: 'English (auto-generated)', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…4CE1C66F807AA0&key=yt8&kind=asr&lang=en&fmt=json3', languageCode: 'en'}
1: {lang: 'English - CC1', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…4CE1C66F807AA0&key=yt8&lang=en&name=CC1&fmt=json3', languageCode: 'en'}
2: {lang: 'English - DTVCC1', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…1C66F807AA0&key=yt8&lang=en&name=DTVCC1&fmt=json3', languageCode: 'en'}
3: {lang: 'English (United States)', url: 'https://www.youtube.com/api/timedtext?v=0vGLXkQV3c…EF5C224CE1C66F807AA0&key=yt8&lang=en-US&fmt=json3', languageCode: 'en-US'}

length: 4

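^ After "English (auto-generated)", prefer "English (United States)", then any languageCode matching /en\-.+/ (which takes precedence over plain /en/); only THEN use the first /en/ match, and finally fall back to tracks[0].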

// Current selection order: the first track whose `lang` matches "English (auto-generated)",
// else the first track whose `languageCode` starts with "en", else the first track in the list.
// Each `filter(...)[0]` evaluates to undefined when nothing matches, so `||` moves to the next rule.
(tracks => (
			tracks.filter(track => track.lang.match(/English \(auto-generated\)/i))[0]
			|| tracks.filter(track => track.languageCode.match(/^en/i))[0]
			|| tracks[0]
		));

now should become this...

// Picks the caption track to use, in order of preference:
//   1. lang matching "English (auto-generated)"
//   2. lang matching "English (United States)"
//   3. a regional English languageCode ("en-" followed by something, e.g. "en-US")
//   4. any languageCode starting with "en"
//   5. the first track in the list
// find() returns the first match (or undefined) and stops scanning there, instead of
// building a whole filtered array only to read element [0]. RegExp#test yields the
// boolean the predicate needs without allocating a match array.
(tracks => (
			tracks.find(track => /English \(auto-generated\)/i.test(track.lang))
			|| tracks.find(track => /English \(United States\)/i.test(track.lang))
			|| tracks.find(track => /^en-.+/i.test(track.languageCode))
			|| tracks.find(track => /^en/i.test(track.languageCode))
			|| tracks[0]
		));

PS: make this use "find" [not findIndex] INSTEAD OF .filter (DUH!)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment