Created
October 28, 2023 17:28
-
-
Save pakkinlau/7383d965316fe43f3d9240e466d3af20 to your computer and use it in GitHub Desktop.
A Python script that outputs a YouTube video's subtitles. Only the video_id is needed to run the script.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Loading the libraries:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from youtube_transcript_api import YouTubeTranscriptApi\n", | |
"import re" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## User inputs:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"For this video (PNKj529yY5c) transcripts are available in the following languages:\n", | |
"\n", | |
"(MANUALLY CREATED)\n", | |
" - en (\"英文 - CC\")[TRANSLATABLE]\n", | |
" - ko (\"韓文\")[TRANSLATABLE]\n", | |
"\n", | |
"(GENERATED)\n", | |
" - en (\"英文 (自動產生)\")[TRANSLATABLE]\n", | |
"\n", | |
"(TRANSLATION LANGUAGES)\n", | |
" - tr (\"土耳其文\")\n", | |
" - tk (\"土庫曼文\")\n", | |
" - lg (\"干達文\")\n", | |
" - zh-Hant (\"中文(繁體字)\")\n", | |
" - zh-Hans (\"中文(簡體字)\")\n", | |
" - da (\"丹麥文\")\n", | |
" - eu (\"巴斯克文\")\n", | |
" - ja (\"日文\")\n", | |
" - mi (\"毛利文\")\n", | |
" - jv (\"爪哇文\")\n", | |
" - eo (\"世界語\")\n", | |
" - gl (\"加里西亞文\")\n", | |
" - ca (\"加泰隆尼亞文\")\n", | |
" - nso (\"北索托文\")\n", | |
" - gu (\"古吉拉特文\")\n", | |
" - sw (\"史瓦希里文\")\n", | |
" - ne (\"尼泊爾文\")\n", | |
" - ny (\"尼揚賈文\")\n", | |
" - gn (\"瓜拉尼文\")\n", | |
" - be (\"白俄羅斯文\")\n", | |
" - lt (\"立陶宛文\")\n", | |
" - ig (\"伊博文\")\n", | |
" - is (\"冰島文\")\n", | |
" - hu (\"匈牙利文\")\n", | |
" - id (\"印尼文\")\n", | |
" - hi (\"印地文\")\n", | |
" - ky (\"吉爾吉斯文\")\n", | |
" - lo (\"老撾文\")\n", | |
" - ay (\"艾馬拉文\")\n", | |
" - fy (\"西弗里西亞文\")\n", | |
" - es (\"西班牙文\")\n", | |
" - kri (\"克裡奧爾文\")\n", | |
" - hr (\"克羅地亞文\")\n", | |
" - kn (\"坎納達文\")\n", | |
" - iw (\"希伯來文\")\n", | |
" - el (\"希臘文\")\n", | |
" - hy (\"亞美尼亞文\")\n", | |
" - bn (\"孟加拉文\")\n", | |
" - la (\"拉丁文\")\n", | |
" - lv (\"拉脫維亞文\")\n", | |
" - ln (\"林加拉文\")\n", | |
" - fr (\"法文\")\n", | |
" - fa (\"波斯文\")\n", | |
" - bs (\"波斯尼亞文\")\n", | |
" - pl (\"波蘭文\")\n", | |
" - fi (\"芬蘭文\")\n", | |
" - ak (\"阿坎文\")\n", | |
" - am (\"阿姆哈拉文\")\n", | |
" - ar (\"阿拉伯文\")\n", | |
" - az (\"阿塞拜疆文\")\n", | |
" - sq (\"阿爾巴尼亞文\")\n", | |
" - as (\"阿薩姆文\")\n", | |
" - ru (\"俄文\")\n", | |
" - bg (\"保加利亞文\")\n", | |
" - sd (\"信德文\")\n", | |
" - af (\"南非荷蘭文\")\n", | |
" - kk (\"哈薩克文\")\n", | |
" - cy (\"威爾斯文\")\n", | |
" - co (\"科西嘉文\")\n", | |
" - xh (\"科薩文\")\n", | |
" - yo (\"約魯巴文\")\n", | |
" - hmn (\"苗語\")\n", | |
" - en (\"英文\")\n", | |
" - dv (\"迪維西文\")\n", | |
" - sn (\"修納文\")\n", | |
" - ee (\"埃維文\")\n", | |
" - haw (\"夏威夷文\")\n", | |
" - ku (\"庫德文\")\n", | |
" - no (\"挪威文\")\n", | |
" - pa (\"旁遮普文\")\n", | |
" - ka (\"格魯吉亞文\")\n", | |
" - th (\"泰文\")\n", | |
" - ta (\"泰米爾文\")\n", | |
" - te (\"泰盧固文\")\n", | |
" - ht (\"海地文\")\n", | |
" - uk (\"烏克蘭文\")\n", | |
" - uz (\"烏茲別克文\")\n", | |
" - ur (\"烏爾都文\")\n", | |
" - ts (\"特松加文\")\n", | |
" - zu (\"祖魯文\")\n", | |
" - so (\"索馬里文\")\n", | |
" - mt (\"馬耳他文\")\n", | |
" - ms (\"馬來文\")\n", | |
" - mk (\"馬其頓文\")\n", | |
" - mg (\"馬拉加斯文\")\n", | |
" - mr (\"馬拉地文\")\n", | |
" - ml (\"馬拉雅拉姆文\")\n", | |
" - km (\"高棉文\")\n", | |
" - ceb (\"宿霧文\")\n", | |
" - cs (\"捷克文\")\n", | |
" - sa (\"梵文\")\n", | |
" - nl (\"荷蘭文\")\n", | |
" - bho (\"博傑普爾文\")\n", | |
" - su (\"巽他文\")\n", | |
" - ti (\"提格利尼亞文\")\n", | |
" - sl (\"斯洛文尼亞文\")\n", | |
" - sk (\"斯洛伐克文\")\n", | |
" - ps (\"普什圖文\")\n", | |
" - fil (\"菲律賓文\")\n", | |
" - vi (\"越南文\")\n", | |
" - tg (\"塔吉克文\")\n", | |
" - st (\"塞索托文\")\n", | |
" - sr (\"塞爾維亞文\")\n", | |
" - or (\"奧里雅文\")\n", | |
" - om (\"奧羅莫文\")\n", | |
" - it (\"意大利文\")\n", | |
" - yi (\"意第緒文\")\n", | |
" - et (\"愛沙尼亞文\")\n", | |
" - ga (\"愛爾蘭文\")\n", | |
" - sv (\"瑞典文\")\n", | |
" - pt (\"葡萄牙文\")\n", | |
" - si (\"僧伽羅文\")\n", | |
" - ug (\"維吾爾文\")\n", | |
" - mn (\"蒙古文\")\n", | |
" - qu (\"蓋楚瓦文\")\n", | |
" - ha (\"豪撒文\")\n", | |
" - de (\"德文\")\n", | |
" - my (\"緬甸文\")\n", | |
" - rw (\"盧旺達文\")\n", | |
" - lb (\"盧森堡文\")\n", | |
" - ko (\"韓文\")\n", | |
" - sm (\"薩摩亞文\")\n", | |
" - ro (\"羅馬尼亞文\")\n", | |
" - gd (\"蘇格蘭蓋爾文\")\n", | |
" - tt (\"韃靼文\")\n" | |
] | |
} | |
], | |
"source": [ | |
"# Videos to fetch (YouTube video ids) and the transcript languages to request.\n", | |
"video_ids = [\"PNKj529yY5c\"]\n", | |
"languages = ['en']\n", | |
"\n", | |
"# Print the transcript languages available for the first video; pick the\n", | |
"# codes for `languages` above from this listing if not certain.\n", | |
"print(YouTubeTranscriptApi.list_transcripts(video_id = video_ids[0]))\n", | |
"\n", | |
"# Optional: uncomment to fetch and inspect the raw transcript data.\n", | |
"# transcript = YouTubeTranscriptApi.get_transcripts(video_ids, languages=languages)\n", | |
"# print(transcript)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_article_to_txt(video_ids, languages, txt_name='article'):\n", | |
" transcript = YouTubeTranscriptApi.get_transcripts(video_ids, languages=languages)\n", | |
" dict_form = transcript[0]\n", | |
" first_key = next(iter(transcript[0]))\n", | |
" first_value: list = dict_form[first_key] # which is a list of key value pairs\n", | |
" \n", | |
" article = \"\"\n", | |
" for elt in first_value:\n", | |
" # print(elt['text'])\n", | |
" article = article + elt['text'] + \" \"\n", | |
" \n", | |
" pat = ('(?<!Dr)(?<!Esq)\\. +(?=[A-Z])')\n", | |
" formatted_article = re.sub(pat,'.\\n',article)\n", | |
" formatted_article = article.replace('\\n',' ').replace('.','.\\n').replace('?','?\\n')\n", | |
" \n", | |
" with open(txt_name, 'w') as f:\n", | |
" f.write(f\"{formatted_article}.txt\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.\n" | |
] | |
}, | |
{ | |
"ename": "OSError", | |
"evalue": "ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mRegexNotFoundError\u001b[0m Traceback (most recent call last)", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:815\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 814\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 815\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 816\u001b[0m \u001b[39mexcept\u001b[39;00m GeoRestrictedError \u001b[39mas\u001b[39;00m e:\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:836\u001b[0m, in \u001b[0;36mYoutubeDL.__extract_info\u001b[1;34m(self, url, ie, download, extra_info, process)\u001b[0m\n\u001b[0;32m 834\u001b[0m \u001b[39m@__handle_extraction_exceptions\u001b[39m\n\u001b[0;32m 835\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__extract_info\u001b[39m(\u001b[39mself\u001b[39m, url, ie, download, extra_info, process):\n\u001b[1;32m--> 836\u001b[0m ie_result \u001b[39m=\u001b[39m ie\u001b[39m.\u001b[39;49mextract(url)\n\u001b[0;32m 837\u001b[0m \u001b[39mif\u001b[39;00m ie_result \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39m# Finished already (backwards compatibility; listformats and friends should be moved here)\u001b[39;00m\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:534\u001b[0m, in \u001b[0;36mInfoExtractor.extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 533\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minitialize()\n\u001b[1;32m--> 534\u001b[0m ie_result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_real_extract(url)\n\u001b[0;32m 535\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_x_forwarded_for_ip:\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\youtube.py:1794\u001b[0m, in \u001b[0;36mYoutubeIE._real_extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 1782\u001b[0m owner_profile_url \u001b[39m=\u001b[39m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mownerProfileUrl\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1784\u001b[0m info \u001b[39m=\u001b[39m {\n\u001b[0;32m 1785\u001b[0m \u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m: video_id,\n\u001b[0;32m 1786\u001b[0m \u001b[39m'\u001b[39m\u001b[39mtitle\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_live_title(video_title) \u001b[39mif\u001b[39;00m is_live \u001b[39melse\u001b[39;00m video_title,\n\u001b[0;32m 1787\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformats\u001b[39m\u001b[39m'\u001b[39m: formats,\n\u001b[0;32m 1788\u001b[0m \u001b[39m'\u001b[39m\u001b[39mthumbnails\u001b[39m\u001b[39m'\u001b[39m: thumbnails,\n\u001b[0;32m 1789\u001b[0m \u001b[39m'\u001b[39m\u001b[39mdescription\u001b[39m\u001b[39m'\u001b[39m: video_description,\n\u001b[0;32m 1790\u001b[0m \u001b[39m'\u001b[39m\u001b[39mupload_date\u001b[39m\u001b[39m'\u001b[39m: unified_strdate(\n\u001b[0;32m 1791\u001b[0m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1792\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1793\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader\u001b[39m\u001b[39m'\u001b[39m: video_details[\u001b[39m'\u001b[39m\u001b[39mauthor\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m-> 1794\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader_id\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_search_regex(\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m/(?:channel|user)/([^/?&#]+)\u001b[39;49m\u001b[39m'\u001b[39;49m, 
owner_profile_url, \u001b[39m'\u001b[39;49m\u001b[39muploader id\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mif\u001b[39;00m owner_profile_url \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1795\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader_url\u001b[39m\u001b[39m'\u001b[39m: owner_profile_url,\n\u001b[0;32m 1796\u001b[0m \u001b[39m'\u001b[39m\u001b[39mchannel_id\u001b[39m\u001b[39m'\u001b[39m: channel_id,\n\u001b[0;32m 1797\u001b[0m \u001b[39m'\u001b[39m\u001b[39mchannel_url\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39mhttps://www.youtube.com/channel/\u001b[39m\u001b[39m'\u001b[39m \u001b[39m+\u001b[39m channel_id \u001b[39mif\u001b[39;00m channel_id \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1798\u001b[0m \u001b[39m'\u001b[39m\u001b[39mduration\u001b[39m\u001b[39m'\u001b[39m: duration,\n\u001b[0;32m 1799\u001b[0m \u001b[39m'\u001b[39m\u001b[39mview_count\u001b[39m\u001b[39m'\u001b[39m: int_or_none(\n\u001b[0;32m 1800\u001b[0m video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1801\u001b[0m \u001b[39mor\u001b[39;00m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1802\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39minteractionCount\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1803\u001b[0m \u001b[39m'\u001b[39m\u001b[39maverage_rating\u001b[39m\u001b[39m'\u001b[39m: float_or_none(video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39maverageRating\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1804\u001b[0m \u001b[39m'\u001b[39m\u001b[39mage_limit\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m18\u001b[39m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 1805\u001b[0m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39misFamilySafe\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mis\u001b[39;00m 
\u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 1806\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39misFamilyFriendly\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mfalse\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 1807\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39mog:restrictions:age\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39m18+\u001b[39m\u001b[39m'\u001b[39m) \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m,\n\u001b[0;32m 1808\u001b[0m \u001b[39m'\u001b[39m\u001b[39mwebpage_url\u001b[39m\u001b[39m'\u001b[39m: webpage_url,\n\u001b[0;32m 1809\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m'\u001b[39m: [category] \u001b[39mif\u001b[39;00m category \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1810\u001b[0m \u001b[39m'\u001b[39m\u001b[39mtags\u001b[39m\u001b[39m'\u001b[39m: keywords,\n\u001b[0;32m 1811\u001b[0m \u001b[39m'\u001b[39m\u001b[39mis_live\u001b[39m\u001b[39m'\u001b[39m: is_live,\n\u001b[0;32m 1812\u001b[0m }\n\u001b[0;32m 1814\u001b[0m pctr \u001b[39m=\u001b[39m try_get(\n\u001b[0;32m 1815\u001b[0m player_response,\n\u001b[0;32m 1816\u001b[0m \u001b[39mlambda\u001b[39;00m x: x[\u001b[39m'\u001b[39m\u001b[39mcaptions\u001b[39m\u001b[39m'\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mplayerCaptionsTracklistRenderer\u001b[39m\u001b[39m'\u001b[39m], \u001b[39mdict\u001b[39m)\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:1012\u001b[0m, in \u001b[0;36mInfoExtractor._search_regex\u001b[1;34m(self, pattern, string, name, default, fatal, flags, group)\u001b[0m\n\u001b[0;32m 1011\u001b[0m \u001b[39melif\u001b[39;00m fatal:\n\u001b[1;32m-> 1012\u001b[0m \u001b[39mraise\u001b[39;00m RegexNotFoundError(\u001b[39m'\u001b[39m\u001b[39mUnable to extract \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m _name)\n\u001b[0;32m 1013\u001b[0m \u001b[39melse\u001b[39;00m:\n", | |
"\u001b[1;31mRegexNotFoundError\u001b[0m: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[1;31mDownloadError\u001b[0m Traceback (most recent call last)", | |
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:39\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 39\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_info \u001b[39m=\u001b[39m ydl\u001b[39m.\u001b[39;49mextract_info(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mvideoid, download\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[0;32m 40\u001b[0m \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:808\u001b[0m, in \u001b[0;36mYoutubeDL.extract_info\u001b[1;34m(self, url, download, ie_key, extra_info, process, force_generic_extractor)\u001b[0m\n\u001b[0;32m 805\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreport_warning(\u001b[39m'\u001b[39m\u001b[39mThe program functionality for this site has been marked as broken, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 806\u001b[0m \u001b[39m'\u001b[39m\u001b[39mand will probably not work.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m--> 808\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__extract_info(url, ie, download, extra_info, process)\n\u001b[0;32m 809\u001b[0m \u001b[39melse\u001b[39;00m:\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:824\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 823\u001b[0m \u001b[39mexcept\u001b[39;00m ExtractorError \u001b[39mas\u001b[39;00m e: \u001b[39m# An error we somewhat expected\u001b[39;00m\n\u001b[1;32m--> 824\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreport_error(compat_str(e), e\u001b[39m.\u001b[39;49mformat_traceback())\n\u001b[0;32m 825\u001b[0m \u001b[39mexcept\u001b[39;00m MaxDownloadsReached:\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:628\u001b[0m, in \u001b[0;36mYoutubeDL.report_error\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m 627\u001b[0m error_message \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (_msg_header, message)\n\u001b[1;32m--> 628\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrouble(error_message, tb)\n", | |
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:598\u001b[0m, in \u001b[0;36mYoutubeDL.trouble\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m 597\u001b[0m exc_info \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n\u001b[1;32m--> 598\u001b[0m \u001b[39mraise\u001b[39;00m DownloadError(message, exc_info)\n\u001b[0;32m 599\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_download_retcode \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n", | |
"\u001b[1;31mDownloadError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.", | |
"\nDuring handling of the above exception, another exception occurred:\n", | |
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\subtitle.ipynb Cell 6\u001b[0m in \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttps://www.youtube.com/watch?v=\u001b[39m\u001b[39m{\u001b[39;00mvideo_ids[\u001b[39m0\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39m# getting video\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m video \u001b[39m=\u001b[39m pafy\u001b[39m.\u001b[39;49mnew(url) \n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m value \u001b[39m=\u001b[39m video\u001b[39m.\u001b[39mtitle\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m value \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\u001b[39mr\u001b[39m\u001b[39m'\u001b[39m\u001b[39m[^\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mw]\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m, value) \u001b[39m# remove any special characters that are not alphabet or numbers\u001b[39;00m\n", | |
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\pafy.py:124\u001b[0m, in \u001b[0;36mnew\u001b[1;34m(url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m 121\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 122\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbackend_youtube_dl\u001b[39;00m \u001b[39mimport\u001b[39;00m YtdlPafy \u001b[39mas\u001b[39;00m Pafy\n\u001b[1;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m Pafy(url, basic, gdata, size, callback, ydl_opts\u001b[39m=\u001b[39;49mydl_opts)\n", | |
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:30\u001b[0m, in \u001b[0;36mYtdlPafy.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[39mif\u001b[39;00m ydl_opts:\n\u001b[0;32m 29\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_opts\u001b[39m.\u001b[39mupdate(ydl_opts)\n\u001b[1;32m---> 30\u001b[0m \u001b[39msuper\u001b[39m(YtdlPafy, \u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", | |
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_shared.py:97\u001b[0m, in \u001b[0;36mBasePafy.__init__\u001b[1;34m(self, video_url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m 94\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexpiry \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 96\u001b[0m \u001b[39mif\u001b[39;00m basic:\n\u001b[1;32m---> 97\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fetch_basic()\n\u001b[0;32m 99\u001b[0m \u001b[39mif\u001b[39;00m gdata:\n\u001b[0;32m 100\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_fetch_gdata()\n", | |
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:42\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 40\u001b[0m \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n\u001b[0;32m 41\u001b[0m \u001b[39mexcept\u001b[39;00m youtube_dl\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mDownloadError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m---> 42\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mIOError\u001b[39;00m(\u001b[39mstr\u001b[39m(e)\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39mYouTube said\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mYoutube says\u001b[39m\u001b[39m'\u001b[39m))\n\u001b[0;32m 44\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback:\n\u001b[0;32m 45\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback(\u001b[39m\"\u001b[39m\u001b[39mFetched video info\u001b[39m\u001b[39m\"\u001b[39m)\n", | |
"\u001b[1;31mOSError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output." | |
] | |
} | |
], | |
"source": [ | |
"## getting title from youtube video by its link\n", | |
"\n", | |
"import pafy_develop.pafy.pafy as pafy\n", | |
" \n", | |
"# url of video \n", | |
"url = f\"https://www.youtube.com/watch?v={video_ids[0]}\"\n", | |
" \n", | |
"# getting video\n", | |
"video = pafy.new(url) \n", | |
"value = video.title\n", | |
"value = re.sub(r'[^\\w]', ' ', value) # remove any special characters that are not alphabet or numbers\n", | |
" \n", | |
"# printing the value\n", | |
"print(\"Title : \" + value)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The subtitle is going to be created as {value}.txt. Please check\n" | |
] | |
} | |
], | |
"source": [ | |
"print(\"The subtitle is going to be created as {value}.txt. Please check\")\n", | |
"\n", | |
"get_article_to_txt(video_ids, languages, txt_name= value)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "FileNotFoundError", | |
"evalue": "[Errno 2] No such file or directory: '25 Symmetric Matrices and Positive Definiteness.txt'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[1;32mIn [6], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m## preview the result:\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m{\u001b[39;49;00mvalue\u001b[39m}\u001b[39;49;00m\u001b[39m.txt\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m 4\u001b[0m A \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mread()\n\u001b[0;32m 5\u001b[0m \u001b[39mprint\u001b[39m(A)\n", | |
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '25 Symmetric Matrices and Positive Definiteness.txt'" | |
] | |
} | |
], | |
"source": [ | |
"## preview the result:\n", | |
"\n", | |
"with open(f\"{value}.txt\", 'r') as f:\n", | |
" A = f.read()\n", | |
" print(A)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.10.4 64-bit", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.4" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "c19fa61d258bb2b35aae2ada233c33e2817c1ce895aa48acba720c6bf7cbe3cb" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment