Skip to content

Instantly share code, notes, and snippets.

@pakkinlau
Created October 28, 2023 17:28
Show Gist options
  • Save pakkinlau/7383d965316fe43f3d9240e466d3af20 to your computer and use it in GitHub Desktop.
Save pakkinlau/7383d965316fe43f3d9240e466d3af20 to your computer and use it in GitHub Desktop.
A Python script that outputs a YouTube video's subtitles. Only the video_id is needed to run the script.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading the libraries:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from youtube_transcript_api import YouTubeTranscriptApi\n",
"import re"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## User inputs:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"For this video (PNKj529yY5c) transcripts are available in the following languages:\n",
"\n",
"(MANUALLY CREATED)\n",
" - en (\"英文 - CC\")[TRANSLATABLE]\n",
" - ko (\"韓文\")[TRANSLATABLE]\n",
"\n",
"(GENERATED)\n",
" - en (\"英文 (自動產生)\")[TRANSLATABLE]\n",
"\n",
"(TRANSLATION LANGUAGES)\n",
" - tr (\"土耳其文\")\n",
" - tk (\"土庫曼文\")\n",
" - lg (\"干達文\")\n",
" - zh-Hant (\"中文(繁體字)\")\n",
" - zh-Hans (\"中文(簡體字)\")\n",
" - da (\"丹麥文\")\n",
" - eu (\"巴斯克文\")\n",
" - ja (\"日文\")\n",
" - mi (\"毛利文\")\n",
" - jv (\"爪哇文\")\n",
" - eo (\"世界語\")\n",
" - gl (\"加里西亞文\")\n",
" - ca (\"加泰隆尼亞文\")\n",
" - nso (\"北索托文\")\n",
" - gu (\"古吉拉特文\")\n",
" - sw (\"史瓦希里文\")\n",
" - ne (\"尼泊爾文\")\n",
" - ny (\"尼揚賈文\")\n",
" - gn (\"瓜拉尼文\")\n",
" - be (\"白俄羅斯文\")\n",
" - lt (\"立陶宛文\")\n",
" - ig (\"伊博文\")\n",
" - is (\"冰島文\")\n",
" - hu (\"匈牙利文\")\n",
" - id (\"印尼文\")\n",
" - hi (\"印地文\")\n",
" - ky (\"吉爾吉斯文\")\n",
" - lo (\"老撾文\")\n",
" - ay (\"艾馬拉文\")\n",
" - fy (\"西弗里西亞文\")\n",
" - es (\"西班牙文\")\n",
" - kri (\"克裡奧爾文\")\n",
" - hr (\"克羅地亞文\")\n",
" - kn (\"坎納達文\")\n",
" - iw (\"希伯來文\")\n",
" - el (\"希臘文\")\n",
" - hy (\"亞美尼亞文\")\n",
" - bn (\"孟加拉文\")\n",
" - la (\"拉丁文\")\n",
" - lv (\"拉脫維亞文\")\n",
" - ln (\"林加拉文\")\n",
" - fr (\"法文\")\n",
" - fa (\"波斯文\")\n",
" - bs (\"波斯尼亞文\")\n",
" - pl (\"波蘭文\")\n",
" - fi (\"芬蘭文\")\n",
" - ak (\"阿坎文\")\n",
" - am (\"阿姆哈拉文\")\n",
" - ar (\"阿拉伯文\")\n",
" - az (\"阿塞拜疆文\")\n",
" - sq (\"阿爾巴尼亞文\")\n",
" - as (\"阿薩姆文\")\n",
" - ru (\"俄文\")\n",
" - bg (\"保加利亞文\")\n",
" - sd (\"信德文\")\n",
" - af (\"南非荷蘭文\")\n",
" - kk (\"哈薩克文\")\n",
" - cy (\"威爾斯文\")\n",
" - co (\"科西嘉文\")\n",
" - xh (\"科薩文\")\n",
" - yo (\"約魯巴文\")\n",
" - hmn (\"苗語\")\n",
" - en (\"英文\")\n",
" - dv (\"迪維西文\")\n",
" - sn (\"修納文\")\n",
" - ee (\"埃維文\")\n",
" - haw (\"夏威夷文\")\n",
" - ku (\"庫德文\")\n",
" - no (\"挪威文\")\n",
" - pa (\"旁遮普文\")\n",
" - ka (\"格魯吉亞文\")\n",
" - th (\"泰文\")\n",
" - ta (\"泰米爾文\")\n",
" - te (\"泰盧固文\")\n",
" - ht (\"海地文\")\n",
" - uk (\"烏克蘭文\")\n",
" - uz (\"烏茲別克文\")\n",
" - ur (\"烏爾都文\")\n",
" - ts (\"特松加文\")\n",
" - zu (\"祖魯文\")\n",
" - so (\"索馬里文\")\n",
" - mt (\"馬耳他文\")\n",
" - ms (\"馬來文\")\n",
" - mk (\"馬其頓文\")\n",
" - mg (\"馬拉加斯文\")\n",
" - mr (\"馬拉地文\")\n",
" - ml (\"馬拉雅拉姆文\")\n",
" - km (\"高棉文\")\n",
" - ceb (\"宿霧文\")\n",
" - cs (\"捷克文\")\n",
" - sa (\"梵文\")\n",
" - nl (\"荷蘭文\")\n",
" - bho (\"博傑普爾文\")\n",
" - su (\"巽他文\")\n",
" - ti (\"提格利尼亞文\")\n",
" - sl (\"斯洛文尼亞文\")\n",
" - sk (\"斯洛伐克文\")\n",
" - ps (\"普什圖文\")\n",
" - fil (\"菲律賓文\")\n",
" - vi (\"越南文\")\n",
" - tg (\"塔吉克文\")\n",
" - st (\"塞索托文\")\n",
" - sr (\"塞爾維亞文\")\n",
" - or (\"奧里雅文\")\n",
" - om (\"奧羅莫文\")\n",
" - it (\"意大利文\")\n",
" - yi (\"意第緒文\")\n",
" - et (\"愛沙尼亞文\")\n",
" - ga (\"愛爾蘭文\")\n",
" - sv (\"瑞典文\")\n",
" - pt (\"葡萄牙文\")\n",
" - si (\"僧伽羅文\")\n",
" - ug (\"維吾爾文\")\n",
" - mn (\"蒙古文\")\n",
" - qu (\"蓋楚瓦文\")\n",
" - ha (\"豪撒文\")\n",
" - de (\"德文\")\n",
" - my (\"緬甸文\")\n",
" - rw (\"盧旺達文\")\n",
" - lb (\"盧森堡文\")\n",
" - ko (\"韓文\")\n",
" - sm (\"薩摩亞文\")\n",
" - ro (\"羅馬尼亞文\")\n",
" - gd (\"蘇格蘭蓋爾文\")\n",
" - tt (\"韃靼文\")\n"
]
}
],
"source": [
"# Videos to fetch subtitles for, and the subtitle language codes to request.\n",
"video_ids = [\"PNKj529yY5c\"]\n",
"languages = ['en']\n",
"\n",
"# Print every transcript language offered for the first video;\n",
"# use this listing to choose the codes to put in `languages`.\n",
"available_transcripts = YouTubeTranscriptApi.list_transcripts(video_ids[0])\n",
"print(available_transcripts)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def get_article_to_txt(video_ids, languages, txt_name='article'):\n",
"    \"\"\"Download a YouTube transcript and save it as ``<txt_name>.txt``.\n",
"\n",
"    Only the transcript of the first video in the fetched result is written.\n",
"\n",
"    Args:\n",
"        video_ids: Video ids handed to ``YouTubeTranscriptApi.get_transcripts``.\n",
"        languages: Language codes handed to ``YouTubeTranscriptApi.get_transcripts``.\n",
"        txt_name: Name of the output file, without the ``.txt`` extension.\n",
"\n",
"    Raises:\n",
"        ValueError: If the fetched result contains no transcript.\n",
"    \"\"\"\n",
"    transcript = YouTubeTranscriptApi.get_transcripts(video_ids, languages=languages)\n",
"\n",
"    # The first item of the result is a mapping whose values are lists of\n",
"    # caption entries; only the first mapping value is used.\n",
"    transcripts_by_key = transcript[0]\n",
"    if not transcripts_by_key:\n",
"        raise ValueError(\"No transcript was returned for the requested video ids.\")\n",
"    first_key = next(iter(transcripts_by_key))\n",
"    entries = transcripts_by_key[first_key]\n",
"\n",
"    # Concatenate the caption texts, each one followed by a single space.\n",
"    article = \"\".join(entry['text'] + \" \" for entry in entries)\n",
"\n",
"    # Flatten line breaks inside captions, then start a new line after every\n",
"    # '.' and '?' so the text is easier to read.\n",
"    formatted_article = article.replace('\\n', ' ').replace('.', '.\\n').replace('?', '?\\n')\n",
"\n",
"    # The '.txt' extension belongs to the file name, not to the written text.\n",
"    with open(f\"{txt_name}.txt\", 'w') as f:\n",
"        f.write(formatted_article)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.\n"
]
},
{
"ename": "OSError",
"evalue": "ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mRegexNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:815\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 814\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 815\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 816\u001b[0m \u001b[39mexcept\u001b[39;00m GeoRestrictedError \u001b[39mas\u001b[39;00m e:\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:836\u001b[0m, in \u001b[0;36mYoutubeDL.__extract_info\u001b[1;34m(self, url, ie, download, extra_info, process)\u001b[0m\n\u001b[0;32m 834\u001b[0m \u001b[39m@__handle_extraction_exceptions\u001b[39m\n\u001b[0;32m 835\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__extract_info\u001b[39m(\u001b[39mself\u001b[39m, url, ie, download, extra_info, process):\n\u001b[1;32m--> 836\u001b[0m ie_result \u001b[39m=\u001b[39m ie\u001b[39m.\u001b[39;49mextract(url)\n\u001b[0;32m 837\u001b[0m \u001b[39mif\u001b[39;00m ie_result \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m: \u001b[39m# Finished already (backwards compatibility; listformats and friends should be moved here)\u001b[39;00m\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:534\u001b[0m, in \u001b[0;36mInfoExtractor.extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 533\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minitialize()\n\u001b[1;32m--> 534\u001b[0m ie_result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_real_extract(url)\n\u001b[0;32m 535\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_x_forwarded_for_ip:\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\youtube.py:1794\u001b[0m, in \u001b[0;36mYoutubeIE._real_extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m 1782\u001b[0m owner_profile_url \u001b[39m=\u001b[39m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mownerProfileUrl\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1784\u001b[0m info \u001b[39m=\u001b[39m {\n\u001b[0;32m 1785\u001b[0m \u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m: video_id,\n\u001b[0;32m 1786\u001b[0m \u001b[39m'\u001b[39m\u001b[39mtitle\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_live_title(video_title) \u001b[39mif\u001b[39;00m is_live \u001b[39melse\u001b[39;00m video_title,\n\u001b[0;32m 1787\u001b[0m \u001b[39m'\u001b[39m\u001b[39mformats\u001b[39m\u001b[39m'\u001b[39m: formats,\n\u001b[0;32m 1788\u001b[0m \u001b[39m'\u001b[39m\u001b[39mthumbnails\u001b[39m\u001b[39m'\u001b[39m: thumbnails,\n\u001b[0;32m 1789\u001b[0m \u001b[39m'\u001b[39m\u001b[39mdescription\u001b[39m\u001b[39m'\u001b[39m: video_description,\n\u001b[0;32m 1790\u001b[0m \u001b[39m'\u001b[39m\u001b[39mupload_date\u001b[39m\u001b[39m'\u001b[39m: unified_strdate(\n\u001b[0;32m 1791\u001b[0m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1792\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1793\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader\u001b[39m\u001b[39m'\u001b[39m: video_details[\u001b[39m'\u001b[39m\u001b[39mauthor\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m-> 1794\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader_id\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_search_regex(\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m/(?:channel|user)/([^/?&#]+)\u001b[39;49m\u001b[39m'\u001b[39;49m, 
owner_profile_url, \u001b[39m'\u001b[39;49m\u001b[39muploader id\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mif\u001b[39;00m owner_profile_url \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1795\u001b[0m \u001b[39m'\u001b[39m\u001b[39muploader_url\u001b[39m\u001b[39m'\u001b[39m: owner_profile_url,\n\u001b[0;32m 1796\u001b[0m \u001b[39m'\u001b[39m\u001b[39mchannel_id\u001b[39m\u001b[39m'\u001b[39m: channel_id,\n\u001b[0;32m 1797\u001b[0m \u001b[39m'\u001b[39m\u001b[39mchannel_url\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39mhttps://www.youtube.com/channel/\u001b[39m\u001b[39m'\u001b[39m \u001b[39m+\u001b[39m channel_id \u001b[39mif\u001b[39;00m channel_id \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1798\u001b[0m \u001b[39m'\u001b[39m\u001b[39mduration\u001b[39m\u001b[39m'\u001b[39m: duration,\n\u001b[0;32m 1799\u001b[0m \u001b[39m'\u001b[39m\u001b[39mview_count\u001b[39m\u001b[39m'\u001b[39m: int_or_none(\n\u001b[0;32m 1800\u001b[0m video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1801\u001b[0m \u001b[39mor\u001b[39;00m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 1802\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39minteractionCount\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1803\u001b[0m \u001b[39m'\u001b[39m\u001b[39maverage_rating\u001b[39m\u001b[39m'\u001b[39m: float_or_none(video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39maverageRating\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m 1804\u001b[0m \u001b[39m'\u001b[39m\u001b[39mage_limit\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m18\u001b[39m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 1805\u001b[0m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39misFamilySafe\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mis\u001b[39;00m 
\u001b[39mFalse\u001b[39;00m\n\u001b[0;32m 1806\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39misFamilyFriendly\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mfalse\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 1807\u001b[0m \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39mog:restrictions:age\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39m18+\u001b[39m\u001b[39m'\u001b[39m) \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m,\n\u001b[0;32m 1808\u001b[0m \u001b[39m'\u001b[39m\u001b[39mwebpage_url\u001b[39m\u001b[39m'\u001b[39m: webpage_url,\n\u001b[0;32m 1809\u001b[0m \u001b[39m'\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m'\u001b[39m: [category] \u001b[39mif\u001b[39;00m category \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 1810\u001b[0m \u001b[39m'\u001b[39m\u001b[39mtags\u001b[39m\u001b[39m'\u001b[39m: keywords,\n\u001b[0;32m 1811\u001b[0m \u001b[39m'\u001b[39m\u001b[39mis_live\u001b[39m\u001b[39m'\u001b[39m: is_live,\n\u001b[0;32m 1812\u001b[0m }\n\u001b[0;32m 1814\u001b[0m pctr \u001b[39m=\u001b[39m try_get(\n\u001b[0;32m 1815\u001b[0m player_response,\n\u001b[0;32m 1816\u001b[0m \u001b[39mlambda\u001b[39;00m x: x[\u001b[39m'\u001b[39m\u001b[39mcaptions\u001b[39m\u001b[39m'\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mplayerCaptionsTracklistRenderer\u001b[39m\u001b[39m'\u001b[39m], \u001b[39mdict\u001b[39m)\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:1012\u001b[0m, in \u001b[0;36mInfoExtractor._search_regex\u001b[1;34m(self, pattern, string, name, default, fatal, flags, group)\u001b[0m\n\u001b[0;32m 1011\u001b[0m \u001b[39melif\u001b[39;00m fatal:\n\u001b[1;32m-> 1012\u001b[0m \u001b[39mraise\u001b[39;00m RegexNotFoundError(\u001b[39m'\u001b[39m\u001b[39mUnable to extract \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m _name)\n\u001b[0;32m 1013\u001b[0m \u001b[39melse\u001b[39;00m:\n",
"\u001b[1;31mRegexNotFoundError\u001b[0m: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mDownloadError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:39\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 39\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_info \u001b[39m=\u001b[39m ydl\u001b[39m.\u001b[39;49mextract_info(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mvideoid, download\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[0;32m 40\u001b[0m \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:808\u001b[0m, in \u001b[0;36mYoutubeDL.extract_info\u001b[1;34m(self, url, download, ie_key, extra_info, process, force_generic_extractor)\u001b[0m\n\u001b[0;32m 805\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreport_warning(\u001b[39m'\u001b[39m\u001b[39mThe program functionality for this site has been marked as broken, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m 806\u001b[0m \u001b[39m'\u001b[39m\u001b[39mand will probably not work.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m--> 808\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__extract_info(url, ie, download, extra_info, process)\n\u001b[0;32m 809\u001b[0m \u001b[39melse\u001b[39;00m:\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:824\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 823\u001b[0m \u001b[39mexcept\u001b[39;00m ExtractorError \u001b[39mas\u001b[39;00m e: \u001b[39m# An error we somewhat expected\u001b[39;00m\n\u001b[1;32m--> 824\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreport_error(compat_str(e), e\u001b[39m.\u001b[39;49mformat_traceback())\n\u001b[0;32m 825\u001b[0m \u001b[39mexcept\u001b[39;00m MaxDownloadsReached:\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:628\u001b[0m, in \u001b[0;36mYoutubeDL.report_error\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m 627\u001b[0m error_message \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (_msg_header, message)\n\u001b[1;32m--> 628\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrouble(error_message, tb)\n",
"File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:598\u001b[0m, in \u001b[0;36mYoutubeDL.trouble\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m 597\u001b[0m exc_info \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n\u001b[1;32m--> 598\u001b[0m \u001b[39mraise\u001b[39;00m DownloadError(message, exc_info)\n\u001b[0;32m 599\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_download_retcode \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n",
"\u001b[1;31mDownloadError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\subtitle.ipynb Cell 6\u001b[0m in \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttps://www.youtube.com/watch?v=\u001b[39m\u001b[39m{\u001b[39;00mvideo_ids[\u001b[39m0\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39m# getting video\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m video \u001b[39m=\u001b[39m pafy\u001b[39m.\u001b[39;49mnew(url) \n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m value \u001b[39m=\u001b[39m video\u001b[39m.\u001b[39mtitle\n\u001b[0;32m <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m value \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\u001b[39mr\u001b[39m\u001b[39m'\u001b[39m\u001b[39m[^\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mw]\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m, value) \u001b[39m# remove any special characters that are not alphabet or numbers\u001b[39;00m\n",
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\pafy.py:124\u001b[0m, in \u001b[0;36mnew\u001b[1;34m(url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m 121\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 122\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbackend_youtube_dl\u001b[39;00m \u001b[39mimport\u001b[39;00m YtdlPafy \u001b[39mas\u001b[39;00m Pafy\n\u001b[1;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m Pafy(url, basic, gdata, size, callback, ydl_opts\u001b[39m=\u001b[39;49mydl_opts)\n",
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:30\u001b[0m, in \u001b[0;36mYtdlPafy.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[39mif\u001b[39;00m ydl_opts:\n\u001b[0;32m 29\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_opts\u001b[39m.\u001b[39mupdate(ydl_opts)\n\u001b[1;32m---> 30\u001b[0m \u001b[39msuper\u001b[39m(YtdlPafy, \u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_shared.py:97\u001b[0m, in \u001b[0;36mBasePafy.__init__\u001b[1;34m(self, video_url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m 94\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexpiry \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m 96\u001b[0m \u001b[39mif\u001b[39;00m basic:\n\u001b[1;32m---> 97\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fetch_basic()\n\u001b[0;32m 99\u001b[0m \u001b[39mif\u001b[39;00m gdata:\n\u001b[0;32m 100\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_fetch_gdata()\n",
"File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:42\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 40\u001b[0m \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n\u001b[0;32m 41\u001b[0m \u001b[39mexcept\u001b[39;00m youtube_dl\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mDownloadError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m---> 42\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mIOError\u001b[39;00m(\u001b[39mstr\u001b[39m(e)\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39mYouTube said\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mYoutube says\u001b[39m\u001b[39m'\u001b[39m))\n\u001b[0;32m 44\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback:\n\u001b[0;32m 45\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback(\u001b[39m\"\u001b[39m\u001b[39mFetched video info\u001b[39m\u001b[39m\"\u001b[39m)\n",
"\u001b[1;31mOSError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output."
]
}
],
"source": [
"## Get the video title; it is used as the name of the subtitle file\n",
"\n",
"import pafy_develop.pafy.pafy as pafy\n",
"\n",
"# URL of the first requested video\n",
"url = f\"https://www.youtube.com/watch?v={video_ids[0]}\"\n",
"\n",
"try:\n",
"    # pafy raises OSError (IOError) when youtube-dl cannot extract the video metadata\n",
"    video = pafy.new(url)\n",
"    # Replace every character that is not a letter, digit or underscore with a space\n",
"    value = re.sub(r'[^\\w]', ' ', video.title)\n",
"except OSError as err:\n",
"    # Fall back to the video id so the cells that use `value` still run\n",
"    print(f\"Could not fetch the video title ({err}); using the video id instead.\")\n",
"    value = video_ids[0]\n",
"\n",
"# printing the value\n",
"print(\"Title : \" + value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The subtitle is going to be created as {value}.txt. Please check\n"
]
}
],
"source": [
"# f-string so the actual file name is shown instead of the literal \"{value}\" placeholder\n",
"print(f\"The subtitle is going to be created as {value}.txt. Please check\")\n",
"\n",
"get_article_to_txt(video_ids, languages, txt_name=value)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '25 Symmetric Matrices and Positive Definiteness.txt'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn [6], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39m## preview the result:\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m{\u001b[39;49;00mvalue\u001b[39m}\u001b[39;49;00m\u001b[39m.txt\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m 4\u001b[0m A \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mread()\n\u001b[0;32m 5\u001b[0m \u001b[39mprint\u001b[39m(A)\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '25 Symmetric Matrices and Positive Definiteness.txt'"
]
}
],
"source": [
"## Preview the result:\n",
"\n",
"# Read the whole subtitle file, then print it once the file is closed\n",
"with open(f\"{value}.txt\", 'r') as subtitle_file:\n",
"    A = subtitle_file.read()\n",
"print(A)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "c19fa61d258bb2b35aae2ada233c33e2817c1ce895aa48acba720c6bf7cbe3cb"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment