pakkinlau/youtube_subtitle.ipynb

## youtube_subtitle.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the model:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from youtube_transcript_api import YouTubeTranscriptApi\n",
    "import re"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## User inputs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For this video (PNKj529yY5c) transcripts are available in the following languages:\n",
      "\n",
      "(MANUALLY CREATED)\n",
      " - en (\"英文 - CC\")[TRANSLATABLE]\n",
      " - ko (\"韓文\")[TRANSLATABLE]\n",
      "\n",
      "(GENERATED)\n",
      " - en (\"英文 (自動產生)\")[TRANSLATABLE]\n",
      "\n",
      "(TRANSLATION LANGUAGES)\n",
      " - tr (\"土耳其文\")\n",
      " - tk (\"土庫曼文\")\n",
      " - lg (\"干達文\")\n",
      " - zh-Hant (\"中文（繁體字）\")\n",
      " - zh-Hans (\"中文（簡體字）\")\n",
      " - da (\"丹麥文\")\n",
      " - eu (\"巴斯克文\")\n",
      " - ja (\"日文\")\n",
      " - mi (\"毛利文\")\n",
      " - jv (\"爪哇文\")\n",
      " - eo (\"世界語\")\n",
      " - gl (\"加里西亞文\")\n",
      " - ca (\"加泰隆尼亞文\")\n",
      " - nso (\"北索托文\")\n",
      " - gu (\"古吉拉特文\")\n",
      " - sw (\"史瓦希里文\")\n",
      " - ne (\"尼泊爾文\")\n",
      " - ny (\"尼揚賈文\")\n",
      " - gn (\"瓜拉尼文\")\n",
      " - be (\"白俄羅斯文\")\n",
      " - lt (\"立陶宛文\")\n",
      " - ig (\"伊博文\")\n",
      " - is (\"冰島文\")\n",
      " - hu (\"匈牙利文\")\n",
      " - id (\"印尼文\")\n",
      " - hi (\"印地文\")\n",
      " - ky (\"吉爾吉斯文\")\n",
      " - lo (\"老撾文\")\n",
      " - ay (\"艾馬拉文\")\n",
      " - fy (\"西弗里西亞文\")\n",
      " - es (\"西班牙文\")\n",
      " - kri (\"克裡奧爾文\")\n",
      " - hr (\"克羅地亞文\")\n",
      " - kn (\"坎納達文\")\n",
      " - iw (\"希伯來文\")\n",
      " - el (\"希臘文\")\n",
      " - hy (\"亞美尼亞文\")\n",
      " - bn (\"孟加拉文\")\n",
      " - la (\"拉丁文\")\n",
      " - lv (\"拉脫維亞文\")\n",
      " - ln (\"林加拉文\")\n",
      " - fr (\"法文\")\n",
      " - fa (\"波斯文\")\n",
      " - bs (\"波斯尼亞文\")\n",
      " - pl (\"波蘭文\")\n",
      " - fi (\"芬蘭文\")\n",
      " - ak (\"阿坎文\")\n",
      " - am (\"阿姆哈拉文\")\n",
      " - ar (\"阿拉伯文\")\n",
      " - az (\"阿塞拜疆文\")\n",
      " - sq (\"阿爾巴尼亞文\")\n",
      " - as (\"阿薩姆文\")\n",
      " - ru (\"俄文\")\n",
      " - bg (\"保加利亞文\")\n",
      " - sd (\"信德文\")\n",
      " - af (\"南非荷蘭文\")\n",
      " - kk (\"哈薩克文\")\n",
      " - cy (\"威爾斯文\")\n",
      " - co (\"科西嘉文\")\n",
      " - xh (\"科薩文\")\n",
      " - yo (\"約魯巴文\")\n",
      " - hmn (\"苗語\")\n",
      " - en (\"英文\")\n",
      " - dv (\"迪維西文\")\n",
      " - sn (\"修納文\")\n",
      " - ee (\"埃維文\")\n",
      " - haw (\"夏威夷文\")\n",
      " - ku (\"庫德文\")\n",
      " - no (\"挪威文\")\n",
      " - pa (\"旁遮普文\")\n",
      " - ka (\"格魯吉亞文\")\n",
      " - th (\"泰文\")\n",
      " - ta (\"泰米爾文\")\n",
      " - te (\"泰盧固文\")\n",
      " - ht (\"海地文\")\n",
      " - uk (\"烏克蘭文\")\n",
      " - uz (\"烏茲別克文\")\n",
      " - ur (\"烏爾都文\")\n",
      " - ts (\"特松加文\")\n",
      " - zu (\"祖魯文\")\n",
      " - so (\"索馬里文\")\n",
      " - mt (\"馬耳他文\")\n",
      " - ms (\"馬來文\")\n",
      " - mk (\"馬其頓文\")\n",
      " - mg (\"馬拉加斯文\")\n",
      " - mr (\"馬拉地文\")\n",
      " - ml (\"馬拉雅拉姆文\")\n",
      " - km (\"高棉文\")\n",
      " - ceb (\"宿霧文\")\n",
      " - cs (\"捷克文\")\n",
      " - sa (\"梵文\")\n",
      " - nl (\"荷蘭文\")\n",
      " - bho (\"博傑普爾文\")\n",
      " - su (\"巽他文\")\n",
      " - ti (\"提格利尼亞文\")\n",
      " - sl (\"斯洛文尼亞文\")\n",
      " - sk (\"斯洛伐克文\")\n",
      " - ps (\"普什圖文\")\n",
      " - fil (\"菲律賓文\")\n",
      " - vi (\"越南文\")\n",
      " - tg (\"塔吉克文\")\n",
      " - st (\"塞索托文\")\n",
      " - sr (\"塞爾維亞文\")\n",
      " - or (\"奧里雅文\")\n",
      " - om (\"奧羅莫文\")\n",
      " - it (\"意大利文\")\n",
      " - yi (\"意第緒文\")\n",
      " - et (\"愛沙尼亞文\")\n",
      " - ga (\"愛爾蘭文\")\n",
      " - sv (\"瑞典文\")\n",
      " - pt (\"葡萄牙文\")\n",
      " - si (\"僧伽羅文\")\n",
      " - ug (\"維吾爾文\")\n",
      " - mn (\"蒙古文\")\n",
      " - qu (\"蓋楚瓦文\")\n",
      " - ha (\"豪撒文\")\n",
      " - de (\"德文\")\n",
      " - my (\"緬甸文\")\n",
      " - rw (\"盧旺達文\")\n",
      " - lb (\"盧森堡文\")\n",
      " - ko (\"韓文\")\n",
      " - sm (\"薩摩亞文\")\n",
      " - ro (\"羅馬尼亞文\")\n",
      " - gd (\"蘇格蘭蓋爾文\")\n",
      " - tt (\"韃靼文\")\n"
     ]
    }
   ],
   "source": [
    "video_ids = [\"PNKj529yY5c\"]\n",
    "languages = ['en']\n",
    "\n",
    "# Language (see below if not certain): \n",
    "print(YouTubeTranscriptApi.list_transcripts(video_id = video_ids[0]))\n",
    "\n",
    "# transcript = YouTubeTranscriptApi.get_transcripts(video_ids, languages=languages)\n",
    "# print(transcript)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_article_to_txt(video_ids, languages, txt_name='article'):\n",
    "    transcript = YouTubeTranscriptApi.get_transcripts(video_ids, languages=languages)\n",
    "    dict_form = transcript[0]\n",
    "    first_key = next(iter(transcript[0]))\n",
    "    first_value: list = dict_form[first_key] # which is a list of key value pairs\n",
    "    \n",
    "    article = \"\"\n",
    "    for elt in first_value:\n",
    "        # print(elt['text'])\n",
    "        article = article + elt['text'] + \" \"\n",
    "    \n",
    "    pat = ('(?<!Dr)(?<!Esq)\\. +(?=[A-Z])')\n",
    "    formatted_article = re.sub(pat,'.\\n',article)\n",
    "    formatted_article = article.replace('\\n',' ').replace('.','.\\n').replace('?','?\\n')\n",
    "    \n",
    "    with open(txt_name, 'w') as f:\n",
    "        f.write(f\"{formatted_article}.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.\n"
     ]
    },
    {
     "ename": "OSError",
     "evalue": "ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mRegexNotFoundError\u001b[0m                        Traceback (most recent call last)",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:815\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    814\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 815\u001b[0m     \u001b[39mreturn\u001b[39;00m func(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m    816\u001b[0m \u001b[39mexcept\u001b[39;00m GeoRestrictedError \u001b[39mas\u001b[39;00m e:\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:836\u001b[0m, in \u001b[0;36mYoutubeDL.__extract_info\u001b[1;34m(self, url, ie, download, extra_info, process)\u001b[0m\n\u001b[0;32m    834\u001b[0m \u001b[39m@__handle_extraction_exceptions\u001b[39m\n\u001b[0;32m    835\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__extract_info\u001b[39m(\u001b[39mself\u001b[39m, url, ie, download, extra_info, process):\n\u001b[1;32m--> 836\u001b[0m     ie_result \u001b[39m=\u001b[39m ie\u001b[39m.\u001b[39;49mextract(url)\n\u001b[0;32m    837\u001b[0m     \u001b[39mif\u001b[39;00m ie_result \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:  \u001b[39m# Finished already (backwards compatibility; listformats and friends should be moved here)\u001b[39;00m\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:534\u001b[0m, in \u001b[0;36mInfoExtractor.extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m    533\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minitialize()\n\u001b[1;32m--> 534\u001b[0m ie_result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_real_extract(url)\n\u001b[0;32m    535\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_x_forwarded_for_ip:\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\youtube.py:1794\u001b[0m, in \u001b[0;36mYoutubeIE._real_extract\u001b[1;34m(self, url)\u001b[0m\n\u001b[0;32m   1782\u001b[0m owner_profile_url \u001b[39m=\u001b[39m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mownerProfileUrl\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m   1784\u001b[0m info \u001b[39m=\u001b[39m {\n\u001b[0;32m   1785\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m: video_id,\n\u001b[0;32m   1786\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mtitle\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_live_title(video_title) \u001b[39mif\u001b[39;00m is_live \u001b[39melse\u001b[39;00m video_title,\n\u001b[0;32m   1787\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mformats\u001b[39m\u001b[39m'\u001b[39m: formats,\n\u001b[0;32m   1788\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mthumbnails\u001b[39m\u001b[39m'\u001b[39m: thumbnails,\n\u001b[0;32m   1789\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mdescription\u001b[39m\u001b[39m'\u001b[39m: video_description,\n\u001b[0;32m   1790\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mupload_date\u001b[39m\u001b[39m'\u001b[39m: unified_strdate(\n\u001b[0;32m   1791\u001b[0m         microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m   1792\u001b[0m         \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39muploadDate\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m   1793\u001b[0m     \u001b[39m'\u001b[39m\u001b[39muploader\u001b[39m\u001b[39m'\u001b[39m: video_details[\u001b[39m'\u001b[39m\u001b[39mauthor\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m-> 1794\u001b[0m     \u001b[39m'\u001b[39m\u001b[39muploader_id\u001b[39m\u001b[39m'\u001b[39m: \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_search_regex(\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m/(?:channel|user)/([^/?&#]+)\u001b[39;49m\u001b[39m'\u001b[39;49m, owner_profile_url, \u001b[39m'\u001b[39;49m\u001b[39muploader id\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mif\u001b[39;00m owner_profile_url \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m   1795\u001b[0m     \u001b[39m'\u001b[39m\u001b[39muploader_url\u001b[39m\u001b[39m'\u001b[39m: owner_profile_url,\n\u001b[0;32m   1796\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mchannel_id\u001b[39m\u001b[39m'\u001b[39m: channel_id,\n\u001b[0;32m   1797\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mchannel_url\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m'\u001b[39m\u001b[39mhttps://www.youtube.com/channel/\u001b[39m\u001b[39m'\u001b[39m \u001b[39m+\u001b[39m channel_id \u001b[39mif\u001b[39;00m channel_id \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m   1798\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mduration\u001b[39m\u001b[39m'\u001b[39m: duration,\n\u001b[0;32m   1799\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mview_count\u001b[39m\u001b[39m'\u001b[39m: int_or_none(\n\u001b[0;32m   1800\u001b[0m         video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m   1801\u001b[0m         \u001b[39mor\u001b[39;00m microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mviewCount\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m   1802\u001b[0m         \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39minteractionCount\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m   1803\u001b[0m     \u001b[39m'\u001b[39m\u001b[39maverage_rating\u001b[39m\u001b[39m'\u001b[39m: float_or_none(video_details\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39maverageRating\u001b[39m\u001b[39m'\u001b[39m)),\n\u001b[0;32m   1804\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mage_limit\u001b[39m\u001b[39m'\u001b[39m: \u001b[39m18\u001b[39m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m   1805\u001b[0m         microformat\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39misFamilySafe\u001b[39m\u001b[39m'\u001b[39m) \u001b[39mis\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m   1806\u001b[0m         \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39misFamilyFriendly\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mfalse\u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m   1807\u001b[0m         \u001b[39mor\u001b[39;00m search_meta(\u001b[39m'\u001b[39m\u001b[39mog:restrictions:age\u001b[39m\u001b[39m'\u001b[39m) \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39m18+\u001b[39m\u001b[39m'\u001b[39m) \u001b[39melse\u001b[39;00m \u001b[39m0\u001b[39m,\n\u001b[0;32m   1808\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mwebpage_url\u001b[39m\u001b[39m'\u001b[39m: webpage_url,\n\u001b[0;32m   1809\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m'\u001b[39m: [category] \u001b[39mif\u001b[39;00m category \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m   1810\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mtags\u001b[39m\u001b[39m'\u001b[39m: keywords,\n\u001b[0;32m   1811\u001b[0m     \u001b[39m'\u001b[39m\u001b[39mis_live\u001b[39m\u001b[39m'\u001b[39m: is_live,\n\u001b[0;32m   1812\u001b[0m }\n\u001b[0;32m   1814\u001b[0m pctr \u001b[39m=\u001b[39m try_get(\n\u001b[0;32m   1815\u001b[0m     player_response,\n\u001b[0;32m   1816\u001b[0m     \u001b[39mlambda\u001b[39;00m x: x[\u001b[39m'\u001b[39m\u001b[39mcaptions\u001b[39m\u001b[39m'\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mplayerCaptionsTracklistRenderer\u001b[39m\u001b[39m'\u001b[39m], \u001b[39mdict\u001b[39m)\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\extractor\\common.py:1012\u001b[0m, in \u001b[0;36mInfoExtractor._search_regex\u001b[1;34m(self, pattern, string, name, default, fatal, flags, group)\u001b[0m\n\u001b[0;32m   1011\u001b[0m \u001b[39melif\u001b[39;00m fatal:\n\u001b[1;32m-> 1012\u001b[0m     \u001b[39mraise\u001b[39;00m RegexNotFoundError(\u001b[39m'\u001b[39m\u001b[39mUnable to extract \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m _name)\n\u001b[0;32m   1013\u001b[0m \u001b[39melse\u001b[39;00m:\n",
      "\u001b[1;31mRegexNotFoundError\u001b[0m: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mDownloadError\u001b[0m                             Traceback (most recent call last)",
      "File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:39\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     38\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 39\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_info \u001b[39m=\u001b[39m ydl\u001b[39m.\u001b[39;49mextract_info(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mvideoid, download\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n\u001b[0;32m     40\u001b[0m \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:808\u001b[0m, in \u001b[0;36mYoutubeDL.extract_info\u001b[1;34m(self, url, download, ie_key, extra_info, process, force_generic_extractor)\u001b[0m\n\u001b[0;32m    805\u001b[0m         \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreport_warning(\u001b[39m'\u001b[39m\u001b[39mThe program functionality for this site has been marked as broken, \u001b[39m\u001b[39m'\u001b[39m\n\u001b[0;32m    806\u001b[0m                             \u001b[39m'\u001b[39m\u001b[39mand will probably not work.\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m--> 808\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__extract_info(url, ie, download, extra_info, process)\n\u001b[0;32m    809\u001b[0m \u001b[39melse\u001b[39;00m:\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:824\u001b[0m, in \u001b[0;36mYoutubeDL.__handle_extraction_exceptions.<locals>.wrapper\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    823\u001b[0m \u001b[39mexcept\u001b[39;00m ExtractorError \u001b[39mas\u001b[39;00m e:  \u001b[39m# An error we somewhat expected\u001b[39;00m\n\u001b[1;32m--> 824\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreport_error(compat_str(e), e\u001b[39m.\u001b[39;49mformat_traceback())\n\u001b[0;32m    825\u001b[0m \u001b[39mexcept\u001b[39;00m MaxDownloadsReached:\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:628\u001b[0m, in \u001b[0;36mYoutubeDL.report_error\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m    627\u001b[0m error_message \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m \u001b[39m%\u001b[39m (_msg_header, message)\n\u001b[1;32m--> 628\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrouble(error_message, tb)\n",
      "File \u001b[1;32mc:\\python310\\lib\\site-packages\\youtube_dl\\YoutubeDL.py:598\u001b[0m, in \u001b[0;36mYoutubeDL.trouble\u001b[1;34m(self, message, tb)\u001b[0m\n\u001b[0;32m    597\u001b[0m         exc_info \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n\u001b[1;32m--> 598\u001b[0m     \u001b[39mraise\u001b[39;00m DownloadError(message, exc_info)\n\u001b[0;32m    599\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_download_retcode \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n",
      "\u001b[1;31mDownloadError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output.",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mOSError\u001b[0m                                   Traceback (most recent call last)",
      "\u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\subtitle.ipynb Cell 6\u001b[0m in \u001b[0;36m9\n\u001b[0;32m      <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m url \u001b[39m=\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttps://www.youtube.com/watch?v=\u001b[39m\u001b[39m{\u001b[39;00mvideo_ids[\u001b[39m0\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m      <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39m# getting video\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m video \u001b[39m=\u001b[39m pafy\u001b[39m.\u001b[39;49mnew(url) \n\u001b[0;32m     <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m value \u001b[39m=\u001b[39m video\u001b[39m.\u001b[39mtitle\n\u001b[0;32m     <a href='vscode-notebook-cell:/d%3A/All_programming_projects/Python%20coding%20gym/Youtube%20script%20download/subtitle.ipynb#W5sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m value \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\u001b[39mr\u001b[39m\u001b[39m'\u001b[39m\u001b[39m[^\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mw]\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m, value) \u001b[39m# remove any special characters that are not alphabet or numbers\u001b[39;00m\n",
      "File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\pafy.py:124\u001b[0m, in \u001b[0;36mnew\u001b[1;34m(url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m    121\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    122\u001b[0m        \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mbackend_youtube_dl\u001b[39;00m \u001b[39mimport\u001b[39;00m YtdlPafy \u001b[39mas\u001b[39;00m Pafy\n\u001b[1;32m--> 124\u001b[0m \u001b[39mreturn\u001b[39;00m Pafy(url, basic, gdata, size, callback, ydl_opts\u001b[39m=\u001b[39;49mydl_opts)\n",
      "File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:30\u001b[0m, in \u001b[0;36mYtdlPafy.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m     28\u001b[0m \u001b[39mif\u001b[39;00m ydl_opts:\n\u001b[0;32m     29\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ydl_opts\u001b[39m.\u001b[39mupdate(ydl_opts)\n\u001b[1;32m---> 30\u001b[0m \u001b[39msuper\u001b[39m(YtdlPafy, \u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
      "File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_shared.py:97\u001b[0m, in \u001b[0;36mBasePafy.__init__\u001b[1;34m(self, video_url, basic, gdata, size, callback, ydl_opts)\u001b[0m\n\u001b[0;32m     94\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexpiry \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m     96\u001b[0m \u001b[39mif\u001b[39;00m basic:\n\u001b[1;32m---> 97\u001b[0m     \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fetch_basic()\n\u001b[0;32m     99\u001b[0m \u001b[39mif\u001b[39;00m gdata:\n\u001b[0;32m    100\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_fetch_gdata()\n",
      "File \u001b[1;32md:\\All_programming_projects\\Python coding gym\\Youtube script download\\pafy_develop\\pafy\\backend_youtube_dl.py:42\u001b[0m, in \u001b[0;36mYtdlPafy._fetch_basic\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m     40\u001b[0m     \u001b[39m# Turn into an IOError since that is what pafy previously raised\u001b[39;00m\n\u001b[0;32m     41\u001b[0m     \u001b[39mexcept\u001b[39;00m youtube_dl\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mDownloadError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m---> 42\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mIOError\u001b[39;00m(\u001b[39mstr\u001b[39m(e)\u001b[39m.\u001b[39mreplace(\u001b[39m'\u001b[39m\u001b[39mYouTube said\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39mYoutube says\u001b[39m\u001b[39m'\u001b[39m))\n\u001b[0;32m     44\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback:\n\u001b[0;32m     45\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback(\u001b[39m\"\u001b[39m\u001b[39mFetched video info\u001b[39m\u001b[39m\"\u001b[39m)\n",
      "\u001b[1;31mOSError\u001b[0m: ERROR: Unable to extract uploader id; please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see  https://yt-dl.org/update  on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output."
     ]
    }
   ],
   "source": [
    "## getting title from youtube video by its link\n",
    "\n",
    "import pafy_develop.pafy.pafy as pafy\n",
    "    \n",
    "# url of video \n",
    "url = f\"https://www.youtube.com/watch?v={video_ids[0]}\"\n",
    "    \n",
    "# getting video\n",
    "video = pafy.new(url) \n",
    "value = video.title\n",
    "value = re.sub(r'[^\\w]', ' ', value) # remove any special characters that are not alphabet or numbers\n",
    "  \n",
    "# printing the value\n",
    "print(\"Title : \" + value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The subtitle is going to be created as {value}.txt. Please check\n"
     ]
    }
   ],
   "source": [
    "print(\"The subtitle is going to be created as {value}.txt. Please check\")\n",
    "\n",
    "get_article_to_txt(video_ids, languages, txt_name= value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '25  Symmetric Matrices and Positive Definiteness.txt'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn [6], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[39m## preview the result:\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m{\u001b[39;49;00mvalue\u001b[39m}\u001b[39;49;00m\u001b[39m.txt\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m'\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[0;32m      4\u001b[0m     A \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mread()\n\u001b[0;32m      5\u001b[0m     \u001b[39mprint\u001b[39m(A)\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '25  Symmetric Matrices and Positive Definiteness.txt'"
     ]
    }
   ],
   "source": [
    "## preview the result:\n",
    "\n",
    "with open(f\"{value}.txt\", 'r') as f:\n",
    "    A = f.read()\n",
    "    print(A)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.4 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "c19fa61d258bb2b35aae2ada233c33e2817c1ce895aa48acba720c6bf7cbe3cb"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}