Skip to content

Instantly share code, notes, and snippets.

Last active November 14, 2021 01:44
Show Gist options
  • Save Leumastai/cc8a9c32fdf04bbde0cc9f5bb797ce55 to your computer and use it in GitHub Desktop.
Save Leumastai/cc8a9c32fdf04bbde0cc9f5bb797ce55 to your computer and use it in GitHub Desktop.
Script to scrape and download YouTube videos
#Uncomment code if you're running virtually
""" %%capture
import sys
!pip install selenium
#!apt-get update # to update ubuntu to correctly run apt install
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver') """
pip install pytube==11.0.1
import os
import ssl
import urllib.error
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup
from pytube import YouTube
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
ROOT_DIR = os.getcwd()
def get_youtube_url(name):
    """
    Search YouTube for ``name`` and collect the links of the result thumbnails.

    name: The name of the object to search for
    Returns video_links: A list of video links sourced from youtube
        (empty if no thumbnail carries a link).

    Raises selenium's TimeoutException if no element with id "thumbnail"
    shows up within 10 seconds.
    """
    video_links = []

    # If running virtually (e.g. in a hosted notebook), build the driver
    # from explicit options instead of the default call below:
    #   chrome_options = webdriver.ChromeOptions()
    #   driver = webdriver.Chrome('chromedriver', chrome_options=chrome_options)
    # NOTE(review): the original driver arguments were lost from this file;
    # this assumes chromedriver is discoverable on PATH - confirm.
    driver = webdriver.Chrome()
    try:
        # URL-encode the search term so spaces and symbols survive the request.
        query = urllib.parse.quote_plus(f"{name}")
        # NOTE(review): the search URL was missing from the source; this is
        # the standard YouTube results endpoint - confirm it is the one intended.
        driver.get(f"https://www.youtube.com/results?search_query={query}")

        links = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.ID, "thumbnail"))
        )
        for link in links:
            href = link.get_attribute("href")
            # Some matched thumbnail elements have no href; skip them rather
            # than returning None entries to the caller.
            if href:
                video_links.append(href)
    finally:
        # Always shut the browser down, even when the wait times out.
        driver.quit()

    return video_links
def get_youtube_video(url, vid_name):
    """
    Download a youtube video into the "ytube_videos" folder under ROOT_DIR.

    url: A youtube video URL link
    vid_name: A name to save the video (without the ".mp4" extension)

    If "<vid_name>.mp4" already exists it is first moved aside to
    "<vid_name><n>.mp4", where n is the first number not already in use.

    Returns the path of the saved video.
    """
    new_path = os.path.join(ROOT_DIR, "ytube_videos")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(new_path, exist_ok=True)

    rname_vid_path = '%s/%s.mp4' % (new_path, vid_name)
    if os.path.exists(rname_vid_path):
        # Keep the earlier download: pick a numbered name that is still free
        # so repeated collisions never overwrite a previous backup.
        count = 0
        nrname_vid_path = '%s/%s%s.mp4' % (new_path, vid_name, count)
        while os.path.exists(nrname_vid_path):
            count += 1
            nrname_vid_path = '%s/%s%s.mp4' % (new_path, vid_name, count)
        os.rename(rname_vid_path, nrname_vid_path)

    yt = YouTube(url)
    ys = yt.streams.get_highest_resolution()
    # NOTE(review): the download call was missing from the source; pytube's
    # Stream.download returns the written file's path, which is renamed here
    # instead of rebuilding it from ys.default_filename - confirm.
    new_fname = ys.download(output_path=new_path)
    os.rename(new_fname, rname_vid_path)
    return rname_vid_path
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment