Skip to content

Instantly share code, notes, and snippets.

@subtleGradient
Last active September 30, 2022 23:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save subtleGradient/76d71f9bc8d5d947e1494c2f113d8e46 to your computer and use it in GitHub Desktop.
Save subtleGradient/76d71f9bc8d5d947e1494c2f113d8e46 to your computer and use it in GitHub Desktop.
Command-line Python script to search YouTube
#!/usr/bin/env python3
import json
import os
def install():
    """Install the scraping dependencies and download the pinned Chromium build.

    Sets ``PYPPETEER_CHROMIUM_REVISION`` in this process's environment (child
    processes inherit it), installs ``pyppeteer`` and ``pyppeteer_stealth``
    with pip, then runs pyppeteer's Chromium installer.

    Both steps run through ``sys.executable`` so the packages land in, and the
    installer runs from, the interpreter that is executing this script rather
    than whichever ``python3`` / ``pyppeteer-install`` happens to be first on
    PATH. Failures are not raised: like the previous ``os.system`` calls, the
    steps are best-effort and their exit codes are ignored.
    """
    import subprocess
    import sys

    os.environ['PYPPETEER_CHROMIUM_REVISION'] = '1052357'
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-q',
         'pyppeteer', 'pyppeteer_stealth'],
        check=False,
    )
    # Same function the ``pyppeteer-install`` console script invokes, but
    # without depending on that script being on PATH.
    subprocess.run(
        [sys.executable, '-c',
         'from pyppeteer.command import install; install()'],
        check=False,
    )
async def get_videos():
    """Collect the search results shown on the module-level ``page``.

    Runs a JavaScript snippet in the page that maps every
    ``ytd-video-renderer`` element to an object with the keys ``title``,
    ``url``, ``length``, ``views``, ``date``, ``channel_name`` and
    ``channel_url``, and returns whatever ``page.evaluate`` yields for it.
    If the snippet throws, it returns a one-element list holding an object
    with an ``error`` message instead.
    """
    # Kept line-for-line identical to the script text sent to the browser.
    extract_results_js = '\n'.join([
        '() => {',
        'try {',
        'return Array.from(document.querySelectorAll("ytd-video-renderer")).map(video => ({',
        'title: video.querySelector("#video-title")?.innerText.trim(),',
        'url: video.querySelector("#video-title")?.href.trim(),',
        'length: video.querySelector("ytd-thumbnail-overlay-time-status-renderer")?.innerText.trim(),',
        'views: video.querySelector("#metadata-line span:nth-child(1)")?.innerText.trim(),',
        'date: video.querySelector("#metadata-line span:nth-child(2)")?.innerText.trim(),',
        'channel_name: video.querySelector("ytd-channel-name a")?.textContent.trim(),',
        'channel_url: video.querySelector("ytd-channel-name a")?.href.trim(),',
        '}))',
        '} catch (error) {',
        'return [{ error: error.message }]',
        '}',
        '}',
    ])
    return await page.evaluate(extract_results_js)
def url_for_youtube_search(search_query, over_20_min=True):
    """Return the YouTube results-page URL for ``search_query``.

    When ``over_20_min`` is truthy the ``sp`` query parameter is added with
    the value ``EgIYAg%3D%3D``; it is percent-encoded again along with the
    rest of the query string, so it appears as ``EgIYAg%253D%253D`` in the
    returned URL. When falsy, the URL carries only ``search_query``.
    """
    from urllib.parse import urlencode, urlunsplit

    params = {'search_query': search_query}
    if over_20_min:
        params['sp'] = 'EgIYAg%3D%3D'
    encoded_query = urlencode(params, doseq=True)
    return urlunsplit(('https', 'www.youtube.com', '/results', encoded_query, ''))
# Import-time self-checks for url_for_youtube_search: the default call carries
# the ``sp`` filter parameter, and over_20_min=False leaves it out.
assert (
    url_for_youtube_search('jason statham interview')
    == 'https://www.youtube.com/results?search_query=jason+statham+interview&sp=EgIYAg%253D%253D'
)
assert (
    url_for_youtube_search('jason statham interview', over_20_min=False)
    == 'https://www.youtube.com/results?search_query=jason+statham+interview'
)
# search YouTube for a single query string and return the scraped results
async def search_youtube(search_string="jason statham interview"):
    """Open the YouTube results page for ``search_string`` and scrape it.

    Launches a headless Chromium (the executable path is pyppeteer's default
    path with its bundled revision swapped for ``1052357``), applies
    ``pyppeteer_stealth`` to a new page, navigates to the search URL and
    returns the value produced by ``get_videos()``.

    The module-level ``browser`` and ``page`` globals are (re)assigned because
    ``get_videos()`` reads ``page`` from module scope. The browser is always
    closed before returning, even when navigation or scraping raises, so no
    Chromium process is left running; the globals therefore refer to a closed
    browser once this coroutine finishes.
    """
    global browser, page
    import pyppeteer
    from pyppeteer import launch
    from pyppeteer_stealth import stealth

    chromiumPathLatest = pyppeteer.executablePath().replace(
        pyppeteer.__chromium_revision__, '1052357')
    browser = await launch({'headless': True, 'executablePath': chromiumPathLatest})
    try:
        page = await browser.newPage()
        await stealth(page)
        await page.goto(url_for_youtube_search(search_string))
        return await get_videos()
    finally:
        # Release the Chromium process regardless of how the scrape ended.
        await browser.close()
# run the script
if __name__ == "__main__":
    # Parse the --skip-install, -s/--search and -f/--format arguments.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--skip-install', action='store_true',
                        help='skip installing dependencies')
    parser.add_argument('-s', '--search',
                        help='the query to search youtube with', required=True)
    # Output format: json, jsonl, youtube-dl compatible text, or csv.
    parser.add_argument('-f', '--format', choices=['json', 'jsonl', 'youtube-dl', 'csv'],
                        help='the format to print the results in', default='youtube-dl')
    args = parser.parse_args()

    if not args.skip_install:
        install()

    import asyncio
    videos = asyncio.get_event_loop().run_until_complete(
        search_youtube(args.search))

    # Entries may lack some keys (a selector that matched nothing, or the
    # single {"error": ...} entry returned when the page script throws), so
    # every per-field access below uses .get() instead of indexing.
    if args.format == 'json':
        print(json.dumps(videos, indent=4))
    elif args.format == 'jsonl':
        for video in videos:
            print(json.dumps(video))
    elif args.format == 'youtube-dl':
        print(f'# Search results for "{args.search}"')
        for video in videos:
            print('# ' + json.dumps(video))
            url = video.get('url')
            if url:
                # Only real URLs go on uncommented lines; the metadata stays
                # in the '#' comment line above.
                print(url)
        print('# end\n')
    elif args.format == 'csv':
        import csv
        import sys
        columns = ['title', 'url', 'length', 'views',
                   'date', 'channel_name', 'channel_url']
        writer = csv.writer(sys.stdout)
        writer.writerow(columns)
        for video in videos:
            # csv.writer renders None as an empty field.
            writer.writerow([video.get(column) for column in columns])
@subtleGradient
Copy link
Author

The code above does the following, explained in English:

  1. Install the pyppeteer and pyppeteer_stealth dependencies (skipped when --skip-install is passed)
  2. Launch a headless Chromium browser
  3. Search YouTube for the query provided
  4. Print the results in the selected format (json, jsonl, youtube-dl, or csv)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment