subtleGradient/search_youtube.py

## search_youtube.py
#!/usr/bin/env python3

import json
import os


def install():
    '''Install the third-party packages and the pinned Chromium build.

    Best effort: exit codes are not inspected, matching the script's
    "try to install, then carry on" flow.
    '''
    import subprocess
    import sys

    # Exported before the installer runs so the child process inherits it;
    # this is the same revision search_youtube() substitutes into the
    # Chromium executable path.
    os.environ['PYPPETEER_CHROMIUM_REVISION'] = '1052357'

    # Install with the interpreter that is running this script, so the
    # packages land in the environment that will import them, rather than in
    # whichever ``python3`` happens to be first on PATH.
    python = sys.executable or 'python3'
    try:
        subprocess.call(
            [python, '-m', 'pip', 'install', '-q', 'pyppeteer', 'pyppeteer_stealth'])
    except OSError as exc:
        # Stay best-effort: a missing interpreter must not abort the script.
        print(f'could not run pip: {exc}', file=sys.stderr)

    os.system('pyppeteer-install')


async def get_videos():
    '''Scrape the search results rendered in the module-level ``page``.

    ``page`` is the global assigned by search_youtube(). A JavaScript
    function is evaluated inside it that maps every ``ytd-video-renderer``
    element to an object with the properties title, url, length, views,
    date, channel_name and channel_url. Each lookup uses optional chaining,
    so a missing element yields ``undefined`` for that property instead of
    throwing. If the script does throw, it returns a one-element list
    holding ``{ error: <message> }``.

    Returns whatever ``page.evaluate`` resolves to for that script.
    '''
    scrape_js = '''() => {
        try {
            return Array.from(document.querySelectorAll("ytd-video-renderer")).map(video => ({
                title:        video.querySelector("#video-title")?.innerText.trim(),
                url:          video.querySelector("#video-title")?.href.trim(),
                length:       video.querySelector("ytd-thumbnail-overlay-time-status-renderer")?.innerText.trim(),
                views:        video.querySelector("#metadata-line span:nth-child(1)")?.innerText.trim(),
                date:         video.querySelector("#metadata-line span:nth-child(2)")?.innerText.trim(),
                channel_name: video.querySelector("ytd-channel-name a")?.textContent.trim(),
                channel_url:  video.querySelector("ytd-channel-name a")?.href.trim(),
            }))
        } catch (error) {
            return [{ error: error.message }]
        }
    }'''
    return await page.evaluate(scrape_js)


def url_for_youtube_search(search_query, over_20_min=True):
    '''Build the YouTube results-page URL for ``search_query``.

    search_query: the text to search for; it is form-encoded, so spaces
                  become ``+``.
    over_20_min:  when true, an ``sp=EgIYAg%3D%3D`` parameter is appended
                  (presumably YouTube's "over 20 minutes" duration filter).
                  The value is percent-encoded once more on output, so it
                  appears as ``EgIYAg%253D%253D`` in the returned URL.

    Returns the complete URL as a string.
    '''
    from urllib.parse import urlencode, urlunsplit

    params = {'search_query': search_query}
    if over_20_min:
        params['sp'] = 'EgIYAg%3D%3D'

    encoded = urlencode(params, doseq=True)
    return urlunsplit(('https', 'www.youtube.com', '/results', encoded, ''))


# Import-time self-checks: pin the exact URLs produced with and without the
# ``sp`` filter parameter.
assert (
    url_for_youtube_search('jason statham interview')
    == 'https://www.youtube.com/results?search_query=jason+statham+interview&sp=EgIYAg%253D%253D'
)
assert (
    url_for_youtube_search('jason statham interview', over_20_min=False)
    == 'https://www.youtube.com/results?search_query=jason+statham+interview'
)


# open the results page for one query in headless Chromium and scrape it
async def search_youtube(search_string="jason statham interview"):
    '''Search YouTube for ``search_string`` and return the scraped videos.

    Launches headless Chromium from the pinned revision's executable path,
    applies pyppeteer_stealth to a new page, navigates to the URL from
    url_for_youtube_search() and returns the result of get_videos().

    Side effects: assigns the module-level ``browser`` and ``page`` (which
    get_videos() reads). The browser is not closed here.
    '''
    global browser, page
    import pyppeteer
    from pyppeteer import launch
    from pyppeteer_stealth import stealth

    # Point at the pinned revision's executable instead of the library default.
    default_path = pyppeteer.executablePath()
    pinned_path = default_path.replace(pyppeteer.__chromium_revision__, '1052357')

    launch_options = {'headless': True, 'executablePath': pinned_path}
    browser = await launch(launch_options)
    page = await browser.newPage()
    await stealth(page)

    results_url = url_for_youtube_search(search_string)
    await page.goto(results_url)
    return await get_videos()

def print_videos(videos, output_format, search, out=None):
    '''Write scraped video records to a text stream in the chosen format.

    videos:        list of dicts as returned by search_youtube(). Records
                   are not assumed to be complete: get_videos() returns a
                   lone ``{'error': ...}`` record when its page script
                   throws, so fields are read with ``.get()`` rather than
                   by indexing.
    output_format: 'json', 'jsonl', 'youtube-dl' or 'csv'; any other value
                   writes nothing.
    search:        the query text, echoed in the 'youtube-dl' header line.
    out:           writable text stream; defaults to ``sys.stdout``.
    '''
    import csv
    import sys

    if out is None:
        out = sys.stdout

    if output_format == 'json':
        print(json.dumps(videos, indent=4), file=out)

    elif output_format == 'jsonl':
        for video in videos:
            print(json.dumps(video), file=out)

    elif output_format == 'youtube-dl':
        # Metadata goes on '#' comment lines; each URL gets a line of its own.
        print(f'# Search results for "{search}"', file=out)
        for video in videos:
            print('# ' + json.dumps(video), file=out)
            url = video.get('url')
            if url:
                # Records without a URL keep only their comment line.
                print(url, file=out)
        print('# end\n', file=out)

    elif output_format == 'csv':
        fields = ['title', 'url', 'length', 'views',
                  'date', 'channel_name', 'channel_url']
        writer = csv.writer(out)
        writer.writerow(fields)
        for video in videos:
            # Missing fields become empty cells instead of raising KeyError.
            writer.writerow([video.get(field, '') for field in fields])


# run the script
if __name__ == "__main__":
    # parse the --skip-install, -s/--search and -f/--format arguments
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--skip-install', action='store_true',
                        help='skip installing dependencies')
    parser.add_argument('-s', '--search',
                        help='the query to search youtube with', required=True)
    # format: json, jsonl, csv, or youtube-dl compatible text
    parser.add_argument('-f', '--format', choices=['json', 'jsonl', 'youtube-dl', 'csv'],
                        help='the format to print the results in', default='youtube-dl')
    args = parser.parse_args()

    if not args.skip_install:
        install()

    import asyncio
    videos = asyncio.get_event_loop().run_until_complete(
        search_youtube(args.search))

    print_videos(videos, args.format, args.search)
	#!/usr/bin/env python3

	import json
	import os


	def install():
	    '''Install the third-party packages and the pinned Chromium build.

	    Best effort: exit codes are not inspected, matching the script's
	    "try to install, then carry on" flow.
	    '''
	    import subprocess
	    import sys

	    # Exported before the installer runs so the child process inherits it;
	    # this is the same revision search_youtube() substitutes into the
	    # Chromium executable path.
	    os.environ['PYPPETEER_CHROMIUM_REVISION'] = '1052357'

	    # Install with the interpreter that is running this script, so the
	    # packages land in the environment that will import them, rather than
	    # in whichever ``python3`` happens to be first on PATH.
	    python = sys.executable or 'python3'
	    try:
	        subprocess.call(
	            [python, '-m', 'pip', 'install', '-q', 'pyppeteer', 'pyppeteer_stealth'])
	    except OSError as exc:
	        # Stay best-effort: a missing interpreter must not abort the script.
	        print(f'could not run pip: {exc}', file=sys.stderr)

	    os.system('pyppeteer-install')


	async def get_videos():
	    '''Scrape the search results rendered in the module-level ``page``.

	    ``page`` is the global assigned by search_youtube(). A JavaScript
	    function is evaluated inside it that maps every ``ytd-video-renderer``
	    element to an object with the properties title, url, length, views,
	    date, channel_name and channel_url. Each lookup uses optional
	    chaining, so a missing element yields ``undefined`` for that property
	    instead of throwing. If the script does throw, it returns a
	    one-element list holding ``{ error: <message> }``.

	    Returns whatever ``page.evaluate`` resolves to for that script.
	    '''
	    scrape_js = '''() => {
	        try {
	            return Array.from(document.querySelectorAll("ytd-video-renderer")).map(video => ({
	                title:        video.querySelector("#video-title")?.innerText.trim(),
	                url:          video.querySelector("#video-title")?.href.trim(),
	                length:       video.querySelector("ytd-thumbnail-overlay-time-status-renderer")?.innerText.trim(),
	                views:        video.querySelector("#metadata-line span:nth-child(1)")?.innerText.trim(),
	                date:         video.querySelector("#metadata-line span:nth-child(2)")?.innerText.trim(),
	                channel_name: video.querySelector("ytd-channel-name a")?.textContent.trim(),
	                channel_url:  video.querySelector("ytd-channel-name a")?.href.trim(),
	            }))
	        } catch (error) {
	            return [{ error: error.message }]
	        }
	    }'''
	    return await page.evaluate(scrape_js)


	def url_for_youtube_search(search_query, over_20_min=True):
	    '''Build the YouTube results-page URL for ``search_query``.

	    search_query: the text to search for; it is form-encoded, so spaces
	                  become ``+``.
	    over_20_min:  when true, an ``sp=EgIYAg%3D%3D`` parameter is appended
	                  (presumably YouTube's "over 20 minutes" duration
	                  filter). The value is percent-encoded once more on
	                  output, so it appears as ``EgIYAg%253D%253D`` in the
	                  returned URL.

	    Returns the complete URL as a string.
	    '''
	    from urllib.parse import urlencode, urlunsplit

	    params = {'search_query': search_query}
	    if over_20_min:
	        params['sp'] = 'EgIYAg%3D%3D'

	    encoded = urlencode(params, doseq=True)
	    return urlunsplit(('https', 'www.youtube.com', '/results', encoded, ''))


	# Import-time self-checks: pin the exact URLs produced with and without the
	# ``sp`` filter parameter.
	assert (
	    url_for_youtube_search('jason statham interview')
	    == 'https://www.youtube.com/results?search_query=jason+statham+interview&sp=EgIYAg%253D%253D'
	)
	assert (
	    url_for_youtube_search('jason statham interview', over_20_min=False)
	    == 'https://www.youtube.com/results?search_query=jason+statham+interview'
	)


	# open the results page for one query in headless Chromium and scrape it
	async def search_youtube(search_string="jason statham interview"):
	    '''Search YouTube for ``search_string`` and return the scraped videos.

	    Launches headless Chromium from the pinned revision's executable path,
	    applies pyppeteer_stealth to a new page, navigates to the URL from
	    url_for_youtube_search() and returns the result of get_videos().

	    Side effects: assigns the module-level ``browser`` and ``page`` (which
	    get_videos() reads). The browser is not closed here.
	    '''
	    global browser, page
	    import pyppeteer
	    from pyppeteer import launch
	    from pyppeteer_stealth import stealth

	    # Point at the pinned revision's executable instead of the library default.
	    default_path = pyppeteer.executablePath()
	    pinned_path = default_path.replace(pyppeteer.__chromium_revision__, '1052357')

	    launch_options = {'headless': True, 'executablePath': pinned_path}
	    browser = await launch(launch_options)
	    page = await browser.newPage()
	    await stealth(page)

	    results_url = url_for_youtube_search(search_string)
	    await page.goto(results_url)
	    return await get_videos()

	def print_videos(videos, output_format, search, out=None):
	    '''Write scraped video records to a text stream in the chosen format.

	    videos:        list of dicts as returned by search_youtube(). Records
	                   are not assumed to be complete: get_videos() returns a
	                   lone ``{'error': ...}`` record when its page script
	                   throws, so fields are read with ``.get()`` rather than
	                   by indexing.
	    output_format: 'json', 'jsonl', 'youtube-dl' or 'csv'; any other value
	                   writes nothing.
	    search:        the query text, echoed in the 'youtube-dl' header line.
	    out:           writable text stream; defaults to ``sys.stdout``.
	    '''
	    import csv
	    import sys

	    if out is None:
	        out = sys.stdout

	    if output_format == 'json':
	        print(json.dumps(videos, indent=4), file=out)

	    elif output_format == 'jsonl':
	        for video in videos:
	            print(json.dumps(video), file=out)

	    elif output_format == 'youtube-dl':
	        # Metadata goes on '#' comment lines; each URL gets a line of its own.
	        print(f'# Search results for "{search}"', file=out)
	        for video in videos:
	            print('# ' + json.dumps(video), file=out)
	            url = video.get('url')
	            if url:
	                # Records without a URL keep only their comment line.
	                print(url, file=out)
	        print('# end\n', file=out)

	    elif output_format == 'csv':
	        fields = ['title', 'url', 'length', 'views',
	                  'date', 'channel_name', 'channel_url']
	        writer = csv.writer(out)
	        writer.writerow(fields)
	        for video in videos:
	            # Missing fields become empty cells instead of raising KeyError.
	            writer.writerow([video.get(field, '') for field in fields])


	# run the script
	if __name__ == "__main__":
	    # parse the --skip-install, -s/--search and -f/--format arguments
	    import argparse
	    parser = argparse.ArgumentParser()
	    parser.add_argument('--skip-install', action='store_true',
	                        help='skip installing dependencies')
	    parser.add_argument('-s', '--search',
	                        help='the query to search youtube with', required=True)
	    # format: json, jsonl, csv, or youtube-dl compatible text
	    parser.add_argument('-f', '--format', choices=['json', 'jsonl', 'youtube-dl', 'csv'],
	                        help='the format to print the results in', default='youtube-dl')
	    args = parser.parse_args()

	    if not args.skip_install:
	        install()

	    import asyncio
	    videos = asyncio.get_event_loop().run_until_complete(
	        search_youtube(args.search))

	    print_videos(videos, args.format, args.search)