Skip to content

Instantly share code, notes, and snippets.

@0187773933
Created July 6, 2020 23:47
Show Gist options
  • Save 0187773933/1453b2eea71ec1842ccac1794b13588e to your computer and use it in GitHub Desktop.
Save 0187773933/1453b2eea71ec1842ccac1794b13588e to your computer and use it in GitHub Desktop.
Finds Live Stream URLs for Given YouTube Channel IDs using Pyppeteer
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import requests
from pprint import pprint
import json
import asyncio
from pyppeteer import launch
def write_json(file_path, python_object) -> None:
    """Serialize *python_object* as pretty-printed JSON to *file_path*.

    The file is opened for writing as UTF-8 (an existing file is
    overwritten). Non-ASCII characters are written verbatim rather than
    as ``\\uXXXX`` escapes, and nested structures are indented by four
    spaces.

    Args:
        file_path: Path of the file to create or overwrite.
        python_object: Any object ``json.dump`` can serialize.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *python_object* is not JSON-serializable.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(python_object, f, ensure_ascii=False, indent=4)
# JavaScript evaluated inside the loaded page. It collects every
# "#video-title" element whose great-grandparent contains a
# ".badge-style-type-live-now" element, and reports the element's text and
# raw "href" attribute. DOM errors are swallowed on purpose (best effort):
# whatever was collected before the error is still returned.
_LIVE_STREAMS_JS = '''() => {
    let results = []
    try{
        const streams = document.querySelectorAll( "#video-title" );
        for ( let i = 0; i < streams.length; ++i ) {
            const live_badge = streams[i].parentNode.parentNode.parentNode.querySelectorAll( ".badge-style-type-live-now" );
            if ( live_badge.length > 0 ) {
                results.push({
                    "title": streams[i].innerText ,
                    "url": streams[i].getAttribute( "href" )
                });
            }
        }
    }
    catch( e ) {}
    return results;
}'''


async def get_channels_live_streams(channel_id):
    """Return the entries marked "live now" on a channel's videos page.

    Launches a headless browser, opens the channel's videos grid with the
    live-view query parameters, and evaluates ``_LIVE_STREAMS_JS`` in the
    page.

    Args:
        channel_id: YouTube channel id interpolated into the channel URL.

    Returns:
        The value produced by the page script: a list of
        ``{"title": ..., "url": ...}`` entries, where ``url`` is the raw
        ``href`` attribute of the title element. An empty list is returned
        when nothing matched or when any step raised; in the latter case
        the exception is printed rather than propagated.
    """
    try:
        url = f"https://www.youtube.com/channel/{channel_id}/videos?view=2&flow=grid&live_view=501"
        print(f"Opening: {url}")
        browser = await launch({'headless': True})
        try:
            page = await browser.newPage()
            await page.goto(url)
            print(f"Successfully Navigated to: {url}")
            return await page.evaluate(_LIVE_STREAMS_JS)
        finally:
            # Always shut the browser down, even when navigation or
            # evaluation fails; otherwise the browser process is leaked.
            await browser.close()
    except Exception as e:
        # Best-effort scraper: report the failure and fall back to "no
        # streams" so one bad channel does not abort the caller.
        print(e)
        return []
async def _collect_live_streams(channels):
    """Look up live streams for each channel, one after another.

    Each channel dict gets a ``"live_streams"`` key holding the result of
    ``get_channels_live_streams`` for its ``"id"``; progress and the
    updated dict are printed as each channel completes.

    Args:
        channels: List of dicts with at least ``"name"`` and ``"id"`` keys.
            The dicts are updated in place.

    Returns:
        The same *channels* list.
    """
    total = len(channels)
    for position, channel in enumerate(channels, start=1):
        print(f"\nSearching: {channel['name']} === [{position}] of {total}")
        channel["live_streams"] = await get_channels_live_streams(channel["id"])
        pprint(channel)
    return channels


if __name__ == '__main__':
    # NOTE: the channel list is hard-coded; an earlier draft sketched
    # reading entries from a text file instead.
    channels = [
        {"name": "Boiler Room", "id": "UCGBpxWJr9FNOcFYA5GkKrMg"},
        {"name": "Joe Rogan", "id": "UCzQUP1qoWDoEbmsQxvdjxgQ"},
        {"name": "Djuma Private Game Reserve", "id": "UCWh93l9snW90iP2ybPHikAg"},
        {"name": "Monterey Bay Aquarium", "id": "UCnM5iMGiKsZg-iOlIO2ZkdQ"},
        {"name": "Space Videos", "id": "UCakgsb0w7QB0VHdnCc-OVEA"},
        {"name": "Explore Oceans", "id": "UCSyg9cb3Iq-NtlbxqNB9wGw"},
        {"name": "Cornell Lab Bird Cams", "id": "UCZXZQxS3d6NpR-eH_gdDwYA"},
        {"name": "City of Deerfield Beach", "id": "UCGOxU-8iNOeoG3a337shXoA"},
        {"name": "Coral Morphologic", "id": "UCtllXAWa3EcfcsL5tvpqGSw"},
        {"name": "Explore Bears and Bisons", "id": "UC2Sk0aXLq3ADkH_USGPKT_Q"},
        {"name": "Adopta Manatee", "id": "UCTSa1WxrNvaiXBaErL3Vyrw"},
    ]
    # One event loop for the whole run. asyncio.run() replaces the
    # deprecated get_event_loop().run_until_complete() pattern, which
    # relies on an implicitly created loop.
    asyncio.run(_collect_live_streams(channels))
    pprint(channels)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment