YouTube "URL -> Title" Scraper
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup
'''When given a list of YouTube URLs inside `yt.txt`, this script uses
Requests and BeautifulSoup4 to iterate over that list of URLs and
scrape the video title from each link. The titles are then appended to
the same `yt.txt` input file. Because of this appending, running the
script multiple times on the same file will result in errors. Make sure
to check the file before running and remove any lines that are not
valid YouTube URLs.

Script originally created by: dmt (not that kind, the person!)
Extended by: ncdulo

Planned additions:
  - Error handling (see the sketch below the script)
    - As it stands, any failed request will cause the script to
      error out. We should be checking each request for failure.
    - When a failed request occurs, do we exit? Or give the user
      the option to skip or retry? If skipped, we should include
      a "Failed" line in the output to keep the URLs lined up.'''
if __name__ == '__main__':
    titles = []
    with open('yt.txt', 'r') as f:
        for line in f:
            # Pick a URL, any URL... (strip the trailing newline first)
            URL = line.strip()
            page = requests.get(URL)
            # Load data into bs4
            soup = BeautifulSoup(page.content, 'html.parser')
            # Find the title
            title = soup.find('span', attrs={'class': 'watch-title'})
            # Display output, append to our title list
            print(title.text)
            titles.append(title.text)

    # Strip newlines & whitespace from each title
    strip_titles = [item.strip() for item in titles]
    with open('yt.txt', 'a') as f:
        for title in strip_titles:
            f.write(title + '\n')
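
A minimal sketch of the per-request error handling listed under "Planned additions", assuming the same `yt.txt` input and the same Requests/BeautifulSoup approach as above. The `fetch_title` helper name, the 10-second timeout, and the "Failed" placeholder text are illustrative choices, not part of the original script:

#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup

def fetch_title(url):
    '''Return the video title for `url`, or None if the request fails.'''
    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        return None
    soup = BeautifulSoup(page.content, 'html.parser')
    title = soup.find('span', attrs={'class': 'watch-title'})
    return title.text.strip() if title is not None else None

if __name__ == '__main__':
    titles = []
    with open('yt.txt', 'r') as f:
        for line in f:
            title = fetch_title(line.strip())
            # On failure, keep a "Failed" placeholder so the output
            # lines stay aligned with the input URLs instead of
            # aborting the whole run.
            titles.append(title if title is not None else 'Failed')

    with open('yt.txt', 'a') as f:
        for title in titles:
            f.write(title + '\n')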