# antonrasmussen/wtf_dispatches.py

## wtf_dispatches.py
from bs4 import BeautifulSoup
from urllib.request import urlopen
import time


# Scrape every article linked from a saved index page and append the text of
# each one to a single output file.
#
# Workflow:
#   1. Parse the locally saved index page and collect, per <article>, the
#      date string and the URL of the full article.
#   2. Download each article, cut its text from the date up to the end
#      marker, and append the result to the output file.
#   3. Pause between downloads.

# Locally saved copy of the index page that lists the articles.
html_doc = "Dispatches — WTF with Marc Maron Podcast.html"

# Output file; article texts are appended to it, one after another.
filename = 'out.txt'

# Text that marks the end of the article body in the page text.
END_MARKER = "Powered"

# Pause between two consecutive downloads, in seconds.
SLEEP_SECONDS = 90

date_list = []
url_list = []

# The encoding is stated explicitly instead of relying on the platform
# default.
# NOTE(review): assumes the saved page is UTF-8, like the live pages decoded
# below - confirm.
with open(html_doc, encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, 'html.parser')
    # A single pass keeps every date paired with the URL of the same article.
    for article in soup.find('div', id='content').find_all('article'):
        date_list.append(article.find('a').contents[0])
        # The URL is cut out of the markup of the second child of the <h1>:
        # the piece between the second and third '=', truncated at the first
        # space, with the surrounding double quotes removed.
        # NOTE(review): presumably that piece is the link's href value -
        # verify against the saved page.
        heading_markup = str(article.find('h1').contents[1])
        raw_url = heading_markup.split('=')[2].split(' ')[0]
        url_list.append(raw_url.replace("\"", ""))

for article_cntr, (article_date, url) in enumerate(zip(date_list, url_list),
                                                   start=1):
    # Close the HTTP response as soon as the body has been read.
    with urlopen(url) as page:
        html = page.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")

    # Flatten the page text to a single line once and reuse it.
    text = soup.get_text().replace('\n', ' ')

    # str.find returns -1 when nothing matches; used directly as a slice
    # bound that would silently produce a wrong (mostly empty or truncated)
    # excerpt, so fall back to the full remaining text instead.
    start_loc = text.find(article_date)
    if start_loc == -1:
        start_loc = 0
    # Look for the end marker only after the start of the article.
    end_loc = text.find(END_MARKER, start_loc)
    if end_loc == -1:
        end_loc = len(text)

    # The file is reopened for every article so that the text already
    # scraped is on disk before the long pause below.
    with open(filename, 'a', encoding="utf-8") as f:
        # I have found that Frank's name comes up a lot, so it is removed.
        print(text[start_loc:end_loc].replace('Frank Cappello', ''), file=f)
        print("\n", file=f)

    # No need to wait once the last article has been written.
    if article_cntr < len(url_list):
        print('Sleep start: ' + str(article_cntr))
        time.sleep(SLEEP_SECONDS)