Skip to content

Instantly share code, notes, and snippets.

Created December 15, 2017 18:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/42ff1e418ab315c9637f3ffe08ab0a2b to your computer and use it in GitHub Desktop.
Save anonymous/42ff1e418ab315c9637f3ffe08ab0a2b to your computer and use it in GitHub Desktop.
import urllib2
import httplib2
import requests
import json
import os
from BeautifulSoup import BeautifulSoup, SoupStrainer
# Module-level mapping; not used by the code in this script.
links = dict()

# Archive pages to scrape, one per podcast show.
urls = [
    "https://www.stuffyoushouldknow.com/podcasts/sysk-archive.htm",
    "https://www.stufftoblowyourmind.com/podcasts/stbym-archive.htm",
    "https://www.stufftheydontwantyoutoknow.com/podcasts/stdwytk-archive.htm",
    "https://www.brainstuffshow.com/podcasts/brainstuff-archive.htm",
    "https://www.parttimegenius.show/podcasts/ptg-archive.htm",
    "https://www.missedinhistory.com/podcasts/mih-archive.htm",
    "https://www.stuffmomnevertoldyou.com/podcasts/smnty-archive.htm",
    "https://www.fwthinking.com/podcasts/fwt-archive.htm",
]

# Output directory: "<current working directory>/json/". The empty last
# component keeps the trailing path separator, so on Windows the value is the
# same "<cwd>\json\" string as before, while other platforms get their own
# separator instead of literal backslashes.
JSONDIR = os.path.join(os.getcwd(), "json", "")

try:
    os.mkdir(JSONDIR)
except OSError as e:
    # OSError is the portable base class (WindowsError only exists on
    # Windows). Typically this just means the directory already exists, so
    # report the error and carry on.
    print(e)
def findLinks(url):
    """Scrape one podcast archive page and save its episode links as JSON.

    Downloads ``url``, keeps every ``<a>`` tag whose ``href`` contains
    ``"podcasts/"`` and writes the result to a file inside ``JSONDIR`` as one
    JSON object of the form::

        {"<link text>": {"name": "<link text>", "url": "<href>"}, ...}

    Entries appear in page order. When two links share the same text, the
    later link replaces the earlier one (a JSON object cannot hold both).

    The file is written via its full path, so the process working directory
    is left unchanged.

    Args:
        url: Address of the archive page. The output file is named after the
            last path segment with its extension removed, e.g.
            ``.../podcasts/sysk-archive.htm`` -> ``sysk-archive.json``.

    Returns:
        None. The result is the file written to disk.
    """
    # Local import so this function does not depend on an extra file-level
    # import; OrderedDict keeps page order on every Python version.
    from collections import OrderedDict

    # Name the file after the last URL path segment. A fixed-width slice such
    # as url[-16:-4] only works for 12-character page names: it truncates
    # longer ones ("stbym-archive" -> "tbym-archive") and captures the
    # preceding "/" for shorter ones ("ptg-archive" -> "/ptg-archive").
    page_name = url.rstrip("/").rsplit("/", 1)[-1]
    filename = os.path.splitext(page_name)[0] + ".json"
    target = os.path.join(JSONDIR, filename)

    # A timeout keeps one unresponsive site from hanging the whole run.
    html = requests.get(url, timeout=30).content
    anchors = BeautifulSoup(html, parseOnlyThese=SoupStrainer('a'))

    entries = OrderedDict()
    for anchor in anchors:
        if not anchor.has_key('href'):
            continue
        href = anchor['href']
        if "podcasts/" not in href:
            continue
        # Non-ASCII characters are dropped from the link text.
        text = anchor.text.encode('ascii', 'ignore').decode('ascii')
        entries[text] = OrderedDict([("name", text), ("url", href)])

    # json.dump handles quoting/escaping and emits one well-formed object,
    # unlike hand-formatted fragments.
    with open(target, 'w') as f:
        json.dump(entries, f, indent=2)
    print("written file {}".format(filename))
if __name__ == "__main__":
    # Scrape every configured archive page in turn, one JSON file per page.
    for archive_url in urls:
        findLinks(archive_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment