@dorukcan
Created October 20, 2015 20:08
Creates a podcast (RSS) feed file for use in the BeyondPod podcast application, built from episode video links scraped from dizipub.com.
import re
import urllib2

from BeautifulSoup import BeautifulSoup
from feedgen.feed import FeedGenerator

# series to scrape and artwork for the generated feed
DIZI_TITLE = "Six Feet Under"
DIZI_MAIN_URL = "http://dizipub.com/dizi/six-feet-under-tum-bolumler-izle"
DIZI_LOGO = "http://images.popmatters.com/news_art/s/six-feet-under.jpg"


def downloadUrl(url):
    # fetch a page while presenting a desktop Chrome user agent
    req = urllib2.Request(url)
    req.add_header('User-agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36')
    req.add_header('Host', 'dizipub.com')

    resp = urllib2.urlopen(req)
    return resp.read()


def fetch():
    # collect the episode page URLs from the series page
    html = downloadUrl(DIZI_MAIN_URL)
    soup = BeautifulSoup(html)

    DIZI_BOLUM_URLS = []
    for season in soup.findAll('div', attrs={"class": "list-table"}):
        for a in season.findAll("a"):
            print a["href"]
            DIZI_BOLUM_URLS.append(a["href"])

    print "\n##########################################################\n"

    # collect the embedded player (iframe) URLs and titles from the episode pages
    EMBED_URLS = []
    TITLES = []
    for url in DIZI_BOLUM_URLS:
        html = downloadUrl(url)
        soup = BeautifulSoup(html)

        em = soup.find("div", attrs={"id": "embed-wrapper"}).iframe["src"]
        title = soup.find("title").text

        print title, em
        EMBED_URLS.append(em)
        TITLES.append(title)

    print "\n##########################################################\n"

    # extract the direct video URLs from inside the player pages
    VIDEO_URLS = []
    for url in EMBED_URLS:
        html = downloadUrl(url)
        try:
            soup = BeautifulSoup(html)
            script = soup.body.find("script")
            # the player script embeds a JSON-like source list; grab the 360p file
            mp4 = re.search('{"file": "(.*?)", "label": "360"', str(script)).group(1)
            print mp4
            VIDEO_URLS.append(mp4)
        except Exception:
            print "could not extract a video url"
            VIDEO_URLS.append("")

    return VIDEO_URLS, TITLES


def createFeed(links, titles):
    # build the feed and write it out as rss.xml
    fg = FeedGenerator()
    fg.load_extension("podcast")

    fg.id("http://twitter.com/dorukcankisin")
    fg.title(DIZI_TITLE)
    fg.author({'name': 'dorukcan kisin', 'email': 'dckisin@gmail.com'})
    fg.link(href='http://twitter.com/dorukcankisin', rel='alternate')
    fg.logo(DIZI_LOGO)
    fg.subtitle(DIZI_TITLE + ' videocast')
    fg.language('en')

    # one feed entry per episode, with the mp4 as the enclosure
    for i, url in enumerate(links):
        fe = fg.add_entry()
        fe.id(url)
        fe.enclosure(url, 0, 'video/mp4')
        fe.title(titles[i])
        fe.description(titles[i])

    fg.rss_file('rss.xml')
    return fg.rss_str(pretty=True)
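
# Each entry above ends up as roughly the following RSS <item> in rss.xml
# (illustrative sketch only; the exact markup depends on the feedgen version):
#
#   <item>
#     <title>EPISODE PAGE TITLE</title>
#     <guid>http://.../episode.mp4</guid>
#     <description>EPISODE PAGE TITLE</description>
#     <enclosure url="http://.../episode.mp4" length="0" type="video/mp4"/>
#   </item>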


if __name__ == '__main__':
    links, titles = fetch()
    xmlData = createFeed(links, titles)
    # do something with the xmlData, maybe upload to dropbox
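
    # One possible way to finish the job (a sketch, not part of the original gist):
    # upload the generated XML to Dropbox with the official `dropbox` SDK and point
    # BeyondPod at the file's shared link. DROPBOX_ACCESS_TOKEN is a placeholder
    # you would need to supply yourself.
    import dropbox

    dbx = dropbox.Dropbox("DROPBOX_ACCESS_TOKEN")
    dbx.files_upload(xmlData, "/rss.xml", mode=dropbox.files.WriteMode.overwrite)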