Skip to content

Instantly share code, notes, and snippets.

@ignamv
Created February 8, 2019 17:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ignamv/3434e4754569d8fbffb8043cd705680b to your computer and use it in GitHub Desktop.
Save ignamv/3434e4754569d8fbffb8043cd705680b to your computer and use it in GitHub Desktop.
import datetime
import re

import bs4
import dateparser
# Load the locally saved copy of the talks listing page and collect every
# anchor whose href points at an .mp3 file.
with open('/home/imartinezvazquez/rss/mp3_long.php') as fd:
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed. 'html.parser' ships with
    # Python. NOTE(review): switch to 'lxml' if that is what this page was
    # originally parsed with.
    doc = bs4.BeautifulSoup(fd.read(), 'html.parser')

# Use .get() so anchors that carry no href attribute are skipped instead of
# raising KeyError.
links = [
    anchor for anchor in doc.find_all('a')
    if anchor.get('href', '').endswith('.mp3')
]
import pytz
def parse_link(link):
    """Extract episode metadata from one mp3 anchor of the talks listing.

    The date and duration are read from the ``span.date`` and
    ``span.duration`` elements found under the anchor's grandparent element.

    Args:
        link: bs4 tag for an ``<a>`` element whose ``href`` is the mp3 URL.

    Returns:
        Tuple ``(title, date, duration, href)``: the stripped link text, the
        result of ``dateparser.parse`` on the date text with ``' EST'``
        appended, a ``datetime.timedelta`` built from the minute count, and
        the raw ``href`` attribute.

    Raises:
        ValueError: if the duration text does not start with
            ``Duration: <digits> min``.
    """
    # Both spans are searched for under the same ancestor; look it up once.
    entry = link.parent.parent

    datestr = entry.find('span', class_='date').text
    # ' EST' is appended so the parser sees a timezone.
    # NOTE(review): presumably the site's dates are US Eastern -- confirm.
    date = dateparser.parse(datestr + ' EST')

    duration_text = entry.find('span', class_='duration').text
    match = re.match(r'Duration: (\d+) min', duration_text)
    if match is None:
        # Fail with the offending text instead of an AttributeError on None.
        raise ValueError('unrecognised duration text: %r' % (duration_text,))
    duration = datetime.timedelta(minutes=int(match.group(1)))

    title = link.text.strip()
    href = link.attrs['href']
    return title, date, duration, href
from podgen import Podcast, Episode, Media
# Assemble the podcast feed: one episode per mp3 link found on the page.
p = Podcast(
    name="Dhammatalks.org Lectures",
    description="Dhammatalks.org Lectures",
    website="https://www.dhammatalks.org/mp3_long.php"
)
for link in links:
    title, date, duration, href = parse_link(link)
    episode = Episode(
        title=title,
        publication_date=date,
        # NOTE(review): per the podgen docs this contacts the server hosting
        # href to fill in the media size and type -- needs network access.
        media=Media.create_from_server_response(href, duration=duration)
    )
    p.episodes.append(episode)
# Assigned once, after the loop, so it is set even when no links were found.
p.explicit = False
import codecs
# Write the generated RSS document to disk. podgen's own writer is used
# instead of unicode(p): `unicode` exists only on Python 2, so the original
# call raises NameError on Python 3.
# NOTE(review): rss_file is expected to emit UTF-8 by default, matching the
# encoding the codecs-based writer used -- confirm against the podgen docs.
p.rss_file('/tmp/lectures.rss')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment