Skip to content

Instantly share code, notes, and snippets.

@gorshunovr
Last active September 7, 2020 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gorshunovr/2d34b9ef9e839841eccabb3b5bfe0760 to your computer and use it in GitHub Desktop.
Save gorshunovr/2d34b9ef9e839841eccabb3b5bfe0760 to your computer and use it in GitHub Desktop.
Python Podcasts RSS parser
#!/usr/bin/env python3
# Podcasts RSS parser: extracts audio, description,
# episode picture URLs to feed to
# `ffmpeg` and YouTube upload
#
# Roman Gorshunov, linkmeup, 2020
#
import argparse
import logging
import re
import shlex
import sys

try:
    import feedparser
except ImportError as e:
    sys.exit(
        "Failed to import feedparser library needed to run "
        "this tool %s" % str(e))
# Description shown by `--help`. The trailing space on the first fragment
# matters: adjacent string literals are concatenated verbatim.
descr_text = (
    "getrss.py парсит ./podcasts.xml файл и выводит отдельные его "
    "поля или части полей.")
parser = argparse.ArgumentParser(description=descr_text)
logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)
# To parse the live feed instead of a local copy:
# NewsFeed = feedparser.parse("https://linkmeup.ru/rss/podcasts")
# The RSS can also be downloaded beforehand and used as a local file:
# curl -o podcasts.xml https://linkmeup.ru/rss/podcasts
NewsFeed = feedparser.parse("./podcasts.xml")
class Episode:
    """A single podcast episode built from one RSS feed entry.

    Attributes set by ``__init__``:
        recordID: index of the entry, as passed by the caller.
        title, link: copied from the entry.
        mediaURL: ``href`` of the first enclosure, or ``None``.
        mediaFilename: ``mediaURL`` with everything up to the last ``/``
            removed, or ``None``.
        videoFilename: ``mediaFilename`` with ``.mp3`` replaced by ``.mp4``,
            or ``None``.
        iTunesSummary: ``repr()`` of the first ``content`` value.
        longShowNotes: the entry's ``summary``.
        imageURL: space-separated image URLs found in ``longShowNotes``
            (empty string when none were found).
    """

    # Captures the value of the src attribute that belongs to an <img> tag,
    # so src attributes of other tags on the same line are not picked up.
    _IMG_SRC_RE = re.compile(r'<img\s[^>]*?\bsrc="([^"]*)"')

    def __findImageURL(self, i, longShowNotes):
        """Extract image URLs from ``<img src="...">`` tags.

        Args:
            i: episode index, used only in log messages.
            longShowNotes: text with newline-separated lines.

        Returns:
            A string with the URLs found, separated by single spaces;
            an empty string when there are none. Lines that contain the
            word "patreon" are skipped entirely.
        """
        urls = []
        for line in longShowNotes.splitlines():
            if "<img " not in line or "patreon" in line:
                continue
            urls.extend(
                url for url in Episode._IMG_SRC_RE.findall(line) if url)
        if len(urls) > 1:
            LOG.warning("В выпуске %s несколько картинок", i)
        if not urls:
            LOG.warning("Нет картинки выпуска в %s", i)
        return " ".join(urls)

    def __init__(self, recordID, episode):
        """Populate the episode from a feed entry.

        Args:
            recordID: index of the entry in the feed.
            episode: mapping-like feed entry providing ``title``, ``link``,
                ``enclosures``, ``content`` and ``summary``.
        """
        self.recordID = recordID
        # The placeholder is required: logging %-formats the message with
        # the extra arguments.
        LOG.debug('recordId: %s', self.recordID)
        self.title = episode['title']
        self.link = episode['link']
        try:
            self.mediaURL = episode['enclosures'][0]['href']
        except (IndexError, KeyError):
            # No enclosure at all, or an enclosure without an href.
            LOG.warning("Нет айдиофайла в выпуске %s", self.recordID)
            self.mediaURL = None
            self.mediaFilename = None
            self.videoFilename = None
        else:
            self.mediaFilename = re.sub(r'.*/', '', self.mediaURL)
            self.videoFilename = re.sub(
                r'\.mp3', '.mp4', self.mediaFilename)
        self.iTunesSummary = repr(episode['content'][0]['value'])
        self.longShowNotes = episode['summary']
        self.imageURL = self.__findImageURL(
            self.recordID, self.longShowNotes)

    def printTitle(self):
        """Print the episode index and title."""
        print("#%s Title: %s" % (self.recordID, self.title))

    def printMediaURL(self):
        """Print the episode index and media URL."""
        print("#%s Media URL: %s" % (self.recordID, self.mediaURL))

    def printImageURL(self):
        """Print the episode index and image URL(s)."""
        print("#%s Image URL: %s" % (self.recordID, self.imageURL))

    def printCmdline(self):
        """Print a command line for the upload_video.py script.

        That script is published and described here:
        https://developers.google.com/youtube/v3/guides/uploading_a_video

        Nothing is printed (a warning is logged instead) when the episode
        has no video filename, i.e. no audio enclosure was found.
        """
        if self.videoFilename is None:
            LOG.warning(
                "Нет видеофайла для выпуска %s, команда не выведена",
                self.recordID)
            return
        # File name and title come from the feed, so they are shell-quoted.
        # NOTE(review): iTunesSummary is already a repr()-quoted literal and
        # is emitted unchanged; confirm that is safe for the target shell.
        print(
            'python upload_video.py'
            ' --file=' + shlex.quote(str(self.videoFilename)) +
            ' --title=' + shlex.quote(str(self.title)) +
            ' --description=' + self.iTunesSummary +
            ' --keywords="" --category="22" --privacyStatus="private"'
        )
# Captures the src attribute value of an <img> tag.
_IMG_SRC_PATTERN = re.compile(r'<img\s[^>]*?\bsrc="([^"]*)"')


def _imageURLsFromNotes(longShowNotes):
    """Return space-separated <img> src URLs found in *longShowNotes*.

    Lines that contain the word "patreon" are skipped; an empty string is
    returned when no image is found.
    """
    urls = []
    for line in longShowNotes.splitlines():
        if "<img " in line and "patreon" not in line:
            urls.extend(u for u in _IMG_SRC_PATTERN.findall(line) if u)
    return " ".join(urls)


def printFullInfo(i, title, link, mediaURL, mediaFilename, videoFilename,
                  iTunesSummary, longShowNotes):
    """Print information about one episode.

    Args:
        i: item number of the episode.
        title: episode title.
        link: post URL (accepted but not printed).
        mediaURL: URL of the media file.
        mediaFilename: audio file name (accepted but not printed).
        videoFilename: video file name (accepted but not printed).
        iTunesSummary: short summary (accepted but not printed).
        longShowNotes: long summary; also searched for image URLs.
    """
    print("#ItemNumber: %s" % i)
    print("#Title: %s" % title)
    print("#Media URL: %s" % mediaURL)
    # The image URLs are extracted by the helper above, so this function
    # does not depend on any name defined elsewhere in the module.
    print('#Image URL:', _imageURLsFromNotes(longShowNotes))
    print("#Long summary: %s" % longShowNotes)
def loopOverRSSItems(args):
    """Print title, image URL and media URL of the selected feed entries.

    Args:
        args: parsed command-line arguments; ``args.record_id`` is either
            ``None`` (process every entry of ``NewsFeed``) or the integer
            index of the single entry to process.

    Raises:
        IndexError: if ``args.record_id`` is not a valid index into
            ``NewsFeed.entries``.
    """
    entries = NewsFeed.entries
    recordID = args.record_id
    if recordID is None:
        episodesList = [
            Episode(index, entry) for index, entry in enumerate(entries)]
    else:
        # Negative indices are rejected too: they never matched an entry.
        if not 0 <= recordID < len(entries):
            raise IndexError(
                "Нет выпуска с ID %s: в RSS всего выпусков: %s"
                % (recordID, len(entries)))
        episodesList = [Episode(recordID, entries[recordID])]
    for episode in episodesList:
        episode.printTitle()
        episode.printImageURL()
        episode.printMediaURL()
        print("\n#**************************\n")
if __name__ == "__main__":
    # Main program: register the CLI option, parse the arguments and print
    # the requested episodes.
    parser.add_argument(
        "--record-id",
        help="ID выпуска (item) в RSS; "
             "по-умолчанию – обработает все выпуски.",
        type=int)
    args = parser.parse_args()
    loopOverRSSItems(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment