Last active
September 7, 2020 14:13
-
-
Save gorshunovr/2d34b9ef9e839841eccabb3b5bfe0760 to your computer and use it in GitHub Desktop.
Python Podcasts RSS parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Podcasts RSS parser: extracts audio, description,
# episode picture URLs to feed to
# `ffmpeg` and YouTube upload
#
# Roman Gorshunov, linkmeup, 2020
#
import argparse
import logging
import re
import sys

try:
    import feedparser
except ImportError as e:
    # feedparser is the only third-party dependency; exit with a readable
    # message instead of a traceback when it is not installed.
    sys.exit(
        "Failed to import feedparser library needed to run "
        "this tool %s" % str(e))

# Description shown by `--help`. The trailing space on the first literal is
# required: adjacent string literals are concatenated without a separator.
descr_text = (
    "getrss.py парсит ./podcasts.xml файл и выводит отдельные его "
    "поля или части полей.")
parser = argparse.ArgumentParser(description=descr_text)

logging.basicConfig(level=logging.INFO)
LOG = logging.getLogger(__name__)

# The feed can also be parsed straight from the network:
# NewsFeed = feedparser.parse("https://linkmeup.ru/rss/podcasts")
# Alternatively, download the RSS beforehand and use the local file:
# curl -o podcasts.xml https://linkmeup.ru/rss/podcasts
NewsFeed = feedparser.parse("./podcasts.xml")
class Episode:
    """One podcast episode built from a single RSS feed entry.

    Attributes set by the constructor:
        recordID: identifier passed in by the caller.
        title: ``episode['title']``.
        link: ``episode['link']``.
        mediaURL: ``href`` of the first enclosure, or None when absent.
        mediaFilename: part of ``mediaURL`` after the last "/", or None.
        videoFilename: ``mediaFilename`` with ".mp3" replaced by ".mp4",
            or None.
        iTunesSummary: ``repr()`` of the first content value.
        longShowNotes: ``episode['summary']``.
        imageURL: space-separated image URLs found in ``longShowNotes``.
    """

    def __findImageURL(self, i, longShowNotes):
        """Collect image URLs from ``<img ... src="...">`` tags.

        Lines that contain the word "patreon" are skipped entirely.

        Args:
            i: episode identifier, used only in log messages.
            longShowNotes: HTML text, scanned line by line.

        Returns:
            The URLs joined with single spaces, or an empty string when
            no image was found.
        """
        urls = []
        for line in longShowNotes.splitlines():
            if "<img " not in line or "patreon" in line:
                continue
            # Take the src that belongs to the <img> tag itself, so that a
            # later src="..." of another tag on the same line cannot win.
            found = re.findall(r'<img\b[^>]*?\ssrc="([^"]*)"', line)
            urls.extend(url for url in found if url)
        if len(urls) > 1:
            LOG.warning("В выпуске %s несколько картинок", i)
        if not urls:
            LOG.warning("Нет картинки выпуска в %s", i)
        return " ".join(urls)

    def __init__(self, recordID, episode):
        """Fill the episode fields from a feed entry.

        Args:
            recordID: identifier stored on the instance and used in logs.
            episode: mapping with 'title', 'link', 'enclosures', 'content'
                and 'summary' keys.

        Raises:
            KeyError, IndexError: if 'title', 'link', 'summary' or the
                first 'content' value is missing.
        """
        self.recordID = recordID
        LOG.debug('recordId: %s', self.recordID)
        self.title = episode['title']
        self.link = episode['link']
        try:
            self.mediaURL = episode['enclosures'][0]['href']
        except (IndexError, KeyError):
            # No enclosure list, an empty one, or an enclosure without href.
            LOG.warning("Нет айдиофайла в выпуске %s", self.recordID)
            self.mediaURL = None
            self.mediaFilename = None
            self.videoFilename = None
        else:
            # Keep only what follows the last "/" of the URL.
            self.mediaFilename = re.sub(r'.*/', '', self.mediaURL)
            self.videoFilename = re.sub(r'\.mp3', '.mp4', self.mediaFilename)
        # repr() gives a quoted single-line literal; printCmdline() relies
        # on those quotes for the --description argument.
        self.iTunesSummary = repr(episode['content'][0]['value'])
        self.longShowNotes = episode['summary']
        self.imageURL = self.__findImageURL(self.recordID, self.longShowNotes)

    def printTitle(self):
        """Print the episode identifier and title."""
        print('#' + str(self.recordID) + ' Title: ' + str(self.title))

    def printMediaURL(self):
        """Print the episode identifier and media URL."""
        print('#' + str(self.recordID) + ' Media URL: ' + str(self.mediaURL))

    def printImageURL(self):
        """Print the episode identifier and image URL(s)."""
        print('#' + str(self.recordID) + ' Image URL: ' + str(self.imageURL))

    def printCmdline(self):
        """Print a command line for the upload_video.py script.

        The script itself is published and described here:
        https://developers.google.com/youtube/v3/guides/uploading_a_video

        Nothing is printed (a warning is logged instead) when the episode
        has no media file and therefore no video filename.
        """
        if self.videoFilename is None:
            LOG.warning(
                "Нет видеофайла для выпуска %s, команда не выведена",
                self.recordID)
            return
        # NOTE(review): the title is inserted between double quotes without
        # shell escaping; a title containing '"' or '$' yields a broken
        # command. Confirm whether the output is ever executed unreviewed.
        print(
            'python upload_video.py --file="' + self.videoFilename + '" ' +
            '--title="' + self.title + '" --description=' +
            self.iTunesSummary + ' --keywords="" --category="22" ' +
            '--privacyStatus="private"'
        )
def printFullInfo(i, title, link, mediaURL, mediaFilename, videoFilename,
                  iTunesSummary, longShowNotes):
    """Print detailed information about one episode.

    Args:
        i: episode number, printed as "#ItemNumber".
        title: episode title.
        link: post URL (currently not printed).
        mediaURL: URL of the audio file.
        mediaFilename: audio filename (currently not printed).
        videoFilename: video filename (currently not printed).
        iTunesSummary: iTunes summary (currently not printed).
        longShowNotes: long HTML summary; also scanned for image URLs.
    """
    print("#ItemNumber: %s" % i)
    print("#Title: %s" % title)
    # print("#Post URL: %s" % link)
    print("#Media URL: %s" % mediaURL)
    # print("#Auduo filename: %s" % mediaFilename)
    # print("#Video filename: %s" % videoFilename)
    # There is no module-level findImageURL(); the extraction logic is the
    # private Episode.__findImageURL method, reachable from outside the class
    # only under its name-mangled form. A bare instance (no __init__ call) is
    # enough because the method takes everything it needs as arguments.
    imageURL = Episode.__new__(Episode)._Episode__findImageURL(
        i, longShowNotes)
    print('#Image URL:', imageURL)
    print("#Long summary: %s" % longShowNotes)
    # print("#iTunes summary: %s" % iTunesSummary)
def loopOverRSSItems(args):
    """Print title, image URL and media URL of the selected feed entries.

    Args:
        args: parsed command-line namespace. ``args.record_id`` is the
            zero-based index of one entry in ``NewsFeed.entries``, or None
            to process every entry.

    Raises:
        IndexError: if ``args.record_id`` is negative or not smaller than
            the number of entries in the feed.
    """
    entries = NewsFeed.entries
    recordID = args.record_id
    if recordID is None:
        episodesList = [
            Episode(index, entry) for index, entry in enumerate(entries)]
    else:
        # Negative ids are rejected explicitly so that they do not silently
        # select entries counted from the end of the feed.
        if not 0 <= recordID < len(entries):
            raise IndexError(
                "Нет выпуска с ID %s: в RSS всего %s выпусков"
                % (recordID, len(entries)))
        episodesList = [Episode(recordID, entries[recordID])]
    for episode in episodesList:
        episode.printTitle()
        episode.printImageURL()
        episode.printMediaURL()
        print("\n#**************************\n")
if __name__ == "__main__":
    # Script entry point: register the CLI options, parse the command line
    # and print the requested feed entries.
    parser.add_argument(
        "--record-id",
        help="ID выпуска (item) в RSS; "
             "по-умолчанию – обработает все выпуски.",
        type=int)
    args = parser.parse_args()
    loopOverRSSItems(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment