Skip to content

Instantly share code, notes, and snippets.

@GandaG
Created September 16, 2018 12:57
Show Gist options
  • Save GandaG/9edcc1e6ab8ee233c218ebddd175bc70 to your computer and use it in GitHub Desktop.
Save GandaG/9edcc1e6ab8ee233c218ebddd175bc70 to your computer and use it in GitHub Desktop.
A small script to generate RSS 2.0 feeds from fictionpress.com stories.
#!/usr/bin/python3
# MIT License
# Copyright (c) 2018 Daniel Nunes
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# USAGE
# rss_fictionpress <id>
#
# id - The number found in the story's URL after 'www.fictionpress.com/s/'
#
# The built feed will be printed to stdout.
#
# There is no code to account for improper usage; you're on your own.
import requests
import sys
import xml.etree.ElementTree as etree
from bs4 import BeautifulSoup
# Maximum number of chapters included in the feed.
MAX_CHAPS = 10
# Seconds to wait on each HTTP request, so a stalled server cannot hang the
# script forever (requests applies no timeout by default).
REQUEST_TIMEOUT = 30


def _fetch_soup(url):
    """Download *url* and return the page parsed with the html5lib parser.

    Raises whatever ``requests`` raises on connection problems or timeouts,
    and ``requests.HTTPError`` (via ``raise_for_status``) on an error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, features='html5lib')


def _require(node, what, url):
    """Return *node*, or raise ``LookupError`` if the lookup produced None.

    Gives a readable error instead of an ``AttributeError`` on ``None`` when
    the downloaded page lacks an element this script depends on.
    """
    if node is None:
        raise LookupError('{} not found at {}'.format(what, url))
    return node


def _iter_chapters(index_url, story_soup, limit):
    """Yield ``(title, url, html)`` for at most *limit* chapters.

    Chapters are taken from the ``chap_select`` drop-down of *story_soup* in
    reverse document order (presumably newest first -- the drop-down is
    assumed to list chapters in reading order). Each chapter page is
    downloaded only when the generator reaches it.
    """
    chap_sel = _require(
        story_soup.find('select', attrs={'id': 'chap_select'}),
        "chapter selector ('chap_select')",
        index_url,
    )
    options = chap_sel.find_all('option')
    # Slicing a reversed copy caps the count without mutating MAX_CHAPS and
    # correctly yields nothing when the limit is 0.
    for option in options[::-1][:limit]:
        num = option['value']
        chap_url = '{}/{}'.format(index_url, num)
        chap_soup = _fetch_soup(chap_url)

        # Option labels are expected to look like "<num>. <title>"; strip
        # that prefix only when it really is a prefix, so a matching
        # "<num>. " in the middle of a title is never removed.
        label = option.string
        prefix = '{}. '.format(num)
        if label.startswith(prefix):
            label = label[len(prefix):]

        story_div = _require(
            chap_soup.find('div', attrs={'id': 'storytext'}),
            "chapter text ('storytext')",
            chap_url,
        )
        # Serialise every child node of the story text, dropping the
        # whitespace-only ones.
        rendered = (str(child) for child in story_div.children)
        html = ''.join(part for part in rendered if part.strip())

        yield label, chap_url, html


def build_feed(story_title, index_url, chapters):
    """Build an RSS 2.0 element tree for one story.

    story_title -- text for the channel ``<title>``.
    index_url   -- URL of the story, used as the channel ``<link>``.
    chapters    -- iterable of ``(title, url, html)`` tuples, one per item,
                   in the order they should appear in the feed.

    Returns the root ``<rss>`` element.
    """
    rss = etree.Element('rss', version='2.0')
    channel = etree.SubElement(rss, 'channel')

    # RSS 2.0 requires title, link and description as children of <channel>
    # (not of <rss>).
    etree.SubElement(channel, 'title').text = story_title
    etree.SubElement(channel, 'link').text = index_url
    etree.SubElement(channel, 'description').text = (
        'Latest chapters of {}'.format(story_title)
    )

    for chap_title, chap_url, chap_html in chapters:
        item = etree.SubElement(channel, 'item')
        etree.SubElement(item, 'title').text = chap_title
        etree.SubElement(item, 'link').text = chap_url
        guid = etree.SubElement(item, 'guid', isPermaLink='false')
        guid.text = chap_url
        etree.SubElement(item, 'description').text = chap_html

    return rss


def main():
    """Print to stdout the RSS feed for the story id given in ``sys.argv[1]``."""
    story_id = sys.argv[1]
    index_url = 'https://www.fictionpress.com/s/{}'.format(story_id)
    story_soup = _fetch_soup(index_url)

    profile = _require(
        story_soup.find('div', attrs={'id': 'profile_top'}),
        "story header ('profile_top')",
        index_url,
    )
    # The story title is taken from the first <b> inside the header.
    story_title = profile.b.string

    feed = build_feed(
        story_title,
        index_url,
        _iter_chapters(index_url, story_soup, MAX_CHAPS),
    )
    print(etree.tostring(feed, encoding='unicode'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment