Skip to content

Instantly share code, notes, and snippets.

@WardBenjamin
Created February 5, 2017 14:56
Show Gist options
  • Save WardBenjamin/c4479a0714f3fba17630c9dfc340ff7a to your computer and use it in GitHub Desktop.
Save WardBenjamin/c4479a0714f3fba17630c9dfc340ff7a to your computer and use it in GitHub Desktop.
Crude RoyalRoadL downloader. Replace XXXXXX with a fiction ID before running.
# -*- coding: utf-8 -*-
from urllib import request
from bs4 import BeautifulSoup
if __name__ == '__main__':
    # Download every chapter of one RoyalRoadL fiction into the current
    # directory, one "<last URL segment>.html" file per chapter.
    # Replace XXXXXX in the URL below with the fiction id to download.

    # Close the HTTP response as soon as the index page has been parsed.
    with request.urlopen('http://royalroadl.com/fiction/XXXXXX') as response:
        fiction_soup = BeautifulSoup(response, "html.parser")

    # Table rows that carry a "data-url" attribute are taken to be chapter
    # rows; rows without the attribute are ignored.
    chapter_urls = []
    for tr in fiction_soup.find_all('tr'):
        data_url = tr.get('data-url')
        if data_url is not None:
            chapter_urls.append('http://royalroadl.com' + str(data_url))

    for url in chapter_urls:
        print(url)
        with request.urlopen(url) as response:
            chapter_soup = BeautifulSoup(response, "html.parser")
        chapter_div = chapter_soup.find('div', {'class': "chapter-inner chapter-content"})
        if chapter_div is None:
            # Without this guard the literal text "None" would be written
            # to the chapter file when the content div is not found.
            print('No chapter content found, skipping: ' + url)
            continue
        # The last path segment of the chapter URL names the output file.
        chapter_number = str(url).split("/")[-1]
        with open(chapter_number + ".html", "w", encoding='utf-8') as f:
            f.write(str(chapter_div))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment