Last active
June 18, 2020 20:35
-
-
Save UnsignedArduino/afa05f44b2480a68a28b56cbf3552a16 to your computer and use it in GitHub Desktop.
FanFiction Downloader v3 | Github Gist | Downloads and parses web pages and writes them to a file in the current working directory. Specially designed to parse fanfictions from fanfiction.net. Now comes with a GUI!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tkinter as tk | |
from tkinter import filedialog | |
from tkinter import ttk | |
from tkinter import messagebox | |
import requests # pip install requests | |
import bs4 # pip install beautifulsoup4 | |
import pathlib | |
from time import sleep | |
import re | |
# import logging | |
# Set logging level | |
# logging.basicConfig(level=logging.DEBUG) | |
# Helpers that keep the URL entry and the ID entry in sync with each other.
# https://bit.ly/tkinterautofillonso
# Identifiers returned by root.after() for the scheduled auto-fill callbacks:
# key 0 is the callback that fills the ID entry, key 1 the one that fills the
# URL entry.  autofill_entry() cancels these before scheduling a new one.
after_ids = {}
def get_url(id_):
    """Build the fanfiction.net story URL for a story ID.

    Every space character in ``id_`` is removed before the ID is placed
    into the URL; nothing else about the ID is validated.
    """
    story_id = id_.replace(" ", "")
    return f"https://www.fanfiction.net/s/{story_id}/"
def get_id(url):
    """Extract the story ID from a story URL.

    The URL is split on "/" and the fifth piece (index 4) is returned, e.g.
    the ``12345`` in ``https://www.fanfiction.net/s/12345/1/``.  An empty
    string is returned when the URL has fewer than five pieces.
    """
    pieces = url.split("/")
    if len(pieces) <= 4:
        return ""
    return pieces[4]
def autofill_entry(mode, delay=50):
    """Keep the URL and ID entries in sync after one of them was written.

    Args:
        mode: ``"url"`` when the URL variable was written (the ID variable is
            then filled from it) or ``"id"`` when the ID variable was written
            (the URL variable is then filled from it).  Any other value only
            cancels the updates that are still scheduled.
        delay: Milliseconds to wait before the other variable is updated.
    """
    # Drop every update that is still scheduled so only the latest edit wins,
    # then forget the ids so already-handled ones are not cancelled again.
    for pending in after_ids.values():
        root.after_cancel(pending)
    after_ids.clear()

    if mode == "url":
        id_ = get_id(fanfic_url.get())
        # Always write the ID: that write fires the "id" branch below, which
        # rewrites the URL into the form produced by get_url().
        after_ids[0] = root.after(delay, lambda: fanfic_id.set(id_))
    elif mode == "id":
        url = get_url(fanfic_id.get())

        def fill_url():
            # Writing a Tk variable fires its write trace even when the value
            # does not change.  Skipping the redundant write stops the URL and
            # ID traces from re-scheduling each other forever once both
            # entries already agree.
            if fanfic_url.get() != url:
                fanfic_url.set(url)

        after_ids[1] = root.after(delay, fill_url)
# Thanks @Saad at SO | |
# Define a function to have the user choose a directory and set the path accordingly | |
def get_directory():
    """Let the user pick the download directory and remember the choice.

    Opens a directory chooser.  When it returns a non-empty selection, the
    module-level ``path`` is rebound to that directory and the ``directory``
    variable is set to it.  An empty result leaves both untouched.
    """
    global path
    chosen = filedialog.askdirectory()
    if not chosen:
        # Nothing was selected: keep the current download directory.
        return
    path = pathlib.Path(chosen)
    directory.set(path)
# Define a function to set the status | |
def set_status(string):
    """Show ``string`` in the status label and let the GUI redraw.

    The text is followed by 100 spaces (presumably so that leftovers of a
    longer, earlier message are not left visible).
    """
    padded = string + " " * 100
    status.configure(text=padded)
    # Process pending idle tasks so the new text is displayed right away,
    # even while a long-running callback is still executing.
    root.update_idletasks()
# Define the function to download the fanfic | |
def _count_chapters(info_text):
    """Return the chapter count found in a story's information line.

    Looks for ``Chapters: <number>`` in ``info_text``.  When the field is
    missing (presumably a single-chapter story) 1 is returned instead of
    failing on a bogus slice.
    """
    found = re.search(r"Chapters:\s*(\d[\d,]*)", info_text)
    if found is None:
        return 1
    return int(found.group(1).replace(",", ""))


def _shorten_path(file_path, limit):
    """Return ``file_path`` as text, cut to ``limit`` characters plus "..."."""
    shown = str(file_path)
    return shown[:limit] + "..." if len(shown) > limit else shown


def download_fanfic():
    """Download every chapter of the story in the URL entry to text files.

    Starting at chapter 1, each chapter page is fetched, parsed and written
    to a ``.txt`` file (named after the page title) inside the module-level
    ``path`` directory.  The total number of chapters is read from the story
    information line of each downloaded page.  Progress and the current step
    are reported through ``progress_bar`` and ``set_status``; problems are
    reported with message boxes.

    An unusable URL or a failed request stops the download.  An unexpected
    status code or a site error page is reported and the next chapter is
    tried.
    """
    base_url = fanfic_url.get()
    # The chapter number is appended directly, so the base must end in "/";
    # otherwise it would be glued onto the story id.
    if base_url and not base_url.endswith("/"):
        base_url += "/"

    num_chapter = 1
    chapter = 0
    progress_bar["value"] = 0
    # "<" instead of "!=" so the loop also ends if a page reports fewer
    # chapters than have already been processed.
    while chapter < num_chapter:
        # Each chapter is worth ten progress steps.
        progress_bar["maximum"] = 10 * num_chapter
        chapter += 1
        url = f"{base_url}{chapter}/"
        set_status(f"Downloading {url}...")
        progress_bar["value"] = 1 * chapter

        try:
            # A timeout keeps the GUI from hanging forever on a dead server.
            response = requests.get(url, timeout=30)
        except (requests.exceptions.MissingSchema,
                requests.exceptions.InvalidSchema):
            set_status(f"Error downloading {url}...")
            messagebox.showerror("FanFiction Downloader: ERROR",
                                 "Uh oh, an error has occurred!\n"
                                 "That does not seem to a be a valid URL!")
            # The same base URL would fail for every other chapter as well.
            return
        except requests.exceptions.RequestException as error:
            # Connection problems, timeouts, malformed URLs, ...
            set_status(f"Error downloading {url}...")
            messagebox.showerror("FanFiction Downloader v3: ERROR",
                                 "Uh oh, an error has occurred!\n"
                                 f"Could not download {url}\n"
                                 f"{error}")
            return

        set_status(f"Status code is {response.status_code}")
        if response.status_code != 200:
            messagebox.showerror("FanFiction Downloader v3: ERROR",
                                 "Uh oh, an error has occurred!\n"
                                 f"Unexpected status code: {response.status_code}\n"
                                 "No files have been written.")
            continue

        set_status("Parsing FanFiction...")
        # Parse HTML with html5lib
        soup = bs4.BeautifulSoup(response.content, "html5lib")
        progress_bar["value"] = 2 * chapter

        # Check that we actually got a real chapter and not an error page.
        page_text = response.text
        story_missing = "Story Not Found" in page_text
        if story_missing or "FanFiction.Net Message Type " in page_text:
            if story_missing:
                # Story does not exist
                messagebox.showerror("FanFiction Downloader: ERROR",
                                     "Uh oh, an error has occurred!\n"
                                     "Story Not Found\n"
                                     "Story is unavailable for reading. (A)")
            else:
                # Chapter does not exist
                messagebox.showerror("FanFiction Downloader v3: ERROR",
                                     "Uh oh, an error has occurred!\n"
                                     "Chapter not found. Please check to see you are not using an outdated url.\n"
                                     "New chapter/story can take up to 15 minutes to show up.")
            continue

        # NOTE: soup.find() returns None when an expected element is missing;
        # a page with a different layout is not handled below.

        # Count the number of chapters we need to download
        set_status("Counting chapters...")
        info_text = soup.find("span", class_="xgray xcontrast_txt").text
        num_chapter = _count_chapters(info_text)
        progress_bar["value"] = 3 * chapter

        # Add title
        set_status("Finding title...")
        parts = [soup.find("b", class_="xcontrast_txt").string + "\n"]
        progress_bar["value"] = 4 * chapter

        # Find first link that looks like the author's name
        set_status("Finding author...")
        for author in soup.find_all("a", class_="xcontrast_txt"):
            if author.parent.name == "div":
                parts.append(f"By: {author.string}\n\n")
                # Break because we only want the first one
                break
        progress_bar["value"] = 5 * chapter

        # Add the synopsis
        set_status("Finding synopsis...")
        parts.append(
            f"Synopsis: {soup.find('div', class_='xcontrast_txt').string}\n\n"
        )
        progress_bar["value"] = 6 * chapter

        # Add more information about fanfiction
        set_status("Finding FanFiction information...")
        parts.append(info_text + "\n\n")
        progress_bar["value"] = 7 * chapter

        # Add fanfic itself
        set_status("Finding FanFiction content...")
        parts.extend(paragraph.text + "\n" for paragraph in soup.find_all("p"))
        progress_bar["value"] = 8 * chapter

        set_status("Signing FanFiction...")
        # Add signature
        parts.append("\n\nThis fanfiction was downloaded with the fanfiction downloader v3\n")
        parts.append("See the source code at https://bit.ly/fanficdownloaderv3code\n")
        # Add link to original fanfiction
        parts.append(f"Link to fanfiction (To read online): {url}\n")
        text = "".join(parts)
        progress_bar["value"] = 9 * chapter

        # Make path to fanfiction: the page title with every character that
        # is not a word character, "-", "_", "." or space replaced by "_",
        # and spaces turned into "_" as well.
        file_name = re.sub(r"[^\w\-_\. ]", "_", soup.title.string).replace(" ", "_") + ".txt"
        file_path = path / file_name
        set_status("Writing FanFiction to " + _shorten_path(file_path, 80))

        # Ask before replacing a file that already exists.
        if file_path.exists() and not messagebox.askokcancel(
                "FanFiction Downloader v3: Confirmation",
                "It looks like this file already exists! Overwrite?"):
            set_status("Canceled writting FanFiction to "
                       + _shorten_path(file_path, 70))
        else:
            # Explicit UTF-8 so stories with characters outside the platform
            # default encoding can still be written.
            with file_path.open("wt", encoding="utf-8") as file:
                file.write(text)
            set_status("Sucessfully written FanFiction to "
                       + _shorten_path(file_path, 68))
        progress_bar["value"] = 10 * chapter
# Directory the fanfics are written to; starts as the current working
# directory and is rebound by get_directory().
path = pathlib.Path.cwd()

# Root window
root = tk.Tk()
root.title("FanFiction Downloader v3")

# --- Input frame: URL and ID of the story -----------------------------------
input_frame = tk.LabelFrame(master=root, text="Input")
input_frame.grid(row=0, column=0, padx=1, pady=1, rowspan=2, sticky=tk.NS)

# Label and entry field for the URL
ttk.Label(master=input_frame, text="URL of FanFiction:").grid(row=0, column=0, padx=1, pady=1)
fanfic_url = tk.StringVar()
# trace_add() replaces trace_variable(), which is deprecated since Python 3.6.
# Every write to the URL schedules an update of the ID entry.
fanfic_url.trace_add("write", lambda *a: autofill_entry("url"))
url_entry = ttk.Entry(master=input_frame, textvariable=fanfic_url)
url_entry.grid(row=0, column=1, padx=1, pady=1)

# Label and entry field for the fanfic ID
ttk.Label(master=input_frame, text="ID of FanFiction:").grid(row=1, column=0, padx=1, pady=1)
fanfic_id = tk.StringVar()
# Every write to the ID schedules an update of the URL entry.
fanfic_id.trace_add("write", lambda *a: autofill_entry("id"))
id_entry = ttk.Entry(master=input_frame, textvariable=fanfic_id)
id_entry.grid(row=1, column=1, padx=1, pady=1)

# --- Output frame: where the downloaded files go ----------------------------
output_frame = tk.LabelFrame(master=root, text="Output")
output_frame.grid(row=0, column=1, padx=1, pady=1, sticky=tk.NW)

# Label for the directory of downloaded files
ttk.Label(
    master=output_frame, text="Directory path of downloaded FanFictions:"
).grid(row=0, column=0, padx=1, pady=1)

# Entry field showing the directory of downloaded files.
# NOTE(review): text typed into this entry only changes `directory`; the
# download uses `path`, which is only rebound by the Browse button.
directory = tk.StringVar()
directory.set(path)
directory_entry = ttk.Entry(master=output_frame, textvariable=directory)
directory_entry.grid(row=0, column=1, padx=1, pady=1)

# Button to browse for the directory of downloaded files
ttk.Button(master=output_frame, text="Browse", command=get_directory).grid(row=0, column=2, padx=1, pady=1)

# Button to start downloading the fanfic
ttk.Button(master=root, text="Start", command=download_fanfic).grid(row=1, column=1, padx=1, pady=1, sticky=tk.NSEW)

# --- Status frame: progress bar and current operation -----------------------
status_frame = tk.LabelFrame(master=root, text="Status")
status_frame.grid(row=2, column=0, padx=1, pady=1, columnspan=2, sticky=tk.NSEW)

# Progress bar; its value and maximum are driven by download_fanfic()
progress_bar = ttk.Progressbar(master=status_frame, orient=tk.HORIZONTAL, length=670, mode="determinate")
progress_bar.grid(row=0, column=0, padx=1, pady=1)

# Status bar showing the current operation; updated through set_status()
status = ttk.Label(master=status_frame, text="Idle", width=100)
status.grid(row=1, column=0, padx=1, pady=1, sticky=tk.NW)

# Start GUI event loop
root.mainloop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment