Skip to content

Instantly share code, notes, and snippets.

@Rust1667
Last active April 6, 2024 08:10
Show Gist options
  • Save Rust1667/efc055debaf4876e4de39cc0d32f18c1 to your computer and use it in GitHub Desktop.
Save Rust1667/efc055debaf4876e4de39cc0d32f18c1 to your computer and use it in GitHub Desktop.
This is a script to download all Rentry pages linked from the FreeMediaHeckYeah Wiki, including those listed on its base64 page. --- Requirements: you need the Python package "rentry" installed ("pip install rentry"); alternatively, download https://github.com/radude/rentry/blob/master/rentry.py into the same folder, together with the example.env file from that same repo renamed to ".env".
import os
import base64
import re
import requests
import rentry
def extract_string_from_url(url):
    """Return the page slug of a rentry.co / rentry.org URL.

    The slug is the path segment directly after the host, e.g. ``"abc"`` for
    ``"https://rentry.co/abc"``. Surrounding whitespace, one trailing ``/``
    and a trailing ``?query`` / ``#fragment`` are ignored, so links lifted
    out of decoded text still resolve to a clean slug.

    Args:
        url: The URL (or text ending in a URL) to inspect.

    Returns:
        The slug as a string, or ``None`` when no rentry page URL of the
        expected shape is found (for example ``.../abc/raw`` or another host).
    """
    # Strip first: a bare `$` also matches just before a trailing newline,
    # which would otherwise let "\n" / "\r" end up inside the captured slug.
    pattern = r'https?://rentry\.(?:co|org)/([^/\s?#]+)/?(?:[?#]\S*)?$'
    match = re.search(pattern, url.strip())
    if match is None:
        return None
    # Group 1 is the slug without any separator, query or fragment.
    return match.group(1)
def extract_rentry_links(text):
    """Collect rentry page URLs that appear as parenthesised link targets.

    Only URLs written as ``(https://rentry.co/<slug>)`` or
    ``(https://rentry.org/<slug>)`` are considered, which is the form a
    markdown link target takes. A target that carries something after the
    slug (``#fragment``, ``?query`` or a further ``/path``) is still
    recognised, and the page URL without that suffix is returned.

    Args:
        text: The text to scan.

    Returns:
        A list of page URLs, in order of appearance. Duplicates are kept.
    """
    # Group 1 is the page URL itself. The optional non-capturing tail soaks
    # up a fragment/query/sub-path so such links are not silently dropped.
    pattern_rentry = (
        r'\((https://rentry\.(?:co|org)/[a-zA-Z0-9\-_]+)'
        r'(?:[/#?][^)\s]*)?\)'
    )
    # With exactly one capturing group, findall returns that group's text,
    # i.e. the URL already stripped of the surrounding parentheses.
    return re.findall(pattern_rentry, text)
def download_raw_content(url):
    """Download ``url`` over HTTP and return the response body as text.

    Progress messages are printed before and after the request.

    Args:
        url: The address to fetch.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or when the
            10-second timeout expires.
    """
    print("\ndownloading from " + url + "...")
    response = requests.get(url, timeout=10)
    # Without this check an error page (e.g. a 404 body) would be handed back
    # as if it were the requested document and scanned for links.
    response.raise_for_status()
    print("downloaded.\n")
    return response.text
def get_main_wiki_rentry_links():
    """Download the FMHYedit ``single-page`` file and return its rentry links.

    Returns:
        The list produced by ``extract_rentry_links`` for the downloaded text.
    """
    single_page_url = "https://raw.githubusercontent.com/nbats/FMHYedit/main/single-page"
    return extract_rentry_links(download_raw_content(single_page_url))
def get_base64_rentry_links():
    """Return the rentry links hidden in the FMHYedit ``base64.md`` page.

    Every backtick-delimited span of the downloaded page is treated as a
    base64 string and decoded as UTF-8. A span that cannot be decoded is kept
    as-is, wrapped in its backticks. Text outside backticks is ignored. The
    decoded text is then split so that each ``http...`` starts a new line,
    and the lines mentioning rentry.co / rentry.org are returned.

    Returns:
        A list of whitespace-stripped lines, each containing a rentry host
        name. A line normally starts with the URL.
    """
    url = "https://raw.githubusercontent.com/fmhy/FMHYedit/main/base64.md"
    content = download_raw_content(url)

    decoded_parts = []
    # Each match is one `...` pair, scanned left to right without overlap.
    for span in re.finditer(r"`([^`]*)`", content):
        encoded_string = span.group(1)
        try:
            decoded_parts.append(base64.b64decode(encoded_string).decode("utf-8"))
        except ValueError:
            # ValueError covers binascii.Error (bad padding), UnicodeDecodeError
            # (decoded bytes are not UTF-8) and the error b64decode raises for
            # non-ASCII input -- i.e. the span was ordinary inline code.
            decoded_parts.append(f"`{encoded_string}`")

    # Force every URL onto its own line so adjacent links are separated.
    decoded_content = "".join(decoded_parts).replace("http", "\nhttp")

    rentry_host = re.compile(r"(rentry\.co|rentry\.org)")
    # strip() drops stray "\r"/spaces that would otherwise corrupt the slug
    # extracted from the link later on.
    return [
        line.strip()
        for line in decoded_content.split("\n")
        if rentry_host.search(line)
    ]
def remove_duplicates(strings):
    """Drop case-insensitive duplicates, keeping the first occurrence of each.

    The number of removed entries is printed to stdout.

    Args:
        strings: A list of strings; it is not modified.

    Returns:
        A new list with the surviving strings in their original order and
        original casing.
    """
    seen = set()
    result = []
    for s in strings:
        key = s.lower()
        if key in seen:
            continue
        seen.add(key)
        result.append(s)
    print(f"\nremoved {len(strings) - len(result)} duplicates.")
    return result
def get_rentry_content_and_title(url):
    """Fetch one rentry page and pair its content with its slug.

    Args:
        url: A rentry page URL understood by ``extract_string_from_url``.

    Returns:
        A dict with ``'title'`` set to the slug extracted from ``url`` and
        ``'content'`` set to the ``'content'`` entry of what
        ``rentry.raw`` returns for that slug.
    """
    slug = extract_string_from_url(url)
    raw_response = rentry.raw(slug)
    return {'title': slug, 'content': raw_response['content']}
def save_string_to_text_file(string, file_path):
    """Write ``string`` to ``file_path`` as UTF-8 text and report the path.

    An existing file at ``file_path`` is overwritten.
    """
    with open(file_path, mode="w", encoding="utf-8") as out_file:
        out_file.write(string)
    print(f"Result saved to {file_path}")
def main():
    """Collect all rentry links from the wiki and download them as markdown.

    Links are gathered from the main wiki page and from the base64 page,
    de-duplicated, and listed. After the user confirms, every page is saved
    as ``<slug>.md`` in the current working directory. A link that cannot be
    parsed or downloaded is reported and skipped, so one bad link does not
    abort the remaining downloads.
    """
    # Links from the main wiki
    main_wiki_rentry_links = get_main_wiki_rentry_links()
    print(str(len(main_wiki_rentry_links)) + " links")
    for link in main_wiki_rentry_links:
        print(link)

    # Links from base64
    base64_rentry_links = get_base64_rentry_links()
    print(str(len(base64_rentry_links)) + " links")
    for link in base64_rentry_links:
        print(link)

    # Join lists, remove duplicates, count total
    links = remove_duplicates(main_wiki_rentry_links + base64_rentry_links)
    print("\n\n" + str(len(links)) + " links total\n\n")

    # Confirm download in current directory
    print("Current directory: " + os.getcwd() + "\n")
    answer = input(f"Do you want to download {len(links)} markdown files in the current directory? (y/n): ")
    # Accept "Y", "yes" and padded input, not only an exact lowercase "y".
    if answer.strip().lower() not in ("y", "yes"):
        print("Aborted.")
        return

    # Download files
    failed_links = []
    for link in links:
        # Without a slug the page would be requested and saved as "None.md".
        if extract_string_from_url(link) is None:
            print(f"Skipping unrecognized link: {link}")
            failed_links.append(link)
            continue
        try:
            r = get_rentry_content_and_title(link)
            save_string_to_text_file(r['content'], str(r['title']) + ".md")
        except Exception as error:
            # Top-level boundary of a batch job: report and move on so the
            # remaining pages are still downloaded.
            print(f"Failed to download {link}: {error}")
            failed_links.append(link)

    if failed_links:
        print(f"\n{len(failed_links)} of {len(links)} links could not be downloaded:")
        for link in failed_links:
            print(link)
# Start the download only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment