Last active
February 5, 2024 19:27
-
-
Save HighnessAtharva/c531b7e0e1f7977f40c0add74b6baeee to your computer and use it in GitHub Desktop.
Batch Markdownify URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import sys | |
def get_page_title(url, timeout=10):
    """
    Fetch the title of a webpage from its URL.

    Args:
        url (str): The URL of the webpage.
        timeout (float): Seconds to wait for the server before giving up.
            Defaults to 10.

    Returns:
        str: The title of the webpage with runs of whitespace collapsed to
            single spaces, or an empty string if the page could not be
            fetched or has no usable <title>.
    """
    try:
        # requests applies no timeout by default; without one a single
        # unresponsive host would stall the whole batch forever.
        response = requests.get(
            url,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Covers connection errors, timeouts, invalid URLs and HTTP 4xx/5xx.
        return ''

    soup = BeautifulSoup(response.text, 'html.parser')
    if soup.title is None:
        return ''

    # get_text() also handles a <title> containing nested markup (where
    # .string would be None); split/join collapses newlines and tabs so the
    # title stays on one line when embedded in a Markdown link.
    return ' '.join(soup.title.get_text().split())
def generate_markdown_file(input_file, output_file):
    """
    Generate a Markdown file with links to webpage titles from URLs.

    The input file is read as UTF-8 with one URL per line. Surrounding
    whitespace is stripped and blank lines are skipped. For each URL a
    ``[title](url)`` link is written when a title can be fetched, otherwise
    the bare URL is written. Entries are separated by a blank line.

    Errors are reported on stdout rather than raised.

    Args:
        input_file (str): The name of the input file containing URLs.
        output_file (str): The name of the output Markdown file.
    """
    # Read the input first, in its own try block, so that a missing input
    # file is the only thing reported as "Input file not found".
    try:
        with open(input_file, 'r', encoding='utf-8') as file:
            urls = [
                url
                for line in file.read().splitlines()
                if (url := line.strip())  # skip blank / whitespace-only lines
            ]
    except FileNotFoundError:
        print("❌ Error: Input file not found.")
        return
    except Exception as e:
        print(f"❌ Error: {e}")
        return

    # Failures here (e.g. the output directory does not exist) are reported
    # with their real message instead of being blamed on the input file.
    try:
        with open(output_file, 'w', encoding='utf-8') as file:
            for url in urls:
                if title := get_page_title(url):
                    file.write(f"[{title}]({url})\n\n")
                else:
                    # No title available: fall back to the bare URL.
                    file.write(f"{url}\n\n")
    except Exception as e:
        print(f"❌ Error: {e}")
        return

    print(f"✅ Markdown file '{output_file}' generated successfully.")
if __name__ == "__main__":
    # The script takes exactly two positional arguments: the file listing
    # the URLs and the Markdown file to produce.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("❌ Usage: python script.py <input_file> <output_file>")
        sys.exit(1)

    input_file, output_file = cli_args

    # Build the Markdown link list.
    generate_markdown_file(input_file, output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment