from bs4 import BeautifulSoup
import pandas as pd

# Step 1: Parse the favorites_5_16_24.html bookmarks export
with open('favorites_5_16_24.html', 'r', encoding='utf-8') as file:
    soup = BeautifulSoup(file, 'html.parser')

# Extract all links (each bookmark is an <a> tag in the export)
links = soup.find_all('a')
bookmarks = [{'name': link.get_text(), 'url': link['href']} for link in links]

# Step 2: Save the extracted bookmarks to CSV
bookmarks_df = pd.DataFrame(bookmarks)
bookmarks_df.to_csv('favorites_5_16_24.csv', index=False)

# Step 3: After updating the CSV manually or programmatically, read back the normalized copy
updated_bookmarks_df = pd.read_csv('favorites_5_16_24_normalized.csv')

# Convert the updated DataFrame to a list of dictionaries
updated_bookmarks = updated_bookmarks_df.to_dict(orient='records')

# Build mappings from old URL to new URL and from old URL to new name.
# This assumes the normalized CSV keeps the same row order as the original
# bookmarks and that URLs are unique.
url_mapping = {bookmark['url']: updated_bookmark['url']
               for bookmark, updated_bookmark in zip(bookmarks, updated_bookmarks)}
name_mapping = {bookmark['url']: updated_bookmark['name']
                for bookmark, updated_bookmark in zip(bookmarks, updated_bookmarks)}

# Step 4: Update the HTML in a single pass. The name is set before the href
# is replaced so the old URL is still available to look up both mappings.
for link in soup.find_all('a'):
    old_url = link['href']
    if old_url in name_mapping:
        link.string = name_mapping[old_url]
    if old_url in url_mapping:
        link['href'] = url_mapping[old_url]

# Step 5: Save the updated HTML back to a new file
with open('favorites_5_16_24_updated.html', 'w', encoding='utf-8') as file:
    file.write(soup.prettify(formatter=None))
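
# Minimal sketch of the intermediate CSV the script above assumes: the
# normalized file keeps the same 'name' and 'url' columns and the same row
# order as the exported favorites_5_16_24.csv, with your edits applied.
# The two example rows here are placeholders, not real bookmarks.
import pandas as pd

normalized = pd.DataFrame([
    {'name': 'Example Site', 'url': 'https://example.com/'},
    {'name': 'Example Docs', 'url': 'https://example.com/docs'},
])
normalized.to_csv('favorites_5_16_24_normalized.csv', index=False)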