Skip to content

Instantly share code, notes, and snippets.

@libbymiller
Created April 21, 2024 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save libbymiller/03c602b28d2b442df6993259de66283f to your computer and use it in GitHub Desktop.
Save libbymiller/03c602b28d2b442df6993259de66283f to your computer and use it in GitHub Desktop.
Bodgy BeautifulSoup script to create an index page for a Tumblr site
from pathlib import Path
import re
from bs4 import BeautifulSoup
# Build one index page from the per-post HTML files in ``html/``.
#
# Every ``*.html`` file in the directory is parsed (in sorted filename order).
# From each one the script takes the leading caption <div>, the first <span>,
# an embedded video (if any) and every image, and collects them in a
# ``<div id="<filename>">`` inside the index page's <body>.  The finished page
# is printed to stdout.

path = "html"
mydir = Path(path)

# "html.parser" ships with Python.  Naming it avoids bs4's "no parser was
# explicitly specified" warning and keeps the output identical on machines
# that have different optional parsers installed.
_PARSER = "html.parser"

# Literal "../../" path segments.  The dots are escaped: an unescaped "."
# matches any character and would also strip things like "/ab/cd/" from URLs.
_PARENT_DIRS = re.compile(r"\.\./\.\./")

# "mp4" that is not already preceded by a dot.  Presumably the post files
# reference videos without the dot before the extension -- TODO confirm.  The
# lookbehind keeps an existing ".mp4" from turning into "..mp4".
_UNDOTTED_MP4 = re.compile(r"(?<!\.)mp4")

soup2 = BeautifulSoup("""
<!DOCTYPE HTML>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" type="text/css" href="style.css"/>
</head>
<body>
</body>
</html>
""", _PARSER)

# Everything is appended to <body>; appending to ``soup2`` itself would place
# the content after the closing </html> tag.
index_body = soup2.body

for post_file in sorted(mydir.glob("*.html")):
    # The index page declares UTF-8, so read the posts as UTF-8 instead of
    # relying on the platform's default encoding.
    with post_file.open(encoding="utf-8") as f:
        soup = BeautifulSoup(f, _PARSER)

    # A file without a <body> simply yields empty placeholders below.
    post_body = soup.body

    # Caption: the first <div> in the body, but only if it has class "caption".
    first_div = post_body.div if post_body is not None else None
    if first_div is not None and "caption" in (first_div.get("class") or []):
        caption = first_div
    else:
        caption = soup2.new_tag("div")

    # First <span> anywhere in the post (presumably the timestamp -- TODO
    # confirm); an empty <span> keeps the per-post layout uniform otherwise.
    first_span = soup.find("span")
    if first_span is None:
        first_span = soup2.new_tag("span")

    # Embedded video, rewritten as an HTML5 <video> element.
    embed = post_body.embed if post_body is not None else None
    embed_src = embed.get("src") if embed is not None else None
    if embed_src:
        video_src = _UNDOTTED_MP4.sub(".mp4", _PARENT_DIRS.sub("", embed_src))
        # ``autoplay`` is deliberately omitted: it is a boolean attribute, so
        # even autoplay="false" would switch autoplay on.
        vid = soup2.new_tag("video", src=video_src, controls="")
    else:
        vid = soup2.new_tag("span")

    # Collect image sources *before* the caption is moved out of ``soup``,
    # so images inside the caption are still found.  Skip <img> without src.
    img_srcs = [img["src"] for img in soup.find_all("img") if img.get("src")]

    # One container per post, identified by the post's file name.
    group = soup2.new_tag("div", id=post_file.name)
    group.append(caption)
    group.append(first_span)
    group.append(vid)
    for img_src in img_srcs:
        group.append(soup2.new_tag("img", src=_PARENT_DIRS.sub("", img_src)))
    index_body.append(group)

# we don't need figures any more (the images were re-added as plain <img>)
for figure in soup2.find_all("figure"):
    figure.decompose()

print(soup2.prettify())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment