Skip to content

Instantly share code, notes, and snippets.

@ispringle
Last active February 16, 2024 20:53
Show Gist options
  • Save ispringle/69f96f4368920968f01e7d14707c83f4 to your computer and use it in GitHub Desktop.
Save ispringle/69f96f4368920968f01e7d14707c83f4 to your computer and use it in GitHub Desktop.
RSS gen w/ globbing
from datetime import datetime
from pathlib import Path
from xml.sax.saxutils import escape

from bs4 import BeautifulSoup
# Absolute path of the exported site on disk (no trailing slash).
system_header = "/Users/ismailefetop/projects/org-blog/ismailefe_org"
# Public base URL of the site (no trailing slash).
website_header = "https://ismailefe.org"
# Every exported post below <site>/blog, stored as a site-relative string with
# a leading slash.  Plain strings are used on purpose: the same value is later
# appended to system_header (file on disk) and to website_header (public URL),
# and a Path object cannot be concatenated to a str.
blog_posts = [
    "/" + html_file.relative_to(system_header).as_posix()
    for html_file in Path(system_header, "blog").rglob("*.html")
]
# Timestamp for <lastBuildDate>; the UTC offset is hard-coded to +0300.
update_time = datetime.now().strftime('%a, %d %b %Y %H:%M:%S') + ' +0300'
feed_output = "/Users/ismailefetop/projects/org-blog/ismailefe_org/feed.xml"
# Start the feed from scratch ("w" truncates any previous file).  The encoding
# is given explicitly so the bytes on disk match the utf-8 XML declaration
# regardless of the platform default.
with open(feed_output, "w", encoding="utf-8") as xml_file:
    xml_file.write(
f'''<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>İsmail Efe's Blog Site</title>
<link>https://ismailefe.org/</link>
<description>İsmail Efe's Second Brain.</description>
<atom:link href="https://ismailefe.org/feed.xml" rel="self" type="application/rss+xml"/>
<lastBuildDate>{update_time}</lastBuildDate>'''
    )
# The function below was partially written by ChatGPT.
def parse_html(filename_arg):
    """Load one HTML file and collect the fields used for a feed entry.

    Args:
        filename_arg: Path of the HTML file to read (decoded as UTF-8).

    Returns:
        A dict with the keys ``"title"``, ``"date"`` and ``"body_html"``.
        ``"title"`` is the text of the ``<title>`` element, ``"date"`` is the
        text of the first element carrying ``class="date"`` and
        ``"body_html"`` is the ``<body>`` element serialised back to markup.
        Each value is ``None`` when the corresponding element is not found.
    """
    with open(filename_arg, 'r', encoding='utf-8') as handle:
        markup = handle.read()

    document = BeautifulSoup(markup, 'html.parser')

    def text_or_none(node):
        # Text content of the element, or None when the lookup found nothing.
        return node.text if node else None

    body = document.body
    return {
        "title": text_or_none(document.find('title')),
        # The date is expected in an element with class="date".
        "date": text_or_none(document.find(class_='date')),
        "body_html": str(body) if body else None,
    }
# The function below was written by ChatGPT.
def format_date(input_date):
    """Convert a ``YYYY-MM-DD`` date string to the form ``Fri, 16 Feb 2024``.

    Leading and trailing whitespace is ignored, because text pulled out of an
    HTML element frequently carries stray spaces or newlines that would
    otherwise make parsing fail.

    Args:
        input_date: Date string in ``%Y-%m-%d`` format.

    Returns:
        The date formatted with ``'%a, %d %b %Y'``.  The day and month names
        produced by ``%a``/``%b`` follow the process locale.

    Raises:
        TypeError: If ``input_date`` is not a string (e.g. ``None``).
        ValueError: If the stripped text does not match ``%Y-%m-%d``.
    """
    if not isinstance(input_date, str):
        raise TypeError(
            f"format_date() expects a str, not {type(input_date).__name__}"
        )
    parsed = datetime.strptime(input_date.strip(), '%Y-%m-%d')
    return parsed.strftime('%a, %d %b %Y')
# Append one <item> per post to the feed whose header was written earlier,
# then close the channel.  The file is opened once, in append mode, with an
# explicit encoding that matches the utf-8 XML declaration.
with open(feed_output, "a", encoding="utf-8") as xml_file:
    for post in blog_posts:
        # Entries may be Path objects or plain strings, and may be absolute
        # paths inside the site directory or site-relative paths.  Normalise
        # to a file path on disk plus a site-relative path starting with "/".
        post_path = str(post)
        if post_path.startswith(system_header):
            source_file = post_path
            site_path = post_path[len(system_header):]
        else:
            site_path = post_path
            source_file = system_header + post_path
        if not site_path.startswith("/"):
            site_path = "/" + site_path

        post_dictionary = parse_html(source_file)

        # Text placed directly inside XML elements must have &, < and >
        # escaped; fall back to the site path when the page has no <title>.
        post_title = escape(post_dictionary["title"] or site_path)
        post_url = escape(website_header + site_path)
        # A literal "]]>" in the body would terminate the CDATA section early,
        # so split it across two adjacent CDATA sections.
        post_body = (post_dictionary["body_html"] or "").replace(
            "]]>", "]]]]><![CDATA[>"
        )
        # Leave <pubDate> out when the page carries no date instead of
        # crashing while trying to parse a missing value.
        post_date = post_dictionary["date"]
        if post_date:
            pub_date_line = (
                f'<pubDate>{format_date(post_date)} 00:00:00 +0300</pubDate>\n'
            )
        else:
            pub_date_line = ''

        xml_file.write(f'''
<item>
<title>{post_title}</title>
<description><![CDATA[<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">{post_body}</html>]]></description>
<author>ismailefetop@gmail.com (İsmail Efe Top)</author>
<link>{post_url}</link>
<guid>{post_url}</guid>
{pub_date_line}</item>
''')
    xml_file.write('''
</channel>
</rss>''')
@Ektaynot
Copy link

Thank you so much!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment