Skip to content

Instantly share code, notes, and snippets.

@zelinskiy
Last active August 25, 2020 14:18
Show Gist options
  • Save zelinskiy/83f1323fab99e6d9e2eccc58ecd68528 to your computer and use it in GitHub Desktop.
Save zelinskiy/83f1323fab99e6d9e2eccc58ecd68528 to your computer and use it in GitHub Desktop.
from datetime import date, timedelta
import urllib.request, urllib.error, urllib.parse
import re, os
# TODO:
# Ad libitum
# Psalmodia complementaris
# Numeric hour code -> Latin heading placed at the top of the saved page.
# The negative codes carry the same name as their positive counterpart with a
# trailing "*".
_HORA_NAMES = {
    2: "LECTIONIS",
    3: "LAUDES",
    4: "TERTIA",
    5: "SEXTA",
    6: "NONA",
    -4: "TERTIA*",
    -5: "SEXTA*",
    -6: "NONA*",
    7: "VESPERAS",
    8: "COMPLETORIUM",
    10: "LAUDES MATUTINAS ET OFFICIUM LECTIONIS",
}


def hora_name(n):
    """Return the Latin heading for the hour code *n*.

    Args:
        n: Integer hour code (2-8, 10, or -4/-5/-6).

    Returns:
        The heading string for that hour.

    Raises:
        ValueError: If *n* is not a recognized hour code.
    """
    try:
        return _HORA_NAMES[n]
    except (KeyError, TypeError):
        # Raising a bare string is itself a TypeError in Python 3; use a real
        # exception so the message actually reaches the caller.
        raise ValueError("Hora " + str(n) + " not recognized") from None
def clean_data(data, hora, date=None):
    """Strip page chrome from a downloaded page and prepend headings.

    The substitutions run in a fixed order; the headings are inserted after
    the existing ``<h1>`` elements have been removed so that the new ``<h1>``
    survives.

    Args:
        data: HTML text of the downloaded page.
        hora: Hour code passed to ``hora_name`` to build the ``<h3>`` heading.
        date: Optional, already formatted date string. When given it is
            prepended as an ``<h1>`` heading. (The name shadows the imported
            ``datetime.date`` inside this function only.)

    Returns:
        The cleaned HTML fragment as a string.
    """
    data = data.replace('<link rel="stylesheet" type="text/css" href="breviar.css">', "")
    # Plain-text replace: the needle must not contain a regex-style "\/",
    # otherwise it can never match the real markup.
    data = data.replace('<h1>BREVIARIUM ROMANUM</h1>', "")

    # Greedy, newline-spanning removals: everything from the first opening
    # marker to the last closing marker is dropped.
    data = re.sub(r'<style>.*</style>', '', data, flags=re.DOTALL)
    data = re.sub(r'<h1>.*</h1>', '', data, flags=re.DOTALL)

    # Insert our own headings (hour name, and the date when supplied).
    data = '<h3>' + hora_name(hora) + '</h3>' + data
    if date is not None:
        data = '<h1>' + date + '</h1>' + data

    data = re.sub(r'<!--\{TEDEUM_BEGIN\}.*<!--\{TEDEUM_END\}-->', '', data, flags=re.DOTALL)
    data = re.sub(r'<script type="text/javascript">.*</noscript>', '', data, flags=re.DOTALL)
    # Single-line HTML comments ("." does not cross newlines here).
    data = re.sub(r'<!--(.)+-->', '', data)
    data = re.sub(r'<html>.*<body>', '', data, flags=re.DOTALL)
    data = re.sub(r'</body>.*</html>', '', data, flags=re.DOTALL)
    data = re.sub(r'<p.*\[ Coniunctiones ostendere \]</a><p>', '', data)
    data = re.sub(r'<p class=pouzetisk.*</p>', '', data)
    data = re.sub(r'<!-- Piwik -->.*<!-- End Piwik Code -->', '', data, flags=re.DOTALL)

    # Drop lines containing the copyright sign. The pass is run twice, as in
    # the original: each match consumes both surrounding newlines, so a second
    # pass can match text that only became matchable after the first one.
    for _ in range(2):
        data = re.sub(r'\n.*©.*\n', '', data)

    # Repair a truncated "<font </font>" into a proper closing tag. The
    # replacement must be a plain "</font>": a "\/" in the replacement string
    # would be emitted with the backslash.
    data = re.sub(r'<font </font>', '</font>', data)
    return data
# Download every hour of every day in [date_, date_end) and store the cleaned
# HTML as res/<year>/<month>/<YYYY-MM-DD>_<hora>.html.
date_ = date(2024, 10, 1)
date_end = date(2025, 1, 1)
while date_ < date_end:
    date_str = date_.strftime("%Y-%m-%d")
    out_dir = "res/{}/{}/".format(date_.year, date_.month)
    # exist_ok replaces the separate exists() check before creating the folder.
    os.makedirs(out_dir, exist_ok=True)
    for hora in [2, 3, 4, 5, 6, -4, -5, -6, 7, 8]:
        # A negative code requests the same hour with psalmodia complementaris:
        # the site gets the positive code plus dopln=1.
        pcomp = 1 if hora < 0 else 0
        hora_ = abs(hora)
        url = 'http://breviarium.info/?a={}&datum={}&m=1&dopln={}'.format(hora_, date_str, pcomp)
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            data = response.read().decode("utf-8")
        # Only the first hour of the day (code 2) carries the date heading.
        data = clean_data(data, hora, date_.strftime("%d-%m-%Y") if hora == 2 else None)
        # The text was decoded as UTF-8; write it back as UTF-8 instead of the
        # platform default encoding.
        with open(out_dir + "{}_{}.html".format(date_str, hora), "w", encoding="utf-8") as f:
            f.write(data)
    print(date_str)
    date_ += timedelta(days=1)
print("DONE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment