Skip to content

Instantly share code, notes, and snippets.

@yalov
Last active January 1, 2023 22:34
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yalov/055e636e6bfc35c7d7b096aa8aa26c0d to your computer and use it in GitHub Desktop.
Save yalov/055e636e6bfc35c7d7b096aa8aa26c0d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Download iCalendar files from calend.ru and add descriptions to their events.
Requirements:
Python >= 3.6
pip install requests icalendar trafilatura colorama
version: 11
Created on Fri Jan 20 21:55:59 2017. @author: yalov
Public domain license.
"""
import argparse
import os
import re
import sys
import time
import requests
import trafilatura
import colorama
from icalendar import Calendar
colorama.init()
def download(year):
    """Download the calend.ru iCalendar files for *year* into a folder named *year*.

    One request is made per calendar listed in ``calends``; every successful
    response body is written to ``<year>/<calendar file name>``.

    Args:
        year: Year to request. It is used both in the URL query and as the
            name of the output folder; non-string values are converted
            with ``str``.
    """
    year = str(year)
    calends = ["ical-belorus.ics", "ical-jew.ics", "ical-russtate.ics", "ical-ukraine.ics", "ical-wholeworld.ics"]

    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # works when the folder already exists or is a nested path.
    os.makedirs(year, exist_ok=True)

    for calend in calends:
        url = f"https://www.calend.ru/ical/{calend}?v=yy{year}&b=1"
        r = requests.get(url, allow_redirects=True)
        if not r.ok:
            # Do not store an HTTP error page under an .ics name.
            print(f"Download failed (HTTP {r.status_code}): {url}")
            continue
        path = os.path.join(year, calend)
        print(path)
        # The context manager guarantees the file is flushed and closed.
        with open(path, 'wb') as f:
            f.write(r.content)
def get_description(url):
    """Get holiday page from calend.ru, return the cleaned summary.

    The page is fetched with ``trafilatura.fetch_url``; a failed fetch is
    retried up to three more times with a two second pause between attempts.
    The main text is then extracted and photo credit lines
    (``Фото: ..., ...``) are replaced by a single space.

    Args:
        url: Address of the holiday page.

    Returns:
        The cleaned text as ``str``. An empty string is returned when the
        page could not be fetched or nothing could be extracted, so callers
        never receive the literal text ``"None"``.
    """
    downloaded = trafilatura.fetch_url(url)
    retries_left = 3
    while not downloaded and retries_left > 0:
        retries_left -= 1
        print(colorama.Fore.RED + "fetch failed ... " + colorama.Fore.RESET, end="")
        time.sleep(2)
        downloaded = trafilatura.fetch_url(url)

    if not downloaded:
        # Every attempt failed: there is nothing to extract, and claiming
        # "fetched" here would be misleading.
        return ""
    print("fetched ... ", end="")

    # A falsy extraction result (e.g. None) is normalised to an empty string.
    desc = trafilatura.extract(downloaded) or ""
    desc = re.sub(r'Фото: .+, .+', ' ', desc)
    print("extracted ... ", end="")
    return desc
def proceed(source_path, destination_path):
    """Add descriptions to every event of an iCalendar file and save a copy.

    Steps:
      * The calendar at *source_path* is parsed.
      * For the VCALENDAR component, an ``X-WR-CALNAME`` of the form
        ``<latin letters/dots> - <words> - <digits>`` is shortened to
        ``<words>'<digits>``. The resulting name and the component count
        are appended to ``log.txt`` next to the source file.
      * For each VEVENT, the page URL is taken from ``COMMENT`` and the title
        from ``SUMMARY``. The description text is read from
        ``fetched/<digits of URL> <title>.txt`` next to the source file when
        that file exists; otherwise it is obtained with ``get_description``
        and stored there. ``DESCRIPTION`` is set to the URL, a newline and
        the text.
      * The modified calendar is written to *destination_path*.

    Args:
        source_path: Path of the ``.ics`` file to read.
        destination_path: Path of the ``.ics`` file to write.
    """
    parentfolder = os.path.dirname(source_path)
    print("Proceed: {} -> {}".format(source_path, destination_path))

    # open source iCalendar
    with open(source_path, 'rb') as g:
        gcal = Calendar.from_ical(g.read())

    components = gcal.walk()
    # Every component except the VCALENDAR itself.
    count = len(components) - 1
    i = 0
    for component in components:
        if component.name == "VCALENDAR":
            # change Name of Calendar; keep it unchanged when it is missing
            # or does not follow the expected pattern instead of crashing.
            original_name = str(component.get('X-WR-CALNAME', ''))
            m = re.match(r"^([a-zA-Z.]+)[ -]+([\w ]+)[ -]+(\d+)$", original_name)
            if m:
                name = m.group(2).strip() + '\'' + m.group(3)
                component['X-WR-CALNAME'] = name
            else:
                name = original_name
            print('Name = ', name)
            # Explicit UTF-8: calendar names are typically not ASCII.
            with open(os.path.join(parentfolder, "log.txt"), 'a', encoding='utf-8') as f:
                f.write(name + "\n")
                f.write("count: {}\n".format(count))

        elif component.name == "VEVENT":
            # Count first, so progress reads 1/N .. N/N.
            i += 1
            # Some feeds carry a doubled host prefix in the link; repair it.
            url = component['COMMENT'].replace('https://www.calend.ruhttps://', 'https://')
            title = component['SUMMARY']
            print('({0}/{1}) {2}... '.format(i, count, colorama.Fore.YELLOW + title + colorama.Fore.RESET), end="")

            url_code = "".join(c for c in url if c.isdigit())
            # replace anything that is not alphanumeric, ".", "-", "_" or space
            filename = re.sub(r'[^\w\-\. ]', '_', url_code + " " + title)
            filepath = os.path.join(parentfolder, "fetched", filename + ".txt")

            if os.path.exists(filepath):
                try:
                    with open(filepath, encoding='utf-8') as f:
                        description = f.read()
                except UnicodeDecodeError:
                    # Cache file is not UTF-8: fall back to the platform
                    # default encoding.
                    with open(filepath) as f:
                        description = f.read()
                print(colorama.Fore.GREEN + "read" + colorama.Fore.RESET)
            else:
                description = get_description(url)
                os.makedirs(os.path.dirname(filepath), exist_ok=True)
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(description)
                print("saved")
                # Short pause between network fetches; Ctrl+C during the
                # pause ends the run cleanly.
                try:
                    time.sleep(0.5)
                except KeyboardInterrupt:
                    print("Exit.")
                    sys.exit(0)

            component['DESCRIPTION'] = url + '\n' + description

    with open(destination_path, 'wb') as f:
        f.write(gcal.to_ical())
    print("Done.\n\n")
def proceedfolder(folder):
    """Run ``proceed`` on every source ``.ics`` file directly inside *folder*.

    A directory entry is processed when it is a regular file whose name ends
    with ``.ics`` but not with ``-out.ics``; the result is written next to it
    as ``<name>-out.ics``. Every other entry is reported as "Not valid".
    """
    for entry in os.listdir(folder):
        full_path = folder + "/" + entry
        is_source_calendar = (
            os.path.isfile(full_path)
            and entry.endswith(".ics")
            and not entry.endswith("-out.ics")
        )
        if not is_source_calendar:
            print("Not valid: " + full_path)
            continue
        stem = entry[:-4]
        proceed(full_path, folder + "/" + stem + "-out.ics")
if __name__ == '__main__':
    # Command-line entry point: either download the calendars for a year,
    # or add descriptions to the given .ics files / folders.
    print(sys.version)
    parser = argparse.ArgumentParser(description='Download and add Description to ical files using the calend.ru')
    parser.add_argument('files', type=str, nargs='*',
                        help="path to file(s) with or without the extension \".ics\" or folder(s)")
    parser.add_argument('-d', '--download', metavar='year',
                        help="download calendars to year subfolder (ignore files argument)")
    # parse_args() reads sys.argv[1:] by default.
    namespace = parser.parse_args()

    if namespace.download:
        download(namespace.download)
        # sys.exit instead of the interactive-only exit() helper from site.
        sys.exit()

    if not namespace.files:
        # Nothing to do: show how the script is meant to be called.
        parser.print_help()

    for path in namespace.files:
        if os.path.isfile(path + ".ics"):
            # Path given without the extension.
            proceed(path + ".ics", path + "-out.ics")
        elif os.path.isfile(path) and path.endswith('.ics'):
            proceed(path, path[:-4] + "-out.ics")
        elif os.path.isdir(path):
            proceedfolder(path)
        else:
            print("Not valid: " + path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment