Skip to content

Instantly share code, notes, and snippets.

@f0k
Created September 5, 2021 07:37
Show Gist options
  • Save f0k/35ed2b6467764845217a31c3fe722b15 to your computer and use it in GitHub Desktop.
Save f0k/35ed2b6467764845217a31c3fe722b15 to your computer and use it in GitHub Desktop.
Merge and deduplicate .ics/.ical files
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Merges and deduplicates one or more .ics / ical files.
For usage information, call with --help.
Author: Jan Schlüter
"""
import sys
import io
from argparse import ArgumentParser
def opts_parser():
    """
    Builds and returns the command line parser for this script: one or
    more input files, a single output file, and an optional verbosity flag.
    """
    description = (
        "Merges and deduplicates one or more .ics / ical files.\n"
        "In case of duplicated entries (by UID or DTSTART+SUMMARY),\n"
        "files listed earlier take precedence.\n"
    )
    # (names/flags, keyword options), in the order they get registered
    argument_specs = (
        (('infile',),
         dict(nargs='+', type=str,
              help='The .ics or ical files to read.')),
        (('outfile',),
         dict(type=str,
              help='The .ics or ical file to write.')),
        (('--verbose', '-v'),
         dict(action='store_true',
              help='If given, report all duplicated entries.')),
    )
    parser = ArgumentParser(description=description)
    for flags, settings in argument_specs:
        parser.add_argument(*flags, **settings)
    return parser
def read_objects(fn):
    """
    Reads calendar objects from the given .ics / ical file and yields
    them as strings.

    Components (``BEGIN:X`` up to the matching ``END:X``, including
    anything nested in between) are yielded as one string with their
    original CRLF line endings. Global calendar properties (e.g., PRODID)
    are also treated as objects; a global property folded over several
    lines is yielded as a single string together with its continuation
    lines, so it can be kept or dropped as a unit.

    Reading stops at the first ``END:VCALENDAR`` line or at the end of the
    file. A component that was begun but not closed by then is dropped.
    The file is opened with the platform's default text encoding.

    Raises ValueError if the first line is not ``BEGIN:VCALENDAR`` + CRLF.
    """
    # newline='' disables newline translation, so the CRLF terminators
    # survive and objects can be written back out verbatim
    with io.open(fn, newline='') as f:
        if f.readline() != "BEGIN:VCALENDAR\r\n":
            raise ValueError("%s does not start with BEGIN:VCALENDAR" % fn)
        vobject = []          # lines of the component being collected
        vobject_type = None   # its type, or None while on global scope
        pending = []          # lines of a global property not yet yielded
        for line in f:
            if line == "END:VCALENDAR\r\n":
                break
            if vobject_type:
                # we're in an object, check if it ended
                vobject.append(line)
                if line == "END:%s\r\n" % vobject_type:
                    yield "".join(vobject)
                    vobject = []
                    vobject_type = None
                continue
            # we're on global scope
            if pending and line.startswith((" ", "\t")):
                # folded continuation (linebreak + single whitespace) of
                # the global property read just before
                pending.append(line)
                continue
            if pending:
                yield "".join(pending)
                pending = []
            if line.startswith("BEGIN:"):
                vobject_type = line[6:].rstrip("\r\n")
                vobject = [line]
            else:
                # hold the property back until we know it is not folded
                pending = [line]
        # flush a global property directly followed by END:VCALENDAR / EOF
        if pending:
            yield "".join(pending)
def get_identifiers(vobject):
    """
    Return a set of identifiers for a given calendar object string or
    global calendar property.

    For a global property, the set holds just the property name (the text
    before the first ";" or ":"). For a VTIMEZONE, it holds the tuple
    (object type, TZID line). For any other object, it holds
    (object type, UID line) if a UID is present, and
    (object type, DTSTART line, SUMMARY line) if at least one of the two
    is present. The set is empty if the object has none of them.

    A "line" is the complete unfolded content line, i.e., it includes the
    property name and any parameters, such as "UID:abc". Lines are
    expected to be terminated by CRLF.
    """
    lines = vobject.split("\r\n")

    def read_property(name):
        """Returns a content line of our vobject, or "" if not present."""
        for idx, line in enumerate(lines):
            # The name must start a content line and be directly followed
            # by the value (":") or a parameter (";"); otherwise "UID"
            # would also match "X-ALT-UID:..." or text inside some value.
            if not line.startswith(name):
                continue
            if line[len(name):len(name) + 1] not in (":", ";"):
                continue
            value = line
            # unfold folded lines (linebreak + single whitespace)
            for continuation in lines[idx + 1:]:
                if not continuation.startswith((" ", "\t")):
                    break
                value += continuation[1:]
            return value
        return ""

    if not vobject.startswith("BEGIN:"):
        # it's a global property, use its name; cut at the first ":" and
        # then at ";" so that parameters are not part of the name
        head = vobject.split(":", 1)[0]
        return {head.split(";", 1)[0]}

    vobject_type = lines[0][6:]
    if vobject_type == "VTIMEZONE":
        # for timezones, use their identifier
        return {(vobject_type, read_property("TZID"))}

    # for events, todos, ... use their uid or dtstart+summary
    identifiers = set()
    uid = read_property("UID")
    if uid:
        identifiers.add((vobject_type, uid))
    dtstart = read_property("DTSTART")
    summary = read_property("SUMMARY")
    if dtstart or summary:
        identifiers.add((vobject_type, dtstart, summary))
    return identifiers
def main():
    """
    Command line entry point: merges all input files into the output file.

    Objects are taken from the input files in the order given. An object is
    kept only if none of its identifiers occurred in an earlier object, so
    files listed earlier take precedence. With --verbose, the clashing
    identifiers of every skipped object are printed.

    All inputs are read before the output file is opened. This way, the
    output file may also be one of the input files, and a failure while
    reading does not leave a truncated output file behind.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    # remember identifiers of calendar objects already accepted
    ids_written = set()
    # objects to write, in output order
    kept = []
    # iterate over calendar objects in input files
    for fn in options.infile:
        for vobject in read_objects(fn):
            # get the set of identifiers for the current object
            identifiers = get_identifiers(vobject)
            # only keep it if none of the identifiers occurred before
            duplicates = identifiers & ids_written
            if not duplicates:
                kept.append(vobject)
            elif options.verbose:
                print("already written:", duplicates)
            # remember the identifiers (also those of skipped objects)
            ids_written.update(identifiers)
    # write the output file; newline='' keeps the CRLF line endings as is
    with io.open(options.outfile, 'w', newline='') as f:
        f.write("BEGIN:VCALENDAR\r\n")
        f.writelines(kept)
        f.write("END:VCALENDAR\r\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment