Skip to content

Instantly share code, notes, and snippets.

@kanzure
Last active July 15, 2020 02:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kanzure/b806e07bbe83357c6f9864b7644d40e9 to your computer and use it in GitHub Desktop.
Save kanzure/b806e07bbe83357c6f9864b7644d40e9 to your computer and use it in GitHub Desktop.
tool for timestamping IRC logs
"""
Timestamps!
This file creates opentimestamps timestamps for daily IRC logs. It works when
running on a daily basis (called from a cronjob) or when called manually with
multiple days since the last run.
The script will identify all new log files and create timestamps for those new
logfiles.
The script will not timestamp "today's" log until tomorrow because the log is
incomplete until it is closed out for the day.
:author: Bryan Bishop <kanzure@gmail.com>
:date: 2020-02-10
"""
import os
import sh
from datetime import datetime, timedelta
import itertools
# Enable DRY_RUN by setting this value to True to use readonly mode.
# When True, make_timestamps() only prints what it would do: it skips
# "ots stamp", skips moving the .ots files and skips the watchlist append.
# NOTE(review): the upgrade steps (upgrade_recent_timestamps and
# "ots upgradewatchlist") do not consult this flag.
DRY_RUN = False
# List of directories that have relevant IRC logs. If the IRC logs were more
# organized (like "they are all under the directory irclogs/") then this list
# wouldn't be needed.... oh well.
# Each entry is passed as-is to make_timestamps() and
# upgrade_recent_timestamps(), which both look for a "timestamps"
# subdirectory inside it. The paths are relative, so the script has to be
# started from the directory that contains them.
DIRNAMES = [
"logs/",
"bitcoin-core-dev/",
"bitcoin-wizards/",
"bitmetas/",
"ctv-bip-review/",
"c-lightning/",
"lightning-dev/",
"lnd/",
"rust-bitcoin/",
"secp256k1/",
"joinmarket/",
"bitcoin-builds/",
"braidpool/",
"taproot-bip-review/",
"utreexo/",
]
def fromisoformat(date_fragment):
    """
    Parse a "%Y-%m-%d" string (the basename of a daily log file) into a
    datetime object.

    This is a stand-in for datetime.fromisoformat(), which is apparently not
    available in older versions of python3. A string that does not match the
    format makes datetime.strptime() raise ValueError.
    """
    iso_date_format = "%Y-%m-%d"
    parsed = datetime.strptime(date_fragment, iso_date_format)
    return parsed
def grouper(n, iterable):
    """
    Split the items of an iterable into consecutive lists of at most n items.

    The last list holds whatever is left over and may be shorter than n.
    Empty lists are never produced, so an empty iterable yields no groups.

    Items are taken as-is: values that are None (or otherwise falsy) are kept
    in their group. The previous zip_longest()-based version padded with None
    and then filtered None out again, which silently dropped genuine None
    items.

    :param n: maximum number of items per group. Values <= 0 produce no
        groups, as before.
    :param iterable: any iterable; it is consumed lazily, one group at a time.
    :return: an iterator of lists.
    """
    if n <= 0:
        return iter(())

    iterator = iter(iterable)

    def _chunks():
        while True:
            chunk = list(itertools.islice(iterator, n))
            if not chunk:
                # The source iterator is exhausted.
                return
            yield chunk

    return _chunks()
def get_last_few_days(n):
    """
    Get a list of n datetime objects for the past few days, newest first.

    The list starts two days before the current time and steps back one day
    per entry.

    :param n: how many days to return; 0 gives an empty list.
    :return: list of naive local-time datetime objects.
    """
    # Read the clock once so every entry is exactly one day apart, even if
    # the call happens to straddle midnight.
    now = datetime.now()
    # Start at an offset of 2 days because yesterday's log is not ready yet
    # (the timestamp was just created a moment ago).
    return [now - timedelta(days=offset) for offset in range(2, n + 2)]
def get_last_few_days_date_fragments(n):
    """
    Return the dates from get_last_few_days(n) formatted as "%Y-%m-%d"
    strings, in the same order.

    These strings match the basename of the IRC log filenames with the file
    extension removed.
    """
    date_fragments = []
    for day in get_last_few_days(n):
        date_fragments.append(day.strftime("%Y-%m-%d"))
    return date_fragments
def upgrade_recent_timestamps(dirname, daysback=3):
    """
    Upgrade the past few days of timestamps with "ots upgrade".

    Looks in <dirname>/timestamps for one "<date>.log.ots" file per date
    returned by get_last_few_days_date_fragments(daysback) and passes the ones
    that exist to a single "ots upgrade" call. Missing files are reported and
    skipped. A failure of the ots command is reported and otherwise ignored,
    so that the caller can carry on with the next directory.

    :param dirname: log directory that contains a "timestamps" subdirectory.
    :param daysback: how many days of timestamps to try to upgrade.
    :raises Exception: if daysback is larger than 400, because all files are
        handed to one ots invocation and splitting into groups is not
        implemented.
    """
    # Reject unsupported requests before doing any filesystem work.
    if daysback > 400:
        raise Exception("grouping not implemented for ots upgrade")

    timestamp_dirpath = os.path.join(dirname, "timestamps")

    existant_files = []
    for date_fragment in get_last_few_days_date_fragments(n=daysback):
        timestamp_filename = os.path.join(timestamp_dirpath, date_fragment + ".log.ots")
        if os.path.exists(timestamp_filename):
            existant_files.append(timestamp_filename)
        else:
            print("Can't upgrade {} because the file doesn't exist".format(timestamp_filename))

    print("Upgrading {} timestamps for {}".format(len(existant_files), dirname))

    if not existant_files:
        # There is nothing to hand to "ots upgrade"; don't run it without
        # any file arguments.
        return

    try:
        sh.ots("upgrade", *existant_files)
    except Exception as exc:
        # Best effort: a failed upgrade must not stop the rest of the run.
        # (A likely cause is a timestamp that is not mature yet.)
        print("Encountered an exception when upgrading timestamps, the files were: {}".format(existant_files))
        print("The exception was: {}".format(exc))
        print("Continuing...")
    else:
        print("Done upgrading timestamps.")
# get the latest time stamped date
def get_most_recent_timestamp(timestamp_dir):
    """
    Find the youngest timestamp in the timestamps directory.

    Only files named "<%Y-%m-%d>.log.ots" are considered. Backup files
    (".log.ots.bak"), anything else that merely contains ".log.ots" without
    ending in it, and files with "-initial" in their name are ignored. Files
    whose name is not a valid date are reported and skipped instead of
    aborting the whole run.

    :param timestamp_dir: directory to scan; it must exist, because
        os.listdir() is called on it.
    :return: the datetime of the youngest timestamp, or 1900-01-01 if the
        directory has no usable timestamp files.
    """
    suffix = ".log.ots"

    dates = []
    for timestamp_filename in os.listdir(timestamp_dir):
        # Match on the end of the name, so that e.g. "....log.ots.bak" and
        # other files that only contain the suffix somewhere are excluded.
        if not timestamp_filename.endswith(suffix) or "-initial" in timestamp_filename:
            continue

        date_fragment = timestamp_filename[:-len(suffix)]
        try:
            dates.append(fromisoformat(date_fragment))
        except ValueError:
            print("Skipping {} because its name is not a date".format(timestamp_filename))

    if not dates:
        # Nothing has been timestamped yet: return a date that is older than
        # any log file.
        return datetime.strptime("1900-01-01", "%Y-%m-%d")

    return max(dates)
def make_timestamps(dirname):
    """
    Call opentimestamps (ots stamp) for each new logfile that hasn't been
    timestamped yet, based on the assumption that the youngest timestamp (by
    filename) in the timestamps/ directory is where you left off timestamping.

    A log file is "new" when its name is "<%Y-%m-%d>.log", its date is later
    than the youngest existing timestamp, and it is not today's log. New logs
    are stamped in groups of at most 500 files per ots invocation. Each
    resulting .ots file is moved into <dirname>/timestamps and its absolute
    path is appended to ~/.cache/ots/watchlist.

    When DRY_RUN is set, nothing is stamped, moved or written; the function
    only prints what it would have done.

    :param dirname: log directory; it must contain a "timestamps"
        subdirectory.
    :raises Exception: if dirname has no entries at all.
    """
    timestamp_dirname = os.path.join(dirname, "timestamps")
    latest_timestamp = get_most_recent_timestamp(timestamp_dirname)
    todays_date_fragment = datetime.now().strftime("%Y-%m-%d")

    potential_log_files = sorted(os.listdir(dirname))
    # NOTE(review): this looks at the raw directory listing, which also
    # contains the timestamps/ subdirectory, so it probably never fires.
    # Confirm whether "no *new* log files" was meant instead.
    if not potential_log_files:
        raise Exception("No log files found.")

    log_filenames = []
    for basename in potential_log_files:
        # All log files are in %Y-%m-%d format. However, some other files are
        # named .log that aren't date logs. These should be skipped. The
        # checks are done on the bare file name so that the result does not
        # depend on what the directory is called.
        if not basename.endswith(".log") or "-initial" in basename:
            continue
        if basename[0] not in ("1", "2"):
            continue

        date_fragment = basename[:-len(".log")]
        try:
            date = fromisoformat(date_fragment)
        except ValueError:
            # Starts with a digit and ends in .log, but is not a date log.
            continue

        # We don't timestamp the log file for today's date because the log
        # is not finished until the current day has passed.
        if date > latest_timestamp and date_fragment != todays_date_fragment:
            log_filenames.append(os.path.join(dirname, basename))

    log_filename_groups = list(grouper(500, log_filenames))
    print("Created {} groups".format(len(log_filename_groups)))

    watchlist_path = os.path.expanduser("~/.cache/ots/watchlist")

    for subgroup_log_filenames in log_filename_groups:
        filename_args = [os.path.abspath(log_filename) for log_filename in subgroup_log_filenames]
        print("Timestamping a group with these files: {}".format(filename_args))

        if not DRY_RUN:
            # --nowatch is from the watchlist branch
            sh.ots("stamp", "--nowatch", *filename_args)

        for log_filename in subgroup_log_filenames:
            timestamp_filename = log_filename + ".ots"
            print("Moving timestamp {}".format(timestamp_filename))
            if DRY_RUN:
                continue

            # --no-clobber to not overwrite existing timestamps
            sh.mv("--no-clobber", timestamp_filename, timestamp_dirname)

            # Add this timestamp to the ots watchlist.
            moved_path = os.path.abspath(os.path.join(timestamp_dirname, os.path.basename(timestamp_filename)))
            with open(watchlist_path, "a") as fd:
                fd.write(moved_path + "\n")
def main():
    """
    Stamp the new logs of every directory in DIRNAMES, then upgrade recent
    timestamps and finally run "ots upgradewatchlist".

    This script should be executing in the parent directory of the
    directories listed in DIRNAMES.
    """
    for dirname in DIRNAMES:
        print("Processing {}".format(dirname))
        make_timestamps(dirname)

    # Upgrading takes a while, so it only starts once all new timestamps
    # have been created.
    for dirname in DIRNAMES:
        # Original author's note: daysback must be at least 3, because the
        # youngest timestamp that can be upgraded is the one for the day
        # before yesterday. Yesterday's timestamp was created a few moments
        # ago and is not mature yet.
        upgrade_recent_timestamps(dirname, daysback=5)

    # The local ots version is using the watchlist branch.
    # https://github.com/opentimestamps/opentimestamps-client/pull/109
    print("Running ots upgradewatchlist ... (will upgrade more than just IRC log timestamps, but whatever)")
    try:
        sh.ots("upgradewatchlist")
    except Exception:
        # Deliberately best effort; the failure is only reported.
        print("Got an error while running upgradewatchlist. Ignoring (probably immature timestamp).")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment