Skip to content

Instantly share code, notes, and snippets.

@slipeer
Last active August 8, 2018 03:23
Show Gist options
  • Save slipeer/52fbbad704d34d76458e305c8f1463cc to your computer and use it in GitHub Desktop.
Save slipeer/52fbbad704d34d76458e305c8f1463cc to your computer and use it in GitHub Desktop.
Synapse clean unclaimed local media.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# For https://github.com/matrix-org/synapse/
# Synapse old local media analyze and cleanup util.
# @author Pavel Kardash <pavel@kardash.su>
"""
Since synapse v0.27.0 the local_media_repository table
has a last_access_ts column.
This makes it possible to clean unclaimed local media.
Utility finds database connection settings
and media repository path in synapse configuration.
Only the postgres is supported as a database.
Media storage providers are not supported.
The size of folders with thumbnails is not
calculated for execution speedup.
"""
import re
import os
import yaml
import logging
import sys
import shutil
from optparse import OptionParser
LOG_FORMAT = '%(asctime)s %(message)s'
LOG_FILE_FORMAT = '%(asctime)s %(levelname)-8s %(message)s'
LOG_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
class ModException(Exception):
    """Error raised by this utility for problems it reports itself.

    Used for configuration problems (missing media path, unsupported or
    missing database settings). The script entry point catches it and logs
    only the message, without a traceback.
    """
def configReader(file):
    """Read and parse a YAML configuration file.

    :param file: path to the YAML file (the synapse homeserver config).
    :return: the parsed YAML document; main() expects a mapping.
    """
    with open(file, "r") as f:
        # safe_load instead of yaml.load: a bare yaml.load(f) can construct
        # arbitrary Python objects on old PyYAML versions and raises
        # TypeError (missing Loader) on PyYAML >= 6. The homeserver config
        # is plain YAML, so the safe loader is sufficient.
        return yaml.safe_load(f)
def progress(count, total, status=''):
    """Draw a one-line text progress bar on stdout.

    The line ends with a carriage return so the next call overwrites it.

    :param count: number of items processed so far.
    :param total: total number of items; zero or less is drawn as 0%.
    :param status: free text appended after the percentage.
    """
    bar_len = 60
    if total > 0:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # Nothing to process: avoid ZeroDivisionError and show an empty bar.
        filled_len = 0
        percents = 0.0
    # Keep the bar exactly bar_len wide even if count is outside 0..total.
    filled_len = max(0, min(bar_len, filled_len))
    bar = '=' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
def getDbConnect(config):
    """Open a database connection described by the synapse configuration.

    Only the ``psycopg2`` (postgres) database type is supported.

    :param config: parsed homeserver configuration (a mapping) with a
        ``database`` section holding ``name`` and ``args``.
    :return: the connection returned by ``psycopg2.connect(**args)``.
    :raises ModException: if the ``database`` section is missing or empty,
        names sqlite3 or an unknown database type, or has no ``args``.
    """
    database = (config or {}).get("database")
    if not database:
        raise ModException("Database type not found in config file")

    # .get() so that a section without "name" is reported as a
    # configuration problem instead of escaping as a KeyError.
    name = database.get("name")
    if name == "sqlite3":
        raise ModException("Sqlite database unsuported")
    if name != "psycopg2":
        raise ModException("Unknown database type in config file")

    args = database.get("args")
    if args is None:
        raise ModException("Database conect string not found in config file")

    # Imported only once the configuration is known to need it, so config
    # errors are reported even when the driver is not installed.
    import psycopg2 as db
    return db.connect(**args)
def sizeof_fmt(num, suffix='B'):
    """Format a byte count with binary (1024-based) unit prefixes.

    :param num: the size to format; may be negative.
    :param suffix: unit suffix appended after the prefix.
    :return: a string such as ``"1.5KiB"``; values of 1024**8 and above
        are expressed in ``Yi``.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        # Stop at the first prefix for which the magnitude fits below 1024.
        if -1024.0 < value < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value = value / 1024.0
    # Anything larger than the Zi range is reported in Yi.
    return "%.1f%s%s" % (value, 'Yi', suffix)
def main():
    """Parse the command line, find old local media and clean it up.

    Reads the synapse configuration, selects the rows of
    ``local_media_repository`` that are not quarantined and whose last
    access (or creation, when never accessed) is older than the requested
    interval, removes the matching files and thumbnail folders under
    ``media_store_path`` and marks the rows with
    ``quarantined_by = 'cleanup'``. Without ``-d`` nothing is changed; the
    run only logs what would be done and sums up the reclaimable size.

    :raises ModException: if ``media_store_path`` is missing from the
        config, or the database settings are unusable.
    """
    optp = OptionParser(description=__doc__)
    optp.add_option(
        '-c',
        help=u'Homeserver config yaml path '
        u'(default /etc/matrix-synapse/homeserver.yaml)',
        dest='config',
        default="/etc/matrix-synapse/homeserver.yaml")
    optp.add_option(
        "-l",
        help=u'Path to store log '
        u'(default /var/log/%s.log)' % os.path.basename(__file__),
        dest="logpath",
        default="/var/log/%s.log" % os.path.basename(__file__))
    optp.add_option(
        "-i",
        help=u'Interval to clean older messages '
        u'(default 3 month)',
        dest="interval",
        default="3 month")
    optp.add_option(
        "-d",
        help=u'Delete old files. '
        u'Without this option, no changes are made, '
        u'only counting frees up space.',
        action="store_true",
        dest="cleanup")
    optp.add_option(
        "-p",
        help=u'Show progress bar.'
        u'Size of the freed space is approximate',
        action="store_true",
        dest="progress")
    optp.add_option(
        "-v", help=u'Verbose logging', action="store_true", dest="more_info")
    opts, _ = optp.parse_args()

    # The log file gets INFO detail only with -v; the console handler
    # added below always stays at WARNING.
    log_level = logging.INFO if opts.more_info else logging.WARNING
    logging.basicConfig(
        level=log_level,
        format=LOG_FILE_FORMAT,
        datefmt=LOG_DATE_FORMAT,
        filename=opts.logpath)
    config = configReader(opts.config)
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.WARNING)
    console.setFormatter(
        logging.Formatter(LOG_FORMAT, datefmt=LOG_DATE_FORMAT))
    logging.getLogger().addHandler(console)

    # An empty config file parses to None; treat it like a missing key.
    mediaPath = (config or {}).get("media_store_path")
    if not mediaPath:
        raise ModException("\"media_store_path\" not found in config file")
    conDb = getDbConnect(config)

    def remove(path):
        """Remove a file or a directory tree.

        Returns ``(freed_bytes, ok)``. Directory sizes (thumbnail folders)
        are not calculated, for execution speed, and count as 0. ``ok`` is
        False only when a real removal was attempted and failed; a failed
        removal frees nothing.
        """
        is_file = os.path.isfile(path)
        fsize = os.stat(path).st_size if is_file else 0
        if not opts.cleanup:
            logging.info(
                "(DRY-RUN!) Remove %s (%s).", path, sizeof_fmt(fsize))
            return fsize, True
        logging.info("Remove %s (%s).", path, sizeof_fmt(fsize))
        try:
            if is_file:
                os.remove(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        except OSError as e:
            logging.error("OSError: %s - %s.", e.filename, e.strerror)
            return 0, False
        return fsize, True

    def markRemoved(con, media_id):
        """Set ``quarantined_by = 'cleanup'`` on the row of ``media_id``."""
        if not opts.cleanup:
            logging.info("(DRY-RUN!) Set \"cleanup\" database "
                         "quarantined_by mark to %s.", media_id)
            return
        logging.info("Set \"cleanup\" database "
                     "quarantined_by mark to %s.", media_id)
        try:
            update_cur = con.cursor()
            try:
                update_cur.execute(
                    "UPDATE local_media_repository "
                    "SET quarantined_by = 'cleanup' WHERE media_id = %s;",
                    (media_id, ))
            finally:
                update_cur.close()
            con.commit()
        except Exception as e:
            logging.error(e)
            # Without a rollback postgres keeps the transaction aborted and
            # every later UPDATE on this connection would fail as well.
            con.rollback()

    # media_id "abcdXYZ" is stored on disk as "ab/cd/XYZ".
    id_pattern = re.compile("^(..)(..)(.+?)$")
    path_template = os.path.join(r"\g<1>", r"\g<2>", r"\g<3>")

    try:
        logging.warning(
            "Retrive list of media older then %s.", opts.interval)
        cur = conDb.cursor()
        try:
            # Synapse stores last_access_ts / created_ts as milliseconds
            # since the epoch, while EXTRACT(EPOCH ...) yields seconds,
            # hence "* 1000". now() is timezone-aware, so the epoch is not
            # skewed by the server's UTC offset. Media that was never
            # accessed (NULL last_access_ts) is aged by its creation time,
            # so fresh uploads are not removed.
            cur.execute(
                "SELECT media_id FROM local_media_repository WHERE "
                "COALESCE(last_access_ts, created_ts) < "
                "(EXTRACT(EPOCH FROM now() - %s::interval) * 1000) "
                "AND quarantined_by IS NULL;", (opts.interval, ))
            found = cur.rowcount
            logging.warning("Found %s old local media.", found)
            logging.warning("Start media clenup.")
            total = 0
            rows = iter(cur.fetchone, None)
            for number, row in enumerate(rows, 1):
                media_id = row[0]
                if opts.progress:
                    progress(
                        number,
                        found,
                        status='Media_id %s total removed about %s' %
                        (media_id, sizeof_fmt(total)))
                name = id_pattern.sub(path_template, media_id)
                freed_content, content_ok = remove(
                    os.path.join(mediaPath, "local_content", name))
                freed_thumbs, thumbs_ok = remove(
                    os.path.join(mediaPath, "local_thumbnails", name))
                total += freed_content + freed_thumbs
                if content_ok and thumbs_ok:
                    markRemoved(conDb, media_id)
                else:
                    # Leave the row unmarked so the next run retries it;
                    # marking it would orphan the files on disk for good.
                    logging.warning(
                        "Media %s was not fully removed, "
                        "database mark skipped.", media_id)
        finally:
            cur.close()
        logging.warning("Removed about %s.", sizeof_fmt(total))
    finally:
        conDb.close()
if __name__ == '__main__':
    # Script entry point. Failures are logged and reported through a
    # non-zero exit status so that cron jobs and shell scripts can detect
    # them; previously the process exited with status 0 after an error.
    try:
        main()
    except ModException as e:
        # Expected, self-explanatory errors: message only, no traceback.
        logging.error(e)
        sys.exit(1)
    except Exception as e:
        # Anything unexpected: log the full traceback.
        logging.exception(e)
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment