Skip to content

Instantly share code, notes, and snippets.

@peterspackman
Created January 8, 2021 03:27
Show Gist options
  • Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.
Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.
SQL compressed file archive
#!/usr/bin/env python
from __future__ import print_function
import argparse
from datetime import datetime, timedelta
import logging
import os
import sqlite3
import zlib
import stat
import time
from collections import namedtuple
# Metadata record for one archive member: the columns of a `sqlar` row
# except the `data` blob (name, mode, mtime, sz).
SqliteArchiveFile = namedtuple('SqliteArchiveFile', 'name mode mtime sz')
# Module-wide logger; main() sets the level and format via logging.basicConfig.
LOG = logging.getLogger("sqlar.py")
# Lookup table for filemode(): one inner tuple per output character, in
# output order.  Within an inner tuple the candidates are tried top to
# bottom and the first one whose bits are all set in the mode wins, so the
# combined entries (e.g. execute + setuid) must precede the single-bit ones.
_filemode_table = (
    # Character 1: file type.
    ((stat.S_IFLNK, "l"),
     (stat.S_IFREG, "-"),
     (stat.S_IFBLK, "b"),
     (stat.S_IFDIR, "d"),
     (stat.S_IFCHR, "c"),
     (stat.S_IFIFO, "p")),

    # Characters 2-4: owner read / write / execute (with setuid variants).
    ((stat.S_IRUSR, "r"),),
    ((stat.S_IWUSR, "w"),),
    ((stat.S_IXUSR | stat.S_ISUID, "s"),
     (stat.S_ISUID, "S"),
     (stat.S_IXUSR, "x")),

    # Characters 5-7: group read / write / execute (with setgid variants).
    ((stat.S_IRGRP, "r"),),
    ((stat.S_IWGRP, "w"),),
    ((stat.S_IXGRP | stat.S_ISGID, "s"),
     (stat.S_ISGID, "S"),
     (stat.S_IXGRP, "x")),

    # Characters 8-10: others read / write / execute (with sticky variants).
    ((stat.S_IROTH, "r"),),
    ((stat.S_IWOTH, "w"),),
    ((stat.S_IXOTH | stat.S_ISVTX, "t"),
     (stat.S_ISVTX, "T"),
     (stat.S_IXOTH, "x")),
)


def filemode(mode):
    """Render a numeric file mode as a ten-character string like '-rwxrwxrwx'.

    Each position of the result is chosen from the matching entry of
    ``_filemode_table``; a position for which no candidate bit pattern is
    fully set in *mode* is rendered as ``"-"``.
    """
    symbols = []
    for candidates in _filemode_table:
        # First candidate whose bits are all present, else the "-" filler.
        symbol = next(
            (char for bit, char in candidates if mode & bit == bit), "-")
        symbols.append(symbol)
    return "".join(symbols)
def readable_size(num, suffix='B'):
    """Format *num* with a binary (1024-based) unit prefix, e.g. '1.5KiB'.

    The value is divided by 1024 until its magnitude drops below 1024 or
    the prefixes up to 'Zi' are exhausted, in which case 'Yi' is used.
    *suffix* is appended after the prefix.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    for prefix in prefixes:
        if abs(scaled) >= 1024.0:
            # Still too large for this unit; move on to the next one.
            scaled = scaled / 1024.0
            continue
        return "{:3.1f}{}{}".format(scaled, prefix, suffix)
    return "{:.1f}{}{}".format(scaled, "Yi", suffix)
def readable_time(td):
    """Format a ``datetime.timedelta`` as '<value> s' or '<value> ms'.

    Durations longer than 100 milliseconds are shown in seconds, anything
    else in milliseconds; both use two decimal places.

    The whole duration (days, seconds and microseconds) is taken into
    account.  Reading only ``td.microseconds`` would drop the whole-second
    part, e.g. reporting 2.05 s as "50.00 ms".
    """
    fmt = '{:0.2f} {}'
    # Work in integer microseconds so the 100 ms threshold is exact.
    total_us = (td.days * 86400 + td.seconds) * 10 ** 6 + td.microseconds
    ms = total_us / 1000.0
    if ms > 100:
        return fmt.format(ms / 1000, "s")
    return fmt.format(ms, "ms")
class SqliteArchive(object):
    """A SQLite database holding zlib-compressed files in a ``sqlar`` table.

    Each row stores a file's name, mode, mtime, uncompressed size (``sz``)
    and its zlib-compressed contents (``data``).  Opening an archive
    creates the table if it does not exist yet.

    The object can be used as a context manager; leaving the ``with``
    block closes the underlying connection.
    """

    _filename = None
    _conn = None
    _cursor = None
    _SCHEMA = """
create table if not exists sqlar(
name text primary key,
mode int,
mtime int,
sz int,
data blob);
"""

    def __init__(self, filename):
        """Open (or create) the archive database at *filename*."""
        self._filename = filename
        self._connect()
        self._create_table()

    def _connect(self):
        """Open the SQLite connection and the single cursor shared by all methods."""
        self._conn = sqlite3.connect(self._filename)
        self._cursor = self._conn.cursor()

    def _create_table(self):
        """Create the ``sqlar`` table if it is missing."""
        self._cursor.execute(self._SCHEMA)

    def close(self):
        """Close the database connection; safe to call more than once."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
            self._cursor = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    @staticmethod
    def _archive_name(filename):
        """Return the name under which *filename* is stored in the archive.

        NOTE: ``lstrip('./')`` removes every leading '.' and '/' character,
        not just a "./" prefix, so ".hidden" is stored as "hidden".  This
        is kept as-is because the command-line extract path applies the
        same stripping when looking names up.
        """
        return filename.lstrip('./')

    @property
    def filename(self):
        """Path of the archive database as passed to the constructor."""
        return self._filename

    def size(self):
        """Return the sum of the uncompressed sizes (0 for an empty archive)."""
        # sum() over zero rows is NULL; coalesce turns that into 0.
        return self._cursor.execute(
            'select coalesce(sum(sz), 0) from sqlar').fetchone()[0]

    def compressed_size(self):
        """Return the sum of the stored blob lengths (0 for an empty archive)."""
        return self._cursor.execute(
            'select coalesce(sum(length(data)), 0) from sqlar').fetchone()[0]

    def disk_size(self):
        """Return the size in bytes of the archive file on disk."""
        return os.stat(self._filename).st_size

    def contains(self, filename):
        """Return the ``SqliteArchiveFile`` stored under *filename*, or None."""
        row = self._cursor.execute(
            'select name, mode, mtime, sz '
            'from sqlar where name = ?', (filename,)).fetchone()
        if row is None:
            return None
        return SqliteArchiveFile(*row)

    def add(self, filename):
        """Compress *filename* from disk and store (or replace) it.

        The file is skipped when it is the archive itself, or when the
        archive already holds an entry for it whose mtime is not older
        than the file's.  The change is committed before returning.
        """
        # Compare resolved paths so "./a.db" and "a.db" are recognised as
        # the same file.
        if os.path.abspath(filename) == os.path.abspath(self._filename):
            LOG.warning("s %s -- "
                        "not possible to add an archive to itself", filename)
            return
        stats = os.stat(filename)
        # Look the entry up under the same name it is stored under;
        # otherwise "./foo" would never match the stored "foo".
        name = self._archive_name(filename)
        f_info = self.contains(name)
        if f_info is not None and stats.st_mtime <= f_info.mtime:
            LOG.debug("s %s -- "
                      "no change since last write", filename)
            return
        LOG.debug('a %s', filename)
        with open(filename, 'rb') as f:
            contents = f.read()
        self._cursor.execute(
            "insert or replace into sqlar values(?,?,?,?,?)",
            (name,
             stats.st_mode,
             stats.st_mtime,
             stats.st_size,
             sqlite3.Binary(zlib.compress(contents)))
        )
        self._conn.commit()

    def extract(self, filename):
        """Write the archived file *filename* to that same relative path.

        Missing parent directories are created, and the file's mtime is
        set to the stored value.

        Raises:
            KeyError: if the archive has no entry named *filename*.
        """
        self._cursor.execute(
            "select mtime, data from sqlar where name = ?", (filename,))
        row = self._cursor.fetchone()
        if row is None:
            raise KeyError("no such file in archive: %s" % filename)
        mtime, data = row
        # Place the file under the cwd.  Only create directories when the
        # name has a directory part; real failures (e.g. permissions) are
        # allowed to propagate instead of being silently swallowed.
        parent = os.path.dirname(filename)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)
        LOG.debug("x %s", filename)
        with open(filename, 'wb') as f:
            f.write(zlib.decompress(data))
        stats = os.stat(filename)
        # Keep the current atime, restore the archived mtime.
        os.utime(filename, (stats.st_atime, mtime))

    @property
    def files(self):
        """Yield a ``SqliteArchiveFile`` for every entry in the archive."""
        # fetchall() first: the cursor is shared, so callers may run other
        # queries (e.g. extract) while iterating over this generator.
        rows = self._cursor.execute(
            "select name, mode, mtime, sz from sqlar").fetchall()
        for row in rows:
            yield SqliteArchiveFile(*row)

    def find(self, pattern):
        """Yield entries whose name matches the SQL ``LIKE`` *pattern*."""
        rows = self._cursor.execute(
            "select name, mode, mtime, sz from sqlar where name like ?",
            (pattern,)).fetchall()
        for row in rows:
            yield SqliteArchiveFile(*row)

    def contents(self, filename, decode=None):
        """Return the decompressed contents of *filename*, or None if absent.

        When *decode* is given it is used as the codec name to decode the
        bytes into text.
        """
        self._cursor.execute(
            "select data from sqlar where name = ?", (filename,))
        row = self._cursor.fetchone()
        if row is None:
            return None
        contents = zlib.decompress(row[0])
        if decode:
            contents = contents.decode(decode)
        return contents

    def ls(self):
        """Return a listing with one 'mode size mtime name' line per entry."""
        lines = []
        for f in self.files:
            # Named `stamp` so the module-level `time` import is not shadowed.
            stamp = datetime.fromtimestamp(
                f.mtime).strftime("%b %d %H:%M").rjust(12)
            size = readable_size(f.sz).rjust(10)
            lines.append(' '.join((filemode(f.mode), size, stamp, f.name)))
        return '\n'.join(lines)

    def __len__(self):
        """Return the number of entries in the archive."""
        return self._cursor.execute(
            'select count(*) from sqlar').fetchone()[0]
def _log_report(archive, times):
    """Log the size and timing tables requested with ``-r``.

    *times* maps 'start', 's_read', 'e_read' and 'done' to ``time.time()``
    stamps taken by main().  Ratios that would divide by zero (empty
    archive) are reported as "n/a" instead of raising.
    """
    # `or 0`: sum() over an empty table yields None.
    size_uncompressed = archive.size() or 0
    size_compressed = archive.compressed_size() or 0
    disk_size = archive.disk_size()
    file_count = len(archive)

    row_marker = '+' + '-' * 31 + '+'
    LOG.info('\nSize usage')
    LOG.info(row_marker)
    LOG.info("| Raw blobs %s |", readable_size(size_uncompressed).rjust(12))
    LOG.info("| Compressed blobs %s |", readable_size(size_compressed).rjust(12))
    LOG.info("| Disk size %s |", readable_size(disk_size).rjust(12))
    if size_uncompressed:
        disk_pct = "{:>12.2f}".format(
            100 * float(disk_size) / size_uncompressed)
    else:
        disk_pct = "n/a".rjust(12)
    LOG.info("| Disk size (%%) %s |", disk_pct)
    LOG.info(row_marker)

    LOG.info('\nTime usage')
    LOG.info(row_marker)
    task_seconds = times['done'] - times['e_read']
    total = readable_time(timedelta(seconds=times['done'] - times['start']))
    parse = readable_time(timedelta(seconds=times['s_read'] - times['start']))
    read = readable_time(timedelta(seconds=times['e_read'] - times['s_read']))
    task = readable_time(timedelta(seconds=task_seconds))
    if file_count:
        per_file = readable_time(
            timedelta(seconds=task_seconds / file_count))
    else:
        per_file = "n/a"
    LOG.info("| Parse args %s |", parse.rjust(12))
    LOG.info("| Read sqlite %s |", read.rjust(12))
    LOG.info("| Task %s |", task.rjust(12))
    LOG.info("| Per file %s |", per_file.rjust(12))
    LOG.info("| Total %s |", total.rjust(12))
    LOG.info(row_marker)


def main():
    """Command-line entry point: add to, list, or extract from an archive.

    With ``-x`` the named FILES (or every entry when none are given) are
    extracted; with ``-l`` the archive is listed; otherwise FILES are
    added.  ``-v`` enables debug logging and ``-r`` logs a size/time
    report afterwards.
    """
    times = {
        'start': time.time(),
    }
    parser = argparse.ArgumentParser()
    parser.add_argument("ARCHIVE", type=str,
                        help="Archive filename")
    parser.add_argument("FILES", nargs='*', type=str,
                        help="File names to add to archive")
    parser.add_argument("-l", action='store_true', default=False,
                        help="See the contents of the archive.")
    parser.add_argument("-x", action='store_true', default=False,
                        help="Extract the contents of an archive.")
    parser.add_argument("-v", action='store_true', default=False,
                        help="Enable verbose output.")
    parser.add_argument("-r", action='store_true', default=False,
                        help='Report time and size information')
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.v else logging.INFO,
                        format="%(message)s")

    times['s_read'] = time.time()
    archive = SqliteArchive(args.ARCHIVE)
    times['e_read'] = time.time()

    if args.x:
        if args.FILES:
            for f in args.FILES:
                # Same leading-character stripping that add() applies
                # when it stores names.
                archive.extract(f.lstrip('./'))
        else:
            for f in archive.files:
                archive.extract(f.name)
    elif args.l:
        LOG.info("%s\n%s", archive.filename, archive.ls())
    else:
        for f in args.FILES:
            archive.add(f)
    times['done'] = time.time()

    if args.r:
        _log_report(archive, times)
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment