Created
January 8, 2021 03:27
-
-
Save peterspackman/928467e0c90f7771d20ea74d2f8f0eb4 to your computer and use it in GitHub Desktop.
SQLite compressed file archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import argparse | |
from datetime import datetime, timedelta | |
import logging | |
import os | |
import sqlite3 | |
import zlib | |
import stat | |
import time | |
from collections import namedtuple | |
# Metadata for one archived file: everything stored in the sqlar table
# except the compressed blob itself.
SqliteArchiveFile = namedtuple('SqliteArchiveFile', 'name mode mtime sz')
# Module-level logger shared by SqliteArchive and main().
LOG = logging.getLogger("sqlar.py")
# Lookup table mapping mode bits to the characters of an 'ls -l' style
# permission string, one inner tuple per output position.
_filemode_table = (
    ((stat.S_IFLNK, "l"),
     (stat.S_IFREG, "-"),
     (stat.S_IFBLK, "b"),
     (stat.S_IFDIR, "d"),
     (stat.S_IFCHR, "c"),
     (stat.S_IFIFO, "p")),

    ((stat.S_IRUSR, "r"),),
    ((stat.S_IWUSR, "w"),),
    ((stat.S_IXUSR | stat.S_ISUID, "s"),
     (stat.S_ISUID, "S"),
     (stat.S_IXUSR, "x")),

    ((stat.S_IRGRP, "r"),),
    ((stat.S_IWGRP, "w"),),
    ((stat.S_IXGRP | stat.S_ISGID, "s"),
     (stat.S_ISGID, "S"),
     (stat.S_IXGRP, "x")),

    ((stat.S_IROTH, "r"),),
    ((stat.S_IWOTH, "w"),),
    ((stat.S_IXOTH | stat.S_ISVTX, "t"),
     (stat.S_ISVTX, "T"),
     (stat.S_IXOTH, "x")),
)


def filemode(mode):
    """Convert a file's mode to a string of the form '-rwxrwxrwx'."""
    chars = []
    for candidates in _filemode_table:
        # The first bit pattern fully contained in the mode decides the
        # character for this position; no match yields '-'.
        symbol = "-"
        for bit, char in candidates:
            if mode & bit == bit:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
def readable_size(num, suffix='B'):
    """Format *num* as a human-readable size using binary (1024) prefixes."""
    magnitude = float(num)
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(magnitude) < 1024.0:
            return "{:3.1f}{}{}".format(magnitude, prefix, suffix)
        magnitude /= 1024.0
    # Anything this large falls through to the yobibyte suffix.
    return "{:.1f}{}{}".format(magnitude, "Yi", suffix)
def readable_time(td):
    """Format a timedelta as '<x> s' or '<x> ms', whichever reads better.

    Durations above 100 ms are printed in seconds; shorter ones in
    milliseconds.
    """
    fmt = '{:0.2f} {}'
    # Use the full duration.  The previous version read only
    # td.microseconds, so e.g. an exact 2-second delta (microseconds == 0)
    # was reported as "0.00 ms"; it also ended with an unreachable
    # 'return result' referencing an undefined name.
    total_ms = td.total_seconds() * 1000.0
    if total_ms > 100:
        return fmt.format(total_ms / 1000.0, "s")
    return fmt.format(total_ms, "ms")
class SqliteArchive(object):
    """A file archive stored in a single SQLite database (the sqlar format).

    Each archived file is one row of the ``sqlar`` table; contents are
    zlib-compressed in the ``data`` column.  Opening the archive creates
    the table if it does not exist.
    """

    _filename = None   # path of the database file
    _conn = None       # sqlite3 connection
    _cursor = None     # shared cursor for all queries
    _SCHEMA = """
    create table if not exists sqlar(
        name text primary key,
        mode int,
        mtime int,
        sz int,
        data blob);
    """

    def __init__(self, filename):
        """Open (creating if necessary) the archive database *filename*."""
        self._filename = filename
        self._connect()
        self._create_table()

    def _connect(self):
        # Open the database and keep one cursor for the archive's lifetime.
        self._conn = sqlite3.connect(self._filename)
        self._cursor = self._conn.cursor()

    def _create_table(self):
        self._cursor.execute(self._SCHEMA)

    @property
    def filename(self):
        """Path of the underlying database file."""
        return self._filename

    def size(self):
        """Total uncompressed size of all files, or None if empty."""
        return self._cursor.execute(
            'select sum(sz) from sqlar').fetchone()[0]

    def compressed_size(self):
        """Total size of the stored compressed blobs, or None if empty."""
        return self._cursor.execute(
            'select sum(length(data)) from sqlar').fetchone()[0]

    def disk_size(self):
        """Size on disk of the database file itself."""
        return os.stat(self._filename).st_size

    def contains(self, filename):
        """Return a SqliteArchiveFile for *filename*, or None if absent."""
        row = self._cursor.execute('select name, mode, mtime, sz '
                                   'from sqlar where name = ?',
                                   (filename,)).fetchone()
        file_info = None
        if row:
            file_info = SqliteArchiveFile(*row)
        return file_info

    def add(self, filename):
        """Insert or update *filename* in the archive.

        Skips the archive database itself, and any file whose on-disk
        mtime is not newer than the stored copy.
        """
        if filename == self._filename:
            # An archive containing itself would grow on every update.
            # (was LOG.warn, which is deprecated and removed in Python 3.13)
            LOG.warning("s %s -- "
                        "not possible to add an archive to itself", filename)
            return
        stats = os.stat(filename)
        f_info = self.contains(filename)
        if f_info and stats.st_mtime <= f_info.mtime:
            LOG.debug("s %s -- "
                      "no change since last write", filename)
            return
        LOG.debug('a %s', filename)
        with open(filename, 'rb') as f:
            contents = f.read()
        # NOTE(review): lstrip('./') strips *any* run of leading '.' and '/'
        # characters, so a dotfile like '.gitignore' is stored as
        # 'gitignore'.  main() applies the same transform when extracting,
        # so round-trips still work; fix both together if this is changed.
        self._cursor.execute(
            "insert or replace into sqlar values(?,?,?,?,?)",
            (filename.lstrip('./'),
             stats.st_mode,
             stats.st_mtime,
             stats.st_size,
             sqlite3.Binary(zlib.compress(contents)))
        )
        self._conn.commit()

    def extract(self, filename):
        """Write the archived *filename* out under the current directory.

        Restores the stored mtime.  Raises TypeError (unpacking None) if
        the name is not present in the archive.
        """
        self._cursor.execute(
            "select mtime, data from sqlar where name = ?", (filename,))
        mtime, data = self._cursor.fetchone()
        # place the file under the cwd; directory creation is best-effort
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            # directory already exists, or dirname is '' for bare names
            pass
        LOG.debug("x %s", filename)
        with open(filename, 'wb') as f:
            f.write(zlib.decompress(data))
        stats = os.stat(filename)
        # set correct mtime, preserving the current atime
        os.utime(filename, (stats.st_atime, mtime))

    @property
    def files(self):
        """Iterate over SqliteArchiveFile records for every archived file."""
        for row in self._cursor.execute(
                "select name, mode, mtime, sz from sqlar").fetchall():
            yield SqliteArchiveFile(*row)

    def find(self, pattern):
        """Iterate over records whose name matches the SQL LIKE *pattern*."""
        for row in self._cursor.execute(
                "select name, mode, mtime, sz from sqlar where name like ?",
                (pattern,)).fetchall():
            yield SqliteArchiveFile(*row)

    def contents(self, filename, decode=None):
        """Return the decompressed contents of *filename*.

        Returns bytes, or a str decoded with *decode* if given; None when
        the file is not in the archive.
        """
        self._cursor.execute(
            "select data from sqlar where name = ?", (filename,))
        data = self._cursor.fetchone()
        if data is None:
            return data
        contents = zlib.decompress(data[0])
        if decode:
            contents = contents.decode(decode)
        return contents

    def ls(self):
        """Return an 'ls -l' style listing of the archive, one file per line."""
        lines = []
        for f in self.files:
            time = datetime.fromtimestamp(f.mtime).strftime("%b %d %H:%M").rjust(12)
            size = readable_size(f.sz).rjust(10)
            lines.append(' '.join((filemode(f.mode), size, time, f.name)))
        return '\n'.join(lines)

    def __len__(self):
        """Number of files stored in the archive."""
        return self._cursor.execute('select count(*) from sqlar').fetchone()[0]
def _report(archive, times):
    """Log size and timing statistics for a finished run.

    *times* holds the 'start', 's_read', 'e_read' and 'done' wall-clock
    timestamps collected by main().
    """
    size_uncompressed = archive.size()
    size_compressed = archive.compressed_size()
    row_marker = '+' + '-' * 31 + '+'
    LOG.info('\nSize usage')
    LOG.info(row_marker)
    # An empty archive yields sum(...) == NULL (None): the previous code
    # crashed here with a TypeError / ZeroDivisionError under -r.
    if size_uncompressed:
        LOG.info("| Raw blobs %s |", readable_size(size_uncompressed).rjust(12))
        LOG.info("| Compressed blobs %s |", readable_size(size_compressed).rjust(12))
        LOG.info("| Disk size %s |", readable_size(archive.disk_size()).rjust(12))
        LOG.info("| Disk size (%%) %s |",
                 "{:>12.2f}".format(100 * float(archive.disk_size())/size_uncompressed))
    else:
        LOG.info("| (archive is empty) %s |", "".rjust(12))
    LOG.info(row_marker)
    LOG.info('\nTime usage')
    LOG.info(row_marker)
    total = readable_time(timedelta(seconds=times['done'] - times['start']))
    parse = readable_time(timedelta(seconds=times['s_read'] - times['start']))
    read = readable_time(timedelta(seconds=times['e_read'] - times['s_read']))
    task = readable_time(timedelta(seconds=times['done'] - times['e_read']))
    # Guard against division by zero when the archive has no files.
    n_files = len(archive)
    per_file_s = (times['done'] - times['e_read']) / n_files if n_files else 0.0
    per_file = readable_time(timedelta(seconds=per_file_s))
    LOG.info("| Parse args %s |", parse.rjust(12))
    LOG.info("| Read sqlite %s |", read.rjust(12))
    LOG.info("| Task %s |", task.rjust(12))
    LOG.info("| Per file %s |", per_file.rjust(12))
    LOG.info("| Total %s |", total.rjust(12))
    LOG.info(row_marker)


def main():
    """Command line entry point: add to, list, or extract an archive."""
    times = {
        'start': time.time(),
    }
    parser = argparse.ArgumentParser()
    parser.add_argument("ARCHIVE", type=str,
                        help="Archive filename")
    parser.add_argument("FILES", nargs='*', type=str,
                        help="File names to add to archive")
    parser.add_argument("-l", action='store_true', default=False,
                        help="See the contents of the archive.")
    parser.add_argument("-x", action='store_true', default=False,
                        help="Extract the contents of an archive.")
    parser.add_argument("-v", action='store_true', default=False,
                        help="Enable verbose output.")
    parser.add_argument("-r", action='store_true', default=False,
                        help='Report time and size information')
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.v else logging.INFO,
                        format="%(message)s")
    times['s_read'] = time.time()
    archive = SqliteArchive(args.ARCHIVE)
    times['e_read'] = time.time()
    if args.x:
        if args.FILES:
            # Same './'-stripping transform that add() applies on insert.
            for f in args.FILES:
                archive.extract(f.lstrip('./'))
        else:
            for f in archive.files:
                archive.extract(f.name)
    elif args.l:
        LOG.info("%s\n%s", archive.filename, archive.ls())
    else:
        for f in args.FILES:
            archive.add(f)
    times['done'] = time.time()
    if args.r:
        _report(archive, times)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment