Skip to content

Instantly share code, notes, and snippets.

@alexpearce
Created October 2, 2018 07:31
Show Gist options
  • Save alexpearce/5bcb5cebcea1747eae04a4c32369ad00 to your computer and use it in GitHub Desktop.
Save alexpearce/5bcb5cebcea1747eae04a4c32369ad00 to your computer and use it in GitHub Desktop.
Snakemake remote file support for a Python dictionary backed by a shelve database.
import shelve
import string
import time
class MeasurementDB(object):
    r"""Database for storing measurements, with export to LaTeX macros.

    Values are persisted in a `shelve` database. Each entry is stored as a
    `(timestamp, value)` tuple, where the timestamp is the integer Unix time
    at which the value was set.

    Example usage:

        db = MeasurementDB()
        db['voltage'] = 5.3
        db['current'] = 0.01
        # Dump the measurements as LaTeX macros
        # Can then use `\voltageval` and `\currentval`
        db.to_latex('measurements.tex')
        db.close()

    The instance can also be used as a context manager, which closes the
    database on exit:

        with MeasurementDB() as db:
            db['voltage'] = 5.3
    """
    # Extension to give the database file
    EXTENSION = '.db'
    # Characters allowed in LaTeX macro names
    ALLOWED_KEY_CHARACTERS = string.ascii_letters

    def __init__(self, path='measurements'):
        """Open the shelve database at path.

        EXTENSION is appended to path when it is not already present.
        """
        if not path.endswith(self.EXTENSION):
            path += self.EXTENSION
        self.path = path
        self.db = shelve.open(self.path)

    def close(self):
        """Close the underlying shelve database."""
        self.db.close()

    def __enter__(self):
        """Return the instance itself for use in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the database when leaving the `with` block."""
        self.close()

    def _check_key_validity(self, key):
        """Raise ValueError if key is empty or has non-alphabetic characters."""
        # `all()` is True for an empty iterable, so the empty key has to be
        # rejected explicitly; it cannot name a LaTeX macro.
        if not key:
            raise ValueError('Key must be a non-empty string')
        if not all(c in self.ALLOWED_KEY_CHARACTERS for c in key):
            raise ValueError('Key {0} contains invalid characters'.format(
                key
            ))

    def __setitem__(self, key, item):
        """Add the item to the DB, referenced by the key.

        The key must be a valid LaTeX macro name, else ValueError is raised.

        A creation timestamp is stored alongside the value. This is necessary
        for Snakemake to determine whether dependent rules need re-running.
        """
        self._check_key_validity(key)
        self.db[key] = (int(time.time()), item)

    def __getitem__(self, key):
        """Return the value stored under key, without its timestamp."""
        _, value = self.db[key]
        return value

    def timestamp(self, key):
        """Return the integer timestamp recorded when key was last set."""
        timestamp, _ = self.db[key]
        return timestamp

    def to_latex(self, fpath):
        r"""Save the measurements as LaTeX macros to a file at fpath.

        A key `name` holding value `v` is written as `\def\nameval {v}`, so
        the measurement is available in LaTeX as `\nameval`. The first line
        of the file is a comment recording the generation time.
        """
        now = time.strftime('%Y/%m/%d at %H:%M:%S')
        lines = [
            '% Auto-generated on {0}\n'.format(now)
        ]
        # Sort the keys so that the output does not depend on the storage
        # order of the underlying dbm backend.
        for key in sorted(self.db):
            value = self[key]
            lines.append(r'\def\{0}val {{{1}}}'.format(key, value) + '\n')
        with open(fpath, 'w') as f:
            f.writelines(lines)
"""Snakemake remote file provider implementation for a shelve database.
Allows keys in a shelve database to be specified as input/output 'files':
from remote_shelve import RemoteProvider
db = RemoteProvider("measurements.db", stay_on_remote=True)
rule export_measurement:
input: db.key("{key}")
output: "output/{key}.tex"
shell: "python export_measurement.py {wildcards.key} > {output}"
rule add_key:
output: db.key("{key,[A-Za-z]+}")
shell: "python create_measurement.py {wildcards.key}"
Because the entries do not represent real files, one must specify
`stay_on_remote=True`.
Note that the full remote 'file' path is the key prefixed with `db:`, e.g.
`db:somekey`.
"""
import shelve
from snakemake.remote import AbstractRemoteObject, AbstractRemoteProvider
class RemoteProvider(AbstractRemoteProvider):
    """Snakemake remote provider whose 'files' are keys of a shelve database."""

    def __init__(self, path, *args, **kwargs):
        """Initialise the base provider and remember the database path.

        All arguments, including path, are forwarded to the base class.
        """
        super().__init__(path, *args, **kwargs)
        self._path = path

    @property
    def default_protocol(self):
        """Protocol prefix used for keys of this provider."""
        return 'db:'

    @property
    def available_protocols(self):
        """List of supported protocols; only the default one."""
        return [self.default_protocol]

    @property
    def db(self):
        """Open and return the shelve database at the stored path.

        A new handle is opened on every access. The caller should close it
        after use, either with `close()` or by using it as a context manager.
        """
        handle = shelve.open(self._path)
        return handle

    def key(self, name, *args, **kwargs):
        """Alias of the `remote` method with a more meaningful name."""
        remote_object = self.remote(name, *args, **kwargs)
        return remote_object
class RemoteObject(AbstractRemoteObject):
    """A single key of the provider's shelve database, treated as a file.

    Each method obtains a database handle from `self.provider.db` and uses it
    as a context manager, so the handle is closed when the method returns.
    The key is the value returned by `self.local_file()`.
    """

    def exists(self):
        """Return True if this object's key is present in the database."""
        with self.provider.db as db:
            return self.local_file() in db

    def mtime(self):
        """Return the first element of the pair stored under this key.

        Raises KeyError if the key is not in the database.
        """
        with self.provider.db as db:
            # Entries unpack as a (timestamp, value) pair
            dt, _ = db[self.local_file()]
        return dt

    def size(self):
        """Return a constant size of 1 for every entry."""
        return 1

    @property
    def name(self):
        """The key this object refers to."""
        return self.local_file()

    def list(self):
        """Return all keys currently in the database as a list."""
        with self.provider.db as db:
            # Copy the keys while the shelf is still open: `db.keys()` is a
            # lazy view over the shelf and raises ValueError once the `with`
            # block has closed it.
            return [key for key in db]

    def remove(self):
        """Delete this object's key from the database.

        Raises KeyError if the key is not in the database.
        """
        with self.provider.db as db:
            del db[self.local_file()]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment