Skip to content

Instantly share code, notes, and snippets.

@craSH
Created April 24, 2010 03:03
Show Gist options
  • Save craSH/377414 to your computer and use it in GitHub Desktop.
Save craSH/377414 to your computer and use it in GitHub Desktop.
Store data somewhere on disk based on that data's sha1 (or whatever) hexdigest. Useful for storing lots of random data on a filesystem when you don't want to use a database for some reason.
#!/usr/bin/env python
#
# (C) Ian Gallagher <crash@neg9.org>
#
def savedata(basedir, data, hashalgo="sha1"):
    """
    Store a chunk of data on disk at a path derived from its own digest.

    The hex digest of the data is split into two-character (single-byte)
    pieces; every piece but the last becomes a nested directory under
    basedir and the last piece becomes the file name.

    Parameter basedir: The base directory to store this data in
    Parameter data: The data to store and from which the path is derived
        (must be acceptable to hashlib, i.e. bytes on Python 3)
    Parameter hashalgo: The hashing algorithm to use (as supported by
        python's hashlib module)

    Returns a string which is the complete path to the newly written file.
    Raises ValueError if basedir is empty, or if hashlib does not know
    hashalgo.
    """
    # TODO: Support using a file-like object instead of a 'data' string later
    if not basedir:
        raise ValueError("basedir not provided")

    import hashlib

    digest = hashlib.new(hashalgo)
    digest.update(data)
    hex_digest = digest.hexdigest()

    # Binary mode: the bytes on disk must be exactly the bytes that were
    # hashed, with no newline translation or text encoding applied.
    # The with-block closes the handle even when the write fails.
    with hashpath(basedir, hex_digest, 'wb') as fh:
        fh.write(data)
    return fh.name
def hashpath(basedir, hash, *args):
    """
    Open the file that corresponds to a hex digest underneath basedir.

    The digest is cut into two-character pieces. All pieces except the last
    become nested directories below basedir (created if missing); the last
    piece is the file name.

    Parameter basedir: The base directory the hashed tree lives in
    Parameter hash: The hex digest string the path is derived from
    Parameter args: Extra positional arguments handed straight to open()
        after the path (mode, buffering, ...). Defaults to mode 'w'.

    Returns the open file object; the caller is responsible for closing it.
    Raises ValueError if basedir or hash is empty. Errors from creating the
    directories or opening the file propagate unchanged.
    """
    if not basedir:
        raise ValueError("basedir not provided")
    if not hash:
        raise ValueError("hash not provided")
    if not args:
        args = ('w',)

    import os

    # Two hex characters == one byte of the digest per path component.
    hash_chunks = [hash[i:i + 2] for i in range(0, len(hash), 2)]
    hash_dirs = hash_chunks[:-1]
    hash_file = hash_chunks[-1]

    hash_directory = os.path.join(basedir, os.path.sep.join(hash_dirs))
    try:
        os.makedirs(hash_directory)
    except OSError:
        # An already-existing directory is fine (possibly created by a
        # concurrent caller); anything else is a real failure.
        if not os.path.isdir(hash_directory):
            raise

    complete_path = os.path.join(hash_directory, hash_file)
    return open(complete_path, *args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment