Skip to content

Instantly share code, notes, and snippets.

@whoeverest
Created September 30, 2012 16:59
Show Gist options
  • Save whoeverest/3807595 to your computer and use it in GitHub Desktop.
Save whoeverest/3807595 to your computer and use it in GitHub Desktop.
Directory mapper
import os
from collections import defaultdict
class DB(object):
    """Read-only, table-like view over a directory tree.

    Every file found under ``location`` becomes one row (a ``dict``). The
    folder names leading to the file, followed by the pieces of the file
    name split on ``.``, form a list of components; the last
    ``len(columns)`` components are assigned to ``columns`` in order. Each
    row additionally carries a ``'full_path'`` key with the file's path.
    """

    def __init__(self, location, columns):
        """Store the configuration; the tree is not scanned until sync()."""
        self.location = location  # path of the directory that sync() walks
        self.columns = columns  # names of the table columns
        self.data = []  # rows produced by the most recent sync()

    def _group(self, group_by, results):
        """Imitate the SQL ``GROUP BY`` operator.

        Returns a ``defaultdict(list)`` mapping every distinct value of
        ``row[group_by]`` to the rows holding it, in their original order.
        Raises ``KeyError`` if a row has no ``group_by`` key.
        """
        grouped = defaultdict(list)
        for row in results:
            grouped[row[group_by]].append(row)
        return grouped

    def _folders_from_path(self, path):
        """Return the list of folder names that make up ``path``.

        Splitting the string on ``'/'`` would be simpler, but it mishandles
        other separators and doubled slashes, so ``os.path.split`` is applied
        repeatedly instead. The root (``'/'``, ``'//'`` or a drive) is not
        included, and empty components (e.g. from a trailing slash) are
        skipped.
        """
        folders = []
        while True:
            head, tail = os.path.split(path)
            if tail:
                folders.append(tail)
            # os.path.split() stops making progress once only the root (or
            # an empty string, for relative paths) is left. Comparing with
            # the previous value instead of the literal '/' guarantees
            # termination for relative paths, '//' roots and drive roots.
            if head == path:
                break
            path = head
        folders.reverse()  # components were collected leaf-first
        return folders

    def sync(self):
        """Rebuild ``self.data`` from the files currently under ``location``.

        The table is rebuilt from scratch on every call, so calling sync()
        repeatedly never duplicates rows. When a file has fewer components
        than there are columns, its values are aligned with the *last*
        columns (as in the normal case) and the missing leading columns are
        set to ``None``.
        """
        columns = list(self.columns)
        rows = []
        for path, _dirs, files in os.walk(self.location):
            # The folder part is identical for every file in this directory.
            folders = self._folders_from_path(path)
            for f in files:
                # Folders + the filename split by `.`
                components = folders + f.split('.')
                # Keep the last N components; clamp at 0 so that a short
                # component list is not sliced with a negative index.
                start = max(0, len(components) - len(columns))
                values = components[start:]
                # Every row gets every column; values are right-aligned.
                row = dict.fromkeys(columns)
                offset = len(columns) - len(values)
                row.update(zip(columns[offset:], values))
                # Add the full path to the row
                row['full_path'] = os.path.join(path, f)
                rows.append(row)
        self.data = rows

    def get(self, group_by=None, **conditions):
        """Return the rows that satisfy all of the given conditions.

        The directory is re-scanned on every call. ``conditions`` maps column
        names to the exact value a row must hold; with no conditions every
        row is returned. If ``group_by`` is given, the matching rows are
        returned grouped by that column (see ``_group``) instead of as a
        list. Raises ``KeyError`` when a condition or ``group_by`` names a
        key that the rows do not have.

        Usage:
            db.get()  # get all the rows
            db.get(project='my_project', version='v1')
            db.get(image_id='img1', group_by='size')
        """
        self.sync()
        # all() over no conditions is True, so this also covers "no filter"
        # and always hands the caller a fresh list, never self.data itself.
        results = [
            row for row in self.data
            if all(row[col] == wanted for col, wanted in conditions.items())
        ]
        if group_by:
            results = self._group(group_by, results)
        return results
# Example usage: map a project tree laid out as
# <project>/<image_id>/<size>.<version>.<extension> onto five columns.
db = DB(location="/home/whoeverest/code/swissnet/previewer/projects",
        columns=[
            'project',
            'image_id',
            'size',
            'version',
            'extension',
        ])

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(db.get(image_id='img1'))  # all rows for one image
print(db.get(group_by='project', version='v1'))  # v1 rows, grouped by project
print(db.get(group_by='size'))  # every row, grouped by size
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment