Created
September 30, 2012 16:59
-
-
Save whoeverest/3807595 to your computer and use it in GitHub Desktop.
Directory mapper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from collections import defaultdict | |
class DB(object):
    """ Treats a directory tree as a read-only table.

    Every file found below `location` becomes one row. The row's values are
    the trailing path components (folder names followed by the file name
    split on `.`), mapped in order onto `columns`. Each row also carries a
    `full_path` key holding the path of the file it was built from.
    """

    def __init__(self, location, columns):
        self.location = location  # path to the location
        self.columns = columns  # names of the table columns
        self.data = []  # rows (dicts) produced by the last sync()

    def _group(self, group_by, results):
        """ Imitates the GROUP BY SQL operator.

        Returns a defaultdict(list) that maps every distinct value of the
        `group_by` column to the list of rows holding that value. Raises
        KeyError if a row has no `group_by` column.
        """
        grouped = defaultdict(list)
        for row in results:
            grouped[row[group_by]].append(row)
        return grouped

    def _folders_from_path(self, path):
        """ Helper function that extracts the list of folders from a given
        path, outermost folder first. A naive solution would be to split the
        string with delimiter '/', but that approach fails to deal with
        backslashes and doubled separators.
        """
        folders = []
        while True:
            head, tail = os.path.split(path)
            # A trailing separator yields an empty tail; it is not a folder.
            if tail:
                folders.append(tail)
            # Stop once nothing is left to strip: an empty head means a
            # relative path is exhausted, an unchanged head means the root
            # (or a drive) was reached. Comparing against '/' alone would
            # never terminate for relative paths.
            if not head or head == path:
                break
            path = head
        return folders[::-1]  # reversed

    def sync(self):
        """ A function that generates the db table out of the specified
        location.

        The table is rebuilt from scratch on every call, so syncing
        repeatedly never duplicates rows.
        """
        rows = []
        for path, dirs, files in os.walk(self.location):
            folders = self._folders_from_path(path)
            for f in files:
                # Folders + the filename split by `.`
                all_columns = folders + f.split('.')
                # The location of the first useful (real) column. Clamped at
                # zero: a negative start would silently drop the leading
                # components of paths shorter than the column list.
                start = max(len(all_columns) - len(self.columns), 0)
                # Get the last N columns
                values = all_columns[start:]
                # Generate the row; zip() stops at the shorter sequence, so
                # a short path simply leaves the trailing columns unset.
                row = dict(zip(self.columns, values))
                # Add the full path to the row
                row['full_path'] = os.path.join(path, f)
                rows.append(row)
        self.data = rows

    def get(self, group_by=None, **conditions):
        """ Returns a list of rows that satisfy the conditions.
        Optionally, you can group the results, in which case a mapping of
        column value -> list of rows is returned instead.

        A row that lacks one of the condition columns does not match.

        Usage:
        >>> db.get() # get all the rows
        >>> db.get(project='my_project', version='v1')
        >>> db.get(image_id='img1', group_by='size')
        """
        self.sync()
        # With no conditions all() is vacuously true and every row is kept.
        results = [
            row for row in self.data
            if all(col in row and row[col] == wanted
                   for col, wanted in conditions.items())
        ]
        if group_by:
            results = self._group(group_by, results)
        return results
# Example usage: map the project tree onto five columns. The last five path
# components of every file (folders, then the file name split on `.`) fill
# the columns in this order.
db = DB(location="/home/whoeverest/code/swissnet/previewer/projects",
        columns=[
            'project',
            'image_id',
            'size',
            'version',
            'extension',
        ])

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare `print expr` statement is a SyntaxError on Python 3.
print(db.get(image_id='img1'))
print(db.get(group_by='project', version='v1'))
print(db.get(group_by='size'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment