Skip to content

Instantly share code, notes, and snippets.

@raph-amiard
Created December 12, 2010 11:28
Show Gist options
  • Save raph-amiard/737990 to your computer and use it in GitHub Desktop.
Save raph-amiard/737990 to your computer and use it in GitHub Desktop.
file tree data structure
from os import walk
from os.path import basename, abspath, join
from time import time
class Node(object):
DIR = 1
FILE = 2
def __init__(self,type, value):
self.children = {}
self.value = value
if type in [self.DIR, self.FILE]:
self.type = type
def add_child(self, type, value, leaf=False):
node = self.children.get(value, Node(type, value))
if leaf:
self.children[value] = None
else:
self.children[value] = node
return node
class FileTree(object):
def __init__(self, files_paths=None):
self.root = Node(Node.DIR, "")
if files_paths:
for file_path in files_paths:
self.add_file(file_path)
def add_file(self, file_path):
pieces = filter(lambda x: x, file_path.split("/"))
self.add_pieces(self.root, pieces)
def add_pieces(self, node, pieces):
if len(pieces) == 1:
node.add_child(Node.FILE, pieces[0], leaf=True)
else:
child_node = node.add_child(Node.DIR, pieces[0])
self.add_pieces(child_node, pieces[1:])
def exists(self, file_path):
pieces = filter(lambda x: x, file_path.split("/"))
return self._exists(self.root, pieces)
def _exists(self, node, pieces):
if len(pieces) > 1:
m = self._exists(node.children[pieces[0]], pieces[1:])
else:
m = True
return node.children.has_key(pieces[0]) and m
def make_filetree(lst):
ft = FileTree()
for root, subfolder, files in lst:
for filename in files:
path = join(abspath(root), basename(filename))
ft.add_file(path)
return ft
def make_fileset(lst):
fs = set()
for root, subfolder, files in lst:
for filename in files:
path = join(abspath(root), basename(filename))
fs.add(path)
return fs
def make_filelist(lst):
l = list()
for root, subfolder, files in lst:
for filename in files:
path = join(abspath(root), basename(filename))
l.append(path)
return l
def benchmark(path):
print "WALKING DIR"
w = list(walk(path))
print "MAKING FILESET"
fs = make_fileset(w)
print "MAKING FILETREE"
ft = make_filetree(w)
print "MAKING LIST"
l = make_filelist(w)
time_start = time()
for path in l:
path in fs
print "TIME FOR FILESET : ", time() - time_start
time_start = time()
for path in l:
ft.exists(path)
print "TIME FOR FILETREE : ", time() - time_start
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment