Skip to content

Instantly share code, notes, and snippets.

@moreati
Last active July 29, 2022 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moreati/8e9e1797d679cb16174e9d4beeceb4c2 to your computer and use it in GitHub Desktop.
Save moreati/8e9e1797d679cb16174e9d4beeceb4c2 to your computer and use it in GitHub Desktop.
Prototyping an `os.walk()` alternative
import os
import shutil
import string
import timeit
class FakeDirEntry:
def __init__(self, path):
self.path = path
self.name = path
def __repr__(self):
return f'<{__class__.__name__} {self.path!r}>'
def walk2(top, *, follow_symlinks=False):
"""
Yield a DirEntry object for each directory or file rooted at top
(including top itself, but excluding '.' and '..').
This aims to be a faster alternative to `os.walk`, by avoiding allocations
and construction of lists.
"""
if not isinstance(top, (os.DirEntry, FakeDirEntry)):
yield FakeDirEntry(top)
else:
yield top
yield from _walk2(top, follow_symlinks=follow_symlinks)
def _walk2(path, *, follow_symlinks=False):
with os.scandir(path) as it:
for entry in it:
skip = yield entry
if skip:
continue
if entry.is_dir(follow_symlinks=follow_symlinks):
yield from _walk2(entry)
def setup(top):
try:
shutil.rmtree(top)
except FileNotFoundError:
pass
os.mkdir(top)
dirs = []
dirnames = set(string.ascii_lowercase)
filenames = [f'f{i}' for i in range(10)]
try:
dirs.append(os.getcwd())
os.chdir(top)
for l1 in dirnames:
os.mkdir(l1)
dirs.append(os.getcwd())
os.chdir(l1)
for fname in filenames:
with open(fname, 'wb') as f: pass
for l2 in dirnames:
os.mkdir(l2)
dirs.append(os.getcwd())
os.chdir(l2)
for fname in filenames:
with open(fname, 'wb') as f: pass
for l3 in dirnames:
os.mkdir(l3)
dirs.append(os.getcwd())
os.chdir(l3)
for fname in filenames:
with open(fname, 'wb') as f: pass
os.chdir(dirs.pop())
os.chdir(dirs.pop())
os.chdir(dirs.pop())
finally:
os.chdir(dirs[0])
def os_walk_tally(top):
for _, _, files in os.walk(top):
yield 1
for f in files:
yield 1
if __name__ == '__main__':
workdir = '/tmp/src'
repetitions = 1
setup(workdir)
w2_count = sum(1 for e in walk2(workdir))
w2_total_duration = timeit.timeit(
stmt='sum(1 for e in walk2(workdir))',
globals={'walk2': walk2, 'workdir': workdir},
number=repetitions,
)
w2_duration = w2_total_duration/repetitions
w_count = sum(os_walk_tally(workdir))
w_total_duration = timeit.timeit(
stmt='sum(os_walk_tally(workdir))',
globals={'os_walk_tally': os_walk_tally, 'workdir': workdir},
number=repetitions,
)
w_duration = w_total_duration/repetitions
w_relative = w_duration/w_duration
w2_relative = w2_duration/w_duration
print(f'os.walk: count={w_count}\ttime={w_duration:.3}s\trelative={w_relative:.2}x')
print(f'walk2 : count={w2_count}\ttime={w2_duration:.3}s\trelative={w2_relative:.2}x')
@moreati
Copy link
Author

moreati commented Sep 23, 2021

Marginally faster on deep/wide tree on tmpfs

$ python3 walk.py 
os.walk: 201059 entries in 0.36 seconds
walk2  : 201059 entries in 0.299 seconds
$ python3 --version
Python 3.8.10
$ uname -a
Linux martha 5.11.0-34-generic #36~20.04.1-Ubuntu SMP Fri Aug 27 08:06:32 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux

@moreati
Copy link
Author

moreati commented Jul 29, 2022

10 - 20% faster than os.walk for a large directory tree. Depending on cross wind and other factors.

Raspberry Pi 4, tmpfs

pi@rpi1:~ $ python3 --version
Python 3.9.2
pi@rpi1:~ $ mount | grep /tmp
tmpfs on /tmp type tmpfs (rw,nosuid,nodev,relatime)
pi@rpi1:~ $ python3 walk.py 
os.walk: count=201059	time=1.47s	relative=1.0x
walk2  : count=201059	time=1.2s	relative=0.81x
pi@rpi1:~ $ python3 walk.py 
os.walk: count=201059	time=1.47s	relative=1.0x
walk2  : count=201059	time=1.18s	relative=0.8x

macOS 12, M1 Pro MBP

$ python walk.py   
os.walk: count=201059	time=0.363s	relative=1.0x
walk2  : count=201059	time=0.324s	relative=0.89x
$ python walk.py
os.walk: count=201059	time=0.359s	relative=1.0x
walk2  : count=201059	time=0.319s	relative=0.89x

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment