Skip to content

Instantly share code, notes, and snippets.

@cbsmith
Last active March 26, 2017 22:31
Show Gist options
  • Save cbsmith/d8ba58a8c1f8d79d3681ae611d464fa7 to your computer and use it in GitHub Desktop.
Save cbsmith/d8ba58a8c1f8d79d3681ae611d464fa7 to your computer and use it in GitHub Desktop.
Live tail of a file
'''
Live tail of a file. Uses mmap to try to be efficient with memory.
Requires fsmonitor: https://github.com/shaurz/fsmonitor
...which unfortunately lacks a decent monitor for non-Windows/Linux
systems, so burns up CPU unnecessarily on those platforms. Despite its
reputation, Python makes it surprisingly difficult to write clean and
correct code.
'''
# all the python things we need
from contextlib import closing
from mmap import mmap, ACCESS_READ, ALLOCATIONGRANULARITY
from os import fdopen, stat, fstat
from sys import argv, exit, stdout
# the magic
from fsmonitor import FSMonitor
def dump_contents(fd, offset, output):
    '''
    Copy everything in a file from `offset` to its current end into `output`.

    fd - a file descriptor, needs to support mmap
    offset - offset into the file to start at
    output - object with a write() method that accepts the bytes read

    Returns the file size observed by this call, which the caller can pass
    back in as the next `offset`.

    Exits the process with status -1 (SystemExit) if the file is now
    shorter than `offset`.
    '''
    filesize = fstat(fd).st_size
    gap = filesize - offset
    if gap < 0:
        exit(-1)  # file shrank, can't do much useful
    if gap > 0:
        # mmap offsets must be a multiple of ALLOCATIONGRANULARITY, so map
        # from the boundary at or below `offset` and skip the slack.
        pageoffset = offset % ALLOCATIONGRANULARITY
        mmapoffset = offset - pageoffset
        with closing(mmap(fd, filesize - mmapoffset, access=ACCESS_READ,
                          offset=mmapoffset)) as mapped:
            mapped.seek(pageoffset)
            remaining = gap
            # The first chunk only runs up to the next granularity boundary
            # (handles the misaligned bits); later chunks are whole units.
            chunk = min(ALLOCATIONGRANULARITY - pageoffset, remaining)
            # Bound the loop by the bytes still owed, not by the file size,
            # so a small append to a huge file costs a single write.
            while remaining > 0:
                output.write(mapped.read(chunk))
                remaining -= chunk
                chunk = min(ALLOCATIONGRANULARITY, remaining)
    return filesize
def tail(infile=__file__, outfile=None):
    '''
    Follow `infile`, copying bytes appended to it into `outfile`.

    infile - path of the file to watch (defaults to this script itself)
    outfile - path to write the new bytes to; when empty or None the bytes
              go to the process's standard output

    Only data appended after this call starts is emitted. The function
    loops forever and only ends when an exception propagates out of it
    (for example SystemExit raised by dump_contents).
    '''
    # disable buffering so appended data shows up as soon as it is written
    with open(outfile, 'wb', 0) if outfile else fdopen(stdout.fileno(), 'wb', 0) as output:
        with open(infile, 'rb') as f:
            fd = f.fileno()
            m = FSMonitor()
            m.add_file_watch(infile)
            # Measure the starting offset on the descriptor we actually
            # read from: a stat() by path could describe a different file
            # if the path was replaced after open().
            offset = fstat(fd).st_size
            while True:
                # one dump per reported event; dump_contents returns the
                # size it saw, which becomes the next starting offset
                for _ in m.read_events():
                    offset = dump_contents(fd, offset, output)
if __name__ == '__main__':
    # command line: [infile [outfile]] -- forwarded positionally to tail()
    cli_args = argv[1:]
    tail(*cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment