Create a gist now

Instantly share code, notes, and snippets.

Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files
import ctypes
import ctypes.util
# C type used for the two off_t arguments of fallocate().
# NOTE(review): assumes a 64-bit off_t (true on 64-bit Linux) -- confirm
# before using this on other ABIs.
c_off_t = ctypes.c_int64


def make_fallocate():
    """Build and return a Python wrapper around libc's ``fallocate``.

    The C library is located with ``ctypes.util.find_library`` and loaded
    with ``use_errno=True`` so that the error code set by a failing call
    can be read back and reported to the caller.

    Returns:
        A function ``fallocate(fd, mode, offset, len_)``.
    """
    import os  # local import: only needed to turn errno into a message

    # use_errno=True makes ctypes keep a private copy of errno that
    # ctypes.get_errno() returns after the foreign call.
    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)

    _fallocate = libc.fallocate
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]

    def fallocate(fd, mode, offset, len_):
        """Call ``fallocate`` on *fd* for the byte range given.

        Args:
            fd: an object with a ``fileno()`` method (e.g. an open file)
                or a raw integer file descriptor.
            mode: integer mode flags passed through to the C function.
            offset: start of the range, in bytes.
            len_: length of the range, in bytes.

        Raises:
            IOError: if the C call returns non-zero; ``errno`` and
                ``strerror`` carry the error reported by libc.
        """
        fileno = fd.fileno() if hasattr(fd, 'fileno') else fd
        if _fallocate(fileno, mode, offset, len_) != 0:
            # The C return value is only a failure marker; the actual
            # reason is in errno.
            err = ctypes.get_errno()
            raise IOError(err, os.strerror(err))

    return fallocate


# Bind the wrapper once at import time; the factory is not needed afterwards.
fallocate = make_fallocate()
del make_fallocate
def punch(filename, verbose, blocksize=4096):
    """Punch a hole over every all-zero block of *filename*, in place.

    The file is read sequentially in chunks of *blocksize* bytes.  Each
    chunk that consists solely of NUL bytes is handed to ``fallocate``
    with the punch-hole and keep-size flags.

    NOTE(review): the read, loop-exit and ``fallocate`` lines were
    reconstructed from a truncated listing -- confirm against the
    upstream script.

    Args:
        filename: path of the file to modify in place.
        verbose: when true, print the file name and every hole punched.
        blocksize: number of bytes examined per step (default 4096).

    Raises:
        IOError: if the file cannot be opened or ``fallocate`` fails.
    """
    # Flag values from <linux/falloc.h>.  Kept local so this function does
    # not rely on module-level constants.
    falloc_fl_keep_size = 0x01
    falloc_fl_punch_hole = 0x02
    # Punch-hole must be combined with keep-size (see fallocate(2)).
    mode = falloc_fl_punch_hole | falloc_fl_keep_size

    if verbose:
        print("processing %s" % filename)

    # Binary mode: the content is compared byte-for-byte against NUL.
    with open(filename, 'r+b') as f:
        offset = 0
        while True:
            buf = f.read(blocksize)
            if not buf:
                break  # end of file
            # A single C-level count replaces the per-character loop.
            if buf.count(b'\x00') == len(buf):
                if verbose:
                    print("punching hole at offset %d length %d"
                          % (offset, len(buf)))
                fallocate(f, mode, offset, len(buf))
            offset += len(buf)
if __name__ == '__main__':
    import sys
    import argparse

    # Command-line entry point: parse the file list and the verbosity
    # switch, then process each named file in the order given.
    cli = argparse.ArgumentParser(
        description="Punch out the empty areas in a file, making it sparse",
    )
    cli.add_argument(
        'file',
        metavar='FILE',
        nargs='+',
        help='file(s) to modify in-place',
    )
    cli.add_argument(
        '-v', '--verbose',
        action="store_true",
        default=False,
        help='be verbose',
    )
    options = cli.parse_args()

    for path in options.file:
        punch(path, options.verbose)

Using a regex (/^\s*$/) instead of looping through each character produced a tenfold speed increase on my machine (see my fork).


A version that uses the comparison
buff == "\x00" * blocksize
might be even faster and does not require the re module.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment