Skip to content
Create a gist now

Instantly share code, notes, and snippets.

Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files
import ctypes
import ctypes.util
# Offsets and lengths are handed to libc as 64-bit integers.  That matches
# off_t on 64-bit Linux; on 32-bit builds only fallocate64 takes this width,
# which is why make_fallocate() prefers that symbol when libc exports it.
c_off_t = ctypes.c_int64


def make_fallocate():
    """Build and return a Python wrapper around libc's fallocate(2).

    The returned function has the signature
    ``fallocate(fd, mode, offset, len_)`` where:

    * ``fd`` is an object with a ``fileno()`` method (e.g. an open file)
      or a raw integer file descriptor,
    * ``mode`` is the fallocate mode bit mask,
    * ``offset`` and ``len_`` are the byte range to operate on.

    The wrapper returns None on success and raises IOError carrying the
    real ``errno`` reported by libc on failure.
    """
    import os  # local import: only needed to render the error message

    # use_errno=True makes ctypes keep a private copy of errno for calls
    # made through this handle, so it can be read back reliably with
    # ctypes.get_errno() after a failing call.
    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    try:
        _fallocate = libc.fallocate64
    except AttributeError:
        # libc does not export the explicit 64-bit entry point.
        _fallocate = libc.fallocate
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]

    def fallocate(fd, mode, offset, len_):
        """Call fallocate(2) on ``fd``; raise IOError(errno, msg) on failure."""
        fileno = fd.fileno() if hasattr(fd, 'fileno') else fd
        if _fallocate(fileno, mode, offset, len_) != 0:
            # The C return value is just -1; the actual reason is in errno.
            err = ctypes.get_errno()
            raise IOError(err, os.strerror(err))

    return fallocate


fallocate = make_fallocate()
del make_fallocate
# Mode flags for fallocate(2), as defined in <linux/falloc.h>.
# PUNCH_HOLE must always be combined with KEEP_SIZE.
FALLOC_FL_KEEP_SIZE = 0x01
FALLOC_FL_PUNCH_HOLE = 0x02


def punch(filename, verbose):
    """Deallocate every all-zero block of ``filename``, making it sparse.

    The file is read in 4096-byte blocks; each block that consists solely
    of NUL bytes is released with
    ``fallocate(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)``.  The file's
    size and its readable contents are unchanged.

    Args:
        filename: path of the file to modify in place.
        verbose: when true, print the file name and every hole punched.

    Raises:
        IOError: if the file cannot be opened or fallocate fails.
    """
    blocksize = 4096
    zeros = b'\x00' * blocksize
    if verbose:
        print("processing %s" % (filename,))
    # Binary mode: the data is inspected byte-for-byte, never decoded.
    with open(filename, 'r+b') as f:
        offset = 0
        while True:
            buf = f.read(blocksize)
            if not buf:
                break  # end of file
            # The final block may be shorter than blocksize, so compare
            # against a zero run of the same length.
            if buf == zeros[:len(buf)]:
                if verbose:
                    print("punching hole at offset %d length %d"
                          % (offset, len(buf)))
                fallocate(f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
                          offset, len(buf))
            offset += len(buf)
if __name__ == '__main__':
    import sys
    import argparse

    # Command-line entry point: punch holes in every file named on the
    # command line, optionally reporting progress.
    cli = argparse.ArgumentParser(
        description="Punch out the empty areas in a file, making it sparse")
    cli.add_argument('file', nargs='+', metavar='FILE',
                     help='file(s) to modify in-place')
    cli.add_argument('-v', '--verbose', default=False, action="store_true",
                     help='be verbose')
    options = cli.parse_args()

    for path in options.file:
        punch(path, options.verbose)

Using a regex /^\s*$/ instead of looping through each character produced a 10-fold speed increase on my box (see my fork).

jkortus commented Feb 15, 2015

A version using
buf == "\x00" * blocksize
might be even faster and does not require the re module.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.