Skip to content

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files
#!/usr/bin/python
import ctypes
import ctypes.util
# ctypes stand-in for the C ``off_t`` arguments of fallocate().
# NOTE(review): assumes off_t is 64 bits wide on the target platform.
c_off_t = ctypes.c_int64


def make_fallocate():
    """Build and return a Python wrapper around libc's ``fallocate``.

    The C library is located and loaded once, here; the returned closure
    keeps only the configured foreign-function object.

    Returns:
        A function ``fallocate(fd, mode, offset, len_)`` where ``fd`` is an
        object with a ``fileno()`` method. It returns ``None`` on success
        and raises ``IOError`` carrying the C ``errno`` on failure.
    """
    import os

    # use_errno=True makes ctypes keep a private copy of the C errno after
    # each call, so the real failure reason can be reported instead of the
    # bare -1 return value.
    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    _fallocate = libc.fallocate
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]

    def fallocate(fd, mode, offset, len_):
        """Call libc ``fallocate`` on the descriptor behind file object *fd*.

        Raises:
            IOError: if the C call returns non-zero; ``errno`` is the code
                reported by the C library.
        """
        res = _fallocate(fd.fileno(), mode, offset, len_)
        if res != 0:
            err = ctypes.get_errno()
            raise IOError(err, 'fallocate: ' + os.strerror(err))

    return fallocate
# Build the module-level fallocate() wrapper once, at import time.
fallocate = make_fallocate()
# The factory has served its purpose; remove it from the module namespace.
del make_fallocate
# Mode flags that punch() ORs together when calling fallocate().
# NOTE(review): presumably these mirror the Linux fallocate(2) flag values;
# confirm against <linux/falloc.h>.
FALLOC_FL_KEEP_SIZE = 0x01
FALLOC_FL_PUNCH_HOLE = 0x02
def punch(filename, verbose):
    """Punch holes over every all-zero block of *filename*, in place.

    The file is read sequentially in 4096-byte chunks. Each chunk that
    consists solely of NUL bytes is handed to ``fallocate`` with
    ``FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE`` for that byte range.

    Args:
        filename: path of the file to modify.
        verbose: when true, print the file name and each punched range.

    Raises:
        IOError: if the file cannot be opened or ``fallocate`` fails.
    """
    blocksize = 4096
    # Reference block of zeros; a chunk is empty when it equals this block
    # (truncated to the chunk's length for a short final read). One
    # comparison replaces a per-byte Python loop.
    zero_block = b'\x00' * blocksize
    if verbose:
        print("processing %s" % filename)
    # Binary mode: offsets passed to fallocate must count raw bytes, so no
    # text decoding or newline translation may happen on read.
    with open(filename, 'r+b') as f:
        offset = 0
        while True:
            buf = f.read(blocksize)
            if not buf:
                break
            if buf == zero_block[:len(buf)]:
                if verbose:
                    print("punching hole at offset %d length %d"
                          % (offset, len(buf)))
                fallocate(f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
                          offset, len(buf))
            # Advance by what was actually read so offsets stay exact even
            # if a read returns fewer bytes than requested.
            offset += len(buf)
# Command-line entry point: punch holes in each file named on the command line.
if __name__ == '__main__':
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description="Punch out the empty areas in a file, making it sparse")
    # One or more paths, each rewritten in place.
    cli.add_argument(
        'file',
        metavar='FILE',
        nargs='+',
        help='file(s) to modify in-place')
    cli.add_argument(
        '-v', '--verbose',
        action="store_true",
        default=False,
        help='be verbose')
    options = cli.parse_args()

    for path in options.file:
        punch(path, options.verbose)
@vontrapp

Using a regex (/^\s*$/) instead of looping through each character produced a tenfold speed increase on my box (see my fork).

@jkortus

A version using the comparison
buf == "\x00" * blocksize
might be even faster, and it does not require the re module.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.