Skip to content

Instantly share code, notes, and snippets.

@jimparis
Forked from NicolasT/punch.py
Created October 16, 2012 20:53
Show Gist options
  • Save jimparis/3901942 to your computer and use it in GitHub Desktop.
Save jimparis/3901942 to your computer and use it in GitHub Desktop.
Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files
#!/usr/bin/python
import ctypes
import ctypes.util
# C type used for the off_t arguments of fallocate().
# NOTE(review): this assumes a 64-bit off_t; on 32-bit glibc the plain
# `fallocate` symbol may take a 32-bit off_t -- confirm target platforms.
c_off_t = ctypes.c_int64


def make_fallocate():
    """Build and return a Python wrapper around libc's fallocate().

    The libc handle and the raw ctypes function stay private to this
    factory; only the returned wrapper is meant to be used by callers.

    Returns:
        A function ``fallocate(fd, mode, offset, len_)``.
    """
    import os  # local import: only needed to describe errno values

    libc_name = ctypes.util.find_library('c')
    # use_errno=True makes ctypes keep a copy of errno after each foreign
    # call, so ctypes.get_errno() reports the real reason for a failure.
    libc = ctypes.CDLL(libc_name, use_errno=True)

    _fallocate = libc.fallocate
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]

    del libc
    del libc_name

    def fallocate(fd, mode, offset, len_):
        """Call fallocate() on an open file object.

        Args:
            fd: object exposing ``fileno()`` (e.g. an open file).
            mode: bitwise OR of FALLOC_FL_* flags.
            offset: start of the byte range.
            len_: length of the byte range.

        Raises:
            IOError: if the C call returns non-zero; ``errno`` carries the
                value reported by libc rather than the raw return code.
        """
        res = _fallocate(fd.fileno(), mode, offset, len_)
        if res != 0:
            # The C function returns -1 on failure; the actual error code
            # lives in errno, not in the return value.
            err = ctypes.get_errno()
            raise IOError(err, 'fallocate: ' + os.strerror(err))

    return fallocate


fallocate = make_fallocate()
del make_fallocate
# Mode bits passed to fallocate(); values mirror <linux/falloc.h>.
FALLOC_FL_KEEP_SIZE = 1 << 0
FALLOC_FL_PUNCH_HOLE = 1 << 1
def punch(filename, verbose):
    """Punch holes over every all-zero block of *filename*, in place.

    The file is read in 4096-byte blocks. Each block that contains only
    NUL bytes is passed to fallocate() with
    FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE for its offset and length.

    Args:
        filename: path of the file to modify.
        verbose: when true, print the file name and every hole punched.

    Raises:
        IOError: if the file cannot be opened or fallocate() fails.
    """
    blocksize = 4096
    # Built once so each block needs a single C-level comparison instead
    # of a Python loop over every byte.
    zero_block = b'\x00' * blocksize
    if verbose:
        print("processing %s" % filename)
    # Binary mode: the contents are raw bytes and must not be decoded or
    # newline-translated.
    with open(filename, 'r+b') as f:
        offset = 0
        while True:
            buf = f.read(blocksize)
            if not buf:
                break
            # The final block may be shorter than blocksize, so compare
            # against a zero run of the same length.
            if buf == zero_block[:len(buf)]:
                if verbose:
                    print("punching hole at offset %d length %d"
                          % (offset, len(buf)))
                fallocate(f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
                          offset, len(buf))
            # Advance by what was actually read to stay aligned with the
            # file even after a short read.
            offset += len(buf)
if __name__ == '__main__':
    import sys
    import argparse

    # Command-line entry point: one or more FILE arguments plus an
    # optional -v/--verbose switch; each file is processed in order.
    cli = argparse.ArgumentParser(
        description="Punch out the empty areas in a file, making it sparse")
    cli.add_argument('file', metavar='FILE', nargs='+',
                     help='file(s) to modify in-place')
    cli.add_argument('-v', '--verbose', default=False, action="store_true",
                     help='be verbose')
    options = cli.parse_args()

    be_verbose = options.verbose
    for path in options.file:
        punch(path, be_verbose)
@vontrapp
Copy link

Using a regex /^\s*$/ instead of looping through each character produced a 10-fold speed increase on my box (see my fork).

@jkortus
Copy link

jkortus commented Feb 15, 2015

A version with
buff == "\x00" * blocksize
might be even faster and does not require the re module.

@cybervegan
Copy link

@jkortus is mostly right - comparing against a null block does indeed seem to be the fastest method in my own tests. But what jk wrote allocates a null block every time it compares (not efficient). You need to create the null block once and keep comparing against it:

null_block = "\x00" * blocksize
...
...
if buff == null_block:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment