Created — forked from NicolasT/punch.py

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files

View punch.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
#!/usr/bin/python
 
import ctypes
import ctypes.util
 
# C type used for the ``off_t`` arguments of fallocate(2).
# NOTE(review): assumes off_t is 64 bits wide on the target platform --
# confirm before using on 32-bit builds.
c_off_t = ctypes.c_int64


def make_fallocate():
    """Build and return a Python wrapper around libc's ``fallocate``.

    The libc handle and the raw ctypes function are kept private inside
    the closure so that only the checked wrapper is exposed at module
    level.
    """
    import os  # local import: only needed to describe errno values

    libc_name = ctypes.util.find_library('c')
    # use_errno=True makes ctypes keep a copy of the C ``errno`` after each
    # foreign call so it can be retrieved with ctypes.get_errno().
    libc = ctypes.CDLL(libc_name, use_errno=True)

    _fallocate = libc.fallocate
    _fallocate.restype = ctypes.c_int
    _fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]

    del libc
    del libc_name

    def fallocate(fd, mode, offset, len_):
        """Call ``fallocate(fd, mode, offset, len_)`` and check the result.

        Args:
            fd: an open file object (anything with ``fileno()``) or a raw
                integer file descriptor.
            mode: bitwise OR of ``FALLOC_FL_*`` flags (0 for the default).
            offset: start of the byte range.
            len_: length of the byte range.

        Raises:
            IOError: if the C call fails; ``errno`` carries the real C
                error code rather than the bare return value.
        """
        fileno = fd.fileno() if hasattr(fd, 'fileno') else fd
        res = _fallocate(fileno, mode, offset, len_)
        if res != 0:
            # The return value is only a failure marker; the actual reason
            # is in errno.
            err = ctypes.get_errno()
            raise IOError(err, os.strerror(err))

    return fallocate


fallocate = make_fallocate()
del make_fallocate
 
# Mode flags passed to fallocate(); each occupies its own bit so they can
# be combined with bitwise OR.
FALLOC_FL_KEEP_SIZE = 1 << 0   # bit 0
FALLOC_FL_PUNCH_HOLE = 1 << 1  # bit 1
 
 
def punch(filename, verbose):
    """Punch holes over every all-zero block of *filename*, in place.

    The file is read sequentially in 4096-byte blocks. Each block that
    consists solely of NUL bytes is handed to ``fallocate`` with
    ``FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE`` for the block's offset
    and length.

    Args:
        filename: path of the file to process; it is opened for update.
        verbose: when true, print the file name and every hole punched.

    Raises:
        IOError: if the file cannot be opened or ``fallocate`` fails.
    """
    blocksize = 4096
    # Reference block of zeros, built once and compared against each read.
    zero_block = b'\x00' * blocksize
    if verbose:
        print("processing %s" % filename)
    # Binary mode: the content is raw data, so no newline translation or
    # text decoding may be applied to it.
    with open(filename, 'r+b') as f:
        offset = 0
        while True:
            buf = f.read(blocksize)
            if not buf:
                break
            # The final block may be short, so compare against a prefix of
            # the zero block with the same length.
            if buf == zero_block[:len(buf)]:
                if verbose:
                    print("punching hole at offset %s length %s"
                          % (offset, len(buf)))
                fallocate(f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
                          offset, len(buf))
            # Advance by what was actually read so the offset always
            # matches the file position.
            offset += len(buf)
 
if __name__ == '__main__':
    # Command-line entry point: run punch() on every file given as argument.
    import sys
    import argparse

    cli = argparse.ArgumentParser(
        description="Punch out the empty areas in a file, making it sparse",
    )
    cli.add_argument(
        'file',
        metavar='FILE',
        nargs='+',
        help='file(s) to modify in-place',
    )
    cli.add_argument(
        '-v', '--verbose',
        action="store_true",
        default=False,
        help='be verbose',
    )
    options = cli.parse_args()
    for target in options.file:
        punch(target, options.verbose)

Using a regex /^\s*$/ instead of looping through each character produced a 10-fold speed increase on my box (see my fork).

A version with
buf == "\x00" * blocksize
might be even faster and does not require the re module.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.