public
Last active — forked from NicolasT/punch.py

Using FALLOC_FL_PUNCH_HOLE from Python to punch holes in files

  • Download Gist
punch.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
#!/usr/bin/python
 
import ctypes
import ctypes.util
 
c_off_t = ctypes.c_int64
 
def make_fallocate():
libc_name = ctypes.util.find_library('c')
libc = ctypes.CDLL(libc_name)
 
_fallocate = libc.fallocate
_fallocate.restype = ctypes.c_int
_fallocate.argtypes = [ctypes.c_int, ctypes.c_int, c_off_t, c_off_t]
 
del libc
del libc_name
 
def fallocate(fd, mode, offset, len_):
res = _fallocate(fd.fileno(), mode, offset, len_)
if res != 0:
raise IOError(res, 'fallocate')
 
return fallocate
 
fallocate = make_fallocate()
del make_fallocate
 
FALLOC_FL_KEEP_SIZE = 0x01
FALLOC_FL_PUNCH_HOLE = 0x02
 
 
def punch(filename, verbose):
blocksize = 4096
if verbose:
print "processing", filename
with open(filename, 'r+') as f:
offset = 0
length = 0
while True:
buf = f.read(blocksize)
if not buf:
break
for c in buf:
if c != '\x00':
break
else:
if verbose:
print "punching hole at offset", offset, "length", len(buf)
fallocate(f, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
offset, len(buf))
offset = offset + blocksize
 
if __name__ == '__main__':
import sys
import argparse
parser = argparse.ArgumentParser(
description = "Punch out the empty areas in a file, making it sparse")
parser.add_argument('file', metavar='FILE',
help='file(s) to modify in-place', nargs='+')
parser.add_argument('-v', '--verbose', action="store_true", default=False,
help='be verbose')
args = parser.parse_args()
for filename in args.file:
punch(filename, args.verbose)

Using a regex /^\s*$/ instead of looping through each character produced a 10 fold speed increase on my box. (see my fork)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.