Skip to content

Instantly share code, notes, and snippets.

@xkikeg
Last active July 13, 2022 08:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xkikeg/4645373 to your computer and use it in GitHub Desktop.
Save xkikeg/4645373 to your computer and use it in GitHub Desktop.
Find the data positions of a sparse file with SEEK_DATA & SEEK_HOLE. This does not work on Ubuntu 12.04.
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
/*
 * Fallback definitions for toolchains whose headers do not expose
 * SEEK_DATA / SEEK_HOLE. A #warning is emitted so the build log shows
 * that the hard-coded values are in use.
 * NOTE(review): 3 and 4 are presumably the Linux whence values -- confirm
 * before building on another platform.
 */
#ifndef SEEK_DATA
#warning "SEEK_DATA is undeclared and manually defined."
#define SEEK_DATA 3 /* seek to the next data */
#endif
#ifndef SEEK_HOLE
#warning "SEEK_HOLE is undeclared and manually defined."
#define SEEK_HOLE 4 /* seek to the next hole */
#endif
/*
 * Print the byte range of every data extent in a (possibly sparse) file.
 *
 * Usage: prog file
 *
 * For each extent one line "0x<begin> 0x<end>" is written to stderr, where
 * <begin> is the offset returned by SEEK_DATA and <end> is the offset of the
 * following hole as returned by SEEK_HOLE (i.e. the range is half-open).
 *
 * Returns EXIT_SUCCESS when the whole file was scanned, EXIT_FAILURE on a
 * usage error or when open/fstat/lseek fails.
 */
int main(int argc, char ** argv)
{
    if (argc != 2) {
        /* argv[0] may be NULL when argc == 0, so do not print it blindly. */
        fprintf(stderr, "Usage: %s file\n", (argc > 0) ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    int fd = open(argv[1], O_RDONLY);
    if (fd == -1) {
        perror("failed to open the file.");
        return EXIT_FAILURE;
    }

    struct stat status;
    if (fstat(fd, &status) == -1) {
        perror("fstat failed");
        close(fd);
        return EXIT_FAILURE;
    }
    const off_t size = status.st_size;

    int rc = EXIT_SUCCESS;
    off_t offset = 0;

    /*
     * All seeks below use absolute offsets, so the descriptor's current
     * position never has to be restored between iterations.
     * A plain while loop (not do/while) lets an empty file succeed with
     * no output.
     */
    while (offset < size) {
        off_t beg = lseek(fd, offset, SEEK_DATA);
        if (beg == -1) {
            /* ENXIO: no data at or after offset -- the file ends in a hole. */
            if (errno != ENXIO) {
                perror("SEEK_DATA failed");
                rc = EXIT_FAILURE;
            }
            break;
        }

        off_t end = lseek(fd, beg, SEEK_HOLE);
        if (end == -1) {
            perror("SEEK_HOLE failed");
            rc = EXIT_FAILURE;
            break;
        }

        fprintf(stderr, "0x%llx 0x%llx\n",
                (unsigned long long)beg,
                (unsigned long long)end);
        offset = end;
    }

    close(fd);
    return rc;
}
@SteveLauC
Copy link

Hi, would you like to try my code snippet? It prints the byte ranges of the data and hole regions, one range per line:

#define _GNU_SOURCE

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Kind of region the scanner is currently positioned in.
enum Type {
	HOLE = 0, // inside a hole; the next boundary is looked up with SEEK_DATA
	DATA = 1, // inside data; the next boundary is looked up with SEEK_HOLE
};

// Print the data and hole byte ranges of the file open on `fd` to stdout.
// `fd` must be an open file descriptor; the definition follows main().
void find_all_holes(int fd);

// Entry point: open the file named by the first argument read-only and
// print its data/hole layout via find_all_holes().
//
// Returns 0 on success; exits with EXIT_FAILURE when no file argument is
// given or the file cannot be opened.
int main(int ac, char *av[])
{
	// Without this check av[1] would be NULL and be handed to open().
	if (ac < 2) {
		fprintf(stderr, "Usage: %s file\n", (ac > 0) ? av[0] : "prog");
		exit(EXIT_FAILURE);
	}

	int fd = open(av[1], O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(EXIT_FAILURE);
	}

	find_all_holes(fd);

	close(fd);
	return 0;
}

void find_all_holes(int fd)
{
	off_t cur_offset = 0; // current offset
	enum Type cur_type; // current byte type

	off_t file_size = lseek(fd, 0, SEEK_END);
	off_t index_of_last_byte = file_size - 1;

	printf("This file has %ld bytes\n", file_size);

	// where are we? HOLE or DATA
	off_t res = lseek(fd, 0, SEEK_HOLE);
	if (res == 0) {
		cur_type = HOLE;
	} else if (res == file_size) {
		printf("[0, %ld]: data(then exit)\n", index_of_last_byte);
		exit(0);
	} else {
		cur_type = DATA;
		cur_offset = res;
	}

	while (cur_offset <= index_of_last_byte) {
		off_t new_offset =
			lseek(fd, cur_offset,
			      ((cur_type == DATA) ? SEEK_HOLE : SEEK_DATA));
		if ((cur_type == HOLE && new_offset == -1 && errno == ENXIO) ||
		    (cur_type == DATA && new_offset == file_size)) {
			// from current position to the end of this file: `cur_type`
			printf("[%ld, %ld]: %s(end)\n", cur_offset,
			       index_of_last_byte,
			       ((cur_type == DATA) ? "data" : "hole"));
			break; // exit of while loop
		} else {
			// from current offset to the new offset: `cur_type`
			printf("[%ld, %ld]: %s\n", cur_offset, new_offset - 1,
			       ((cur_type == DATA) ? "data" : "hole"));

			cur_offset = new_offset;
			cur_type = (cur_type == DATA) ? HOLE : DATA;
		}
	}
}

@xkikeg
Copy link
Author

xkikeg commented Jul 13, 2022

Thanks for the comment with the code! Honestly, I have lost all the context around my snippet and am not even sure how to test it. At least I can say yours looks cleaner and better :-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment