Skip to content

Instantly share code, notes, and snippets.

@ymmt2005
Last active June 30, 2021 05:04
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ymmt2005/4512427 to your computer and use it in GitHub Desktop.
Save ymmt2005/4512427 to your computer and use it in GitHub Desktop.
Fastest du for Linux. This is in fact faster than du(1) as long as all dentries are cached :-p
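/**
* due2fs: print the total size, in bytes, of the regular files found under
* DIRECTORY.
*
* Compilation:
*
* 1. Without libext2fs (file sizes are obtained with fstatat(2)):
* $ gcc -O2 -o due2fs due2fs.c
*
* 2. With libext2fs (file sizes are read from the underlying ext2/3/4 device):
* $ sudo apt-get install e2fslibs-dev
* $ gcc -DUSE_E2FSLIB -O2 -o due2fs due2fs.c -lext2fs
*
* Run:
* $ sudo due2fs DIRECTORY
*/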
#define _GNU_SOURCE
#include <dirent.h> /* Defines DT_* constants */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h> /* struct stat, fstatat(), S_ISDIR/S_ISREG */
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h> /* waitpid() */
#ifdef USE_E2FSLIB
#include <ext2fs/ext2fs.h>
static ext2_filsys g_e2fs;
#endif
/*
 * Print `msg` followed by the description of the current errno value (via
 * perror) and terminate the process with EXIT_FAILURE.
 *
 * Wrapped in do { ... } while (0) so that it behaves as a single statement,
 * e.g. as the unbraced body of an if/else.
 */
#define handle_error(msg)       \
    do {                        \
        perror(msg);            \
        exit(EXIT_FAILURE);     \
    } while (0)
/**
 * Record layout filled in by the getdents64 system call.
 *
 * http://lxr.free-electrons.com/source/include/linux/dirent.h?v=3.2
 *
 * Records are variable-length: d_reclen is the size of the whole record and
 * is what must be added to reach the next one. d_name is the NUL-terminated
 * entry name; it is declared as a C99 flexible array member (rather than the
 * GNU zero-length array `d_name[0]`), which keeps the same layout and sizeof.
 */
struct linux_dirent64 {
    uint64_t d_ino;           /* inode number */
    int64_t d_off;            /* offset to the next record */
    unsigned short d_reclen;  /* size of this record in bytes */
    unsigned char d_type;     /* DT_* file type */
    char d_name[];            /* NUL-terminated file name */
};

/* Byte budget per directory entry: the fixed header plus 64 bytes of name. */
const size_t DIRENT_ONESIZE = sizeof(struct linux_dirent64) + 64;

/*
 * Number of entries the read buffer is sized for, and the capacity of the
 * per-directory queue of pending subdirectory descriptors.
 */
const size_t EXPECTED_MAXFILES = 1200;
#ifdef USE_E2FSLIB
static inline uint64_t getfilesize(uint64_t ino) {
uint64_t size;
ext2_file_t f;
if( ext2fs_file_open(g_e2fs, ino, 0, &f) != 0 ) {
// skip errors
return 0;
}
size = (uint64_t)ext2fs_file_get_size(f);
ext2fs_file_close(f);
return size;
}
/**
 * Find the block device that backs `path` and open it with libext2fs,
 * storing the handle in the global g_e2fs.
 *
 * The device name is taken from the first column of the last line printed by
 * df(1). The pipeline is run by /bin/sh in a child process whose stdout is a
 * pipe read by the parent.
 *
 * `path` is passed to the shell as the positional parameter $1 instead of
 * being pasted into the command text, so spaces or shell metacharacters in
 * the directory name cannot alter the command (this program is normally run
 * as root). `df -P` forces one output line per filesystem.
 *
 * Any failure prints a diagnostic and terminates the process.
 *
 * @param path  directory whose filesystem should be opened.
 */
void open_ext2fs(const char* path) {
    static const char script[] =
        "/bin/df -P -- \"$1\" | /usr/bin/tail -1 | "
        "/usr/bin/awk '{printf \"%s\", $1}'";
    char device[4096];
    size_t len = 0;
    int p[2];
    pid_t child;

    if( pipe(p) != 0 )
        handle_error("pipe");
    child = fork();
    if( child == -1 )
        handle_error("fork");
    if( child == 0 ) {
        // child: send stdout into the pipe and run the pipeline
        close(p[0]);
        if( dup2(p[1], 1) == -1 )
            handle_error("dup2");
        close(p[1]);
        // "sh" becomes $0 of the script and path becomes $1
        execl("/bin/sh", "/bin/sh", "-c", script, "sh", path, (char*)NULL);
        handle_error("execl");
    }

    // parent: collect the device name until the child closes its end
    close(p[1]);
    while( 1 ) {
        ssize_t nread = read(p[0], device + len, sizeof(device) - len);
        if( nread == -1 )
            handle_error("read");
        if( nread == 0 )
            break;
        len += (size_t)nread;
        if( len == sizeof(device) ) {
            // no room left for the terminating NUL
            fprintf(stderr, "too long device name?!\n");
            exit(EXIT_FAILURE);
        }
    }
    device[len] = '\0';
    close(p[0]);

    // reap the shell so it does not linger as a zombie during the scan
    if( waitpid(child, NULL, 0) == -1 )
        handle_error("waitpid");

    if( len == 0 ) {
        fprintf(stderr, "could not determine the device for \"%s\"\n", path);
        exit(EXIT_FAILURE);
    }
    fprintf(stderr, "Using device \"%s\"\n", device);
    if( ext2fs_open(device, 0, 0, 0, unix_io_manager, &g_e2fs) != 0 ) {
        fprintf(stderr, "ext2fs_open failed.\n");
        exit(EXIT_FAILURE);
    }
}
#else // USE_E2FSLIB
/**
 * Return the st_size of the entry `path`, resolved relative to the open
 * directory descriptor `dir_fd`, as reported by fstatat().
 *
 * @param dir_fd  descriptor of the directory containing the entry.
 * @param path    entry name (relative to dir_fd) or an absolute path.
 * @return        the size in bytes, or 0 when fstatat() fails; failures are
 *                deliberately ignored so the caller's scan continues.
 */
static inline uint64_t getfilesize(int dir_fd, const char* path) {
    struct stat info;
    const int rc = fstatat(dir_fd, path, &info, 0);

    return (rc == 0) ? (uint64_t)info.st_size : 0;
}
#endif
static uint64_t diskusage(int dir_fd, char* buf, size_t bufsize) {
uint64_t total = 0;
ssize_t nread;
int bpos;
int dir_fds[EXPECTED_MAXFILES];
int n_dir_fds = 0;
int fd;
int i;
struct linux_dirent64 *d;
while( 1 ) {
nread = syscall(SYS_getdents64, dir_fd, buf, bufsize);
if( nread == -1 )
handle_error("getdents");
if( nread == 0 )
break;
for( bpos = 0; bpos < nread; ) {
d = (struct linux_dirent64 *) (buf + bpos);
if( strcmp(d->d_name, ".") == 0 ||
strcmp(d->d_name, "..") == 0 ) {
bpos += d->d_reclen;
continue;
}
if( d->d_type == DT_DIR ) {
if( n_dir_fds == EXPECTED_MAXFILES ) {
fprintf(stderr, "info: cache is full. flushing...\n");
char* new_buf = (char*)malloc(bufsize);
if( ! new_buf ) {
free(buf);
fprintf(stderr, "malloc failed.\n");
exit(EXIT_FAILURE);
}
for( i = 0; i < n_dir_fds; ++i ) {
total += diskusage(dir_fds[i], new_buf, bufsize);
close(dir_fds[i]);
}
free(new_buf);
n_dir_fds = 0;
}
fd = openat(dir_fd, d->d_name, O_RDONLY|O_DIRECTORY);
if( fd == -1 )
handle_error("openat");
dir_fds[n_dir_fds++] = fd;
}
else if( d->d_type == DT_REG ) {
#ifdef USE_E2FSLIB
total += getfilesize( d->d_ino );
#else
total += getfilesize( dir_fd, d->d_name );
#endif
}
// ignore other types
bpos += d->d_reclen;
}
}
for( i = 0; i < n_dir_fds; ++i ) {
total += diskusage(dir_fds[i], buf, bufsize);
close(dir_fds[i]);
}
return total;
}
/**
 * Entry point: print the total size in bytes of the regular files below the
 * directory named by argv[1].
 *
 * With no directory argument a usage line is printed and the program exits
 * successfully. Otherwise the soft RLIMIT_NOFILE is raised to
 * EXPECTED_MAXFILES * 2 if it is currently lower (a limit that is already
 * higher is left untouched), because the scan keeps many directory
 * descriptors open at once. Any failure prints a diagnostic and exits with
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
    const size_t bufsize = DIRENT_ONESIZE * EXPECTED_MAXFILES;
    const rlim_t wanted_fds = (rlim_t)(EXPECTED_MAXFILES * 2);
    struct rlimit rlim;
    char *buf;
    int fd;

    // validate the command line before acquiring any resource
    if( argc < 2 ) {
        printf("Usage: due2fs DIRECTORY\n");
        return EXIT_SUCCESS;
    }

    if( getrlimit(RLIMIT_NOFILE, &rlim) != 0 )
        handle_error("getrlimit(RLIMIT_NOFILE)");
    if( rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < wanted_fds ) {
        rlim.rlim_cur = wanted_fds;
        // the soft limit may not exceed the hard limit, so lift that too
        if( rlim.rlim_max != RLIM_INFINITY && rlim.rlim_max < wanted_fds )
            rlim.rlim_max = wanted_fds;
        if( setrlimit(RLIMIT_NOFILE, &rlim) != 0 )
            handle_error("setrlimit(RLIMIT_NOFILE)");
    }

    buf = malloc(bufsize);
    if( ! buf ) {
        fprintf(stderr, "malloc failed.\n");
        exit(EXIT_FAILURE);
    }

    fd = open(argv[1], O_RDONLY|O_DIRECTORY);
    if( fd == -1 )
        handle_error("open");
#ifdef USE_E2FSLIB
    open_ext2fs(argv[1]);
#endif
    printf("%llu\n", (unsigned long long)diskusage(fd, buf, bufsize));
    close(fd);
    free(buf);
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment