Skip to content

Instantly share code, notes, and snippets.

@nlw0
Last active October 10, 2019 21:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nlw0/0549c9fafa5e6c259e47a73e913c1185 to your computer and use it in GitHub Desktop.
Save nlw0/0549c9fafa5e6c259e47a73e913c1185 to your computer and use it in GitHub Desktop.
Counting files based on libuv's readdir and scandir via C and Julia
#include "uv.h"
#include "stdio.h"
#include "string.h"
#include <sys/time.h>
/* Return the smaller of the two arguments (b when they compare equal). */
int min(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
int get_file_count_read(const char path[], int chunklen) {
uv_fs_t readdir_req;
uv_fs_opendir(NULL, &readdir_req, path, NULL);
uv_dirent_t dirents[1024];
uv_dir_t* rdir = readdir_req.ptr;
rdir->dirents = dirents;
rdir->nentries = min(1024,chunklen);
int acc = 0;
char fullname[1024];
strcpy(fullname, path);
char * filebase = fullname+strlen(path)+1;
*(filebase-1)='/';
for(;;) {
int r = uv_fs_readdir(NULL, &readdir_req, readdir_req.ptr, NULL);
if (r <= 0)
break;
for (int i=0; i<min(chunklen, r); i++) {
if (dirents[i].type == UV_DIRENT_DIR) {
strcpy(filebase, dirents[i].name);
acc += get_file_count_read(fullname, chunklen);
} else if (dirents[i].type == UV_DIRENT_FILE) {
acc += 1;
}
}
}
uv_fs_closedir(NULL, &readdir_req, readdir_req.ptr, NULL);
return acc;
}
/*
 * Recursively count the regular files below `path` using libuv's
 * synchronous uv_fs_scandir / uv_fs_scandir_next calls.
 *
 * path     - directory to scan (NUL-terminated).
 * chunklen - unused here; kept so the signature matches
 *            get_file_count_read().
 *
 * Returns the number of entries of type UV_DIRENT_FILE found in `path` and
 * all of its subdirectories. A directory that cannot be scanned, or a path
 * that does not fit in the 1024-byte name buffer, contributes 0.
 */
int get_file_count_scan(const char path[], int chunklen) {
    char fullname[1024];
    uv_fs_t req;
    uv_dirent_t dirent;
    int acc = 0;

    if (path == NULL)
        return 0;

    /* Build "<path>/" once; entry names are appended at `filebase`. */
    size_t baselen = strlen(path);
    if (baselen + 2 > sizeof(fullname))
        return 0;
    memcpy(fullname, path, baselen);
    fullname[baselen] = '/';
    fullname[baselen + 1] = '\0';
    char* filebase = fullname + baselen + 1;
    size_t room = sizeof(fullname) - (baselen + 1);

    if (uv_fs_scandir(NULL, &req, path, 0, NULL) < 0) {
        uv_fs_req_cleanup(&req);
        return 0;
    }

    /* Stop on UV_EOF *and* on any other negative result: testing only for
     * UV_EOF would spin forever if the iterator keeps reporting an error. */
    while (uv_fs_scandir_next(&req, &dirent) >= 0) {
        if (dirent.type == UV_DIRENT_DIR) {
            size_t namelen = strlen(dirent.name);
            if (namelen >= room)
                continue; /* full path would not fit; skip it */
            memcpy(filebase, dirent.name, namelen + 1);
            acc += get_file_count_scan(fullname, chunklen);
        } else if (dirent.type == UV_DIRENT_FILE) {
            acc += 1;
        }
    }

    uv_fs_req_cleanup(&req);
    return acc;
}
/*
 * Benchmark driver: runs each counting function `niter` times on the
 * directory given as argv[1] and prints the mean CPU time per run, in
 * seconds as measured by clock().
 *
 * Returns 0 on success, 1 when no directory argument was supplied.
 */
int main(int argc, char** argv) {
    const int niter = 3;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <directory>\n",
                argc > 0 ? argv[0] : "filecount");
        return 1;
    }

    double t1 = (double)clock() / CLOCKS_PER_SEC;
    for (int i = 0; i < niter; i++) {
        get_file_count_read(argv[1], 11111);
    }
    t1 = (double)clock() / CLOCKS_PER_SEC - t1;
    printf("readdir file count %f\n", t1 / niter);

    double t2 = (double)clock() / CLOCKS_PER_SEC;
    for (int i = 0; i < niter; i++) {
        get_file_count_scan(argv[1], 22);
    }
    /* Subtract this phase's own start time (t2), not the readdir duration. */
    t2 = (double)clock() / CLOCKS_PER_SEC - t2;
    printf("scandir file count %f\n", t2 / niter);

    return 0;
}
// 2560000 files - 36%
// readdir file count 1.249874
// scandir file count 3.428333
// 640000 files - 43%
// readdir file count 0.321972
// scandir file count 0.748858
// 800*800 files - 86%
// readdir file count 0.330214
// scandir file count 0.383865
using MD5
using BenchmarkTools
"""
    generate_tree(treeroot, N, silly=false)

Create the directory `treeroot` and populate it with test files.

By default `N` subdirectories are created, each holding `N` files. With
`silly=true` a single subdirectory holding `N*N` files is created instead.
Subdirectory names are the hex MD5 of the directory index; file names are the
hex MD5 of the concatenated directory and file indices plus a `.png` suffix.
Each file contains 8 to 16 random lowercase letters.
"""
function generate_tree(treeroot, N, silly=false)
    mkdir(treeroot)
    ndirs = silly ? 1 : N
    nfiles = silly ? N * N : N
    for j in 1:ndirs
        subdir = string(treeroot, "/", bytes2hex(md5("$j")))
        mkdir(subdir)
        for k in 1:nfiles
            target = string(subdir, "/", bytes2hex(md5("$j$k")), ".png")
            open(target, "w") do stream
                write(stream, join(rand('a':'z', rand(8:16))))
            end
        end
    end
end
"""
    get_count_readdir(dir)

Return the number of entries produced by `readdir(dir)`. Subdirectories are
counted as a single entry each; they are not descended into.
"""
function get_count_readdir(dir)
    # The recursive variant is left disabled, as in the original sketch:
    #   fullname = dir*"/"*entry
    #   isdir(fullname) ? get_count_readdir(fullname) : 1
    return count(_ -> true, readdir(dir))
end
"""
    get_count_lazyreaddir(dir)

Return the number of entries produced by iterating `lazyreaddir(dir)`.
Subdirectories are counted as a single entry each; they are not descended
into.
"""
function get_count_lazyreaddir(dir)
    # The recursive variant is left disabled, as in the original sketch:
    #   fullname = dir*"/"*entry
    #   isdir(fullname) ? get_count_lazyreaddir(fullname) : 1
    return count(_ -> true, lazyreaddir(dir))
end
# Benchmark driver: build a test tree, then time both counting functions
# with BenchmarkTools' @btime.
# NOTE(review): the active treeroot below looks like the single subdirectory
# that generate_tree("/tmp/testdir", 1600, true) would create (md5 of "1");
# calling generate_tree on it again would nest a further level — confirm
# which treeroot is meant to be used for generation vs. counting.
# treeroot = "/tmp/testdir"
treeroot = "/tmp/testdir/c4ca4238a0b923820dcc509a6f75849b"
# silly=true: one subdirectory holding N*N = 1600*1600 = 2,560,000 files.
generate_tree(treeroot, 1600, true)
total = @btime get_count_lazyreaddir(treeroot)
total = @btime get_count_readdir(treeroot)
# Recorded @btime output, presumably in the same order as the two calls above:
#6.215 s (10240035 allocations: 273.44 MiB)
#3.873 s (2560024 allocations: 189.25 MiB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment