Skip to content

Instantly share code, notes, and snippets.

@dfunckt
Last active November 14, 2015 21:31
Show Gist options
  • Save dfunckt/055d9a275039d4134558 to your computer and use it in GitHub Desktop.
Save dfunckt/055d9a275039d4134558 to your computer and use it in GitHub Desktop.
Source code for two programs that work together to split directories with a huge number of files by type. Compile main.c as listdir, then run: $ listdir <dirname> | splitfolder.py
//
// main.c
// llistdir
//
#define _GNU_SOURCE
#ifdef __APPLE__
#define _DARWIN_NO_64_BIT_INODE
#endif
#include <dirent.h> /* Defines DT_* constants */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
/* Print msg followed by the current errno description, then exit with EXIT_FAILURE. */
#define handle_error(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/*
 * Size in bytes of the directory-entry read buffer (64 KiB).
 * The expansion is parenthesized so the macro keeps its value when it is used
 * next to operators such as '/' or '%'.
 */
#define BUF_SIZE (1024 * 64)
/*
 * Local description of one directory record as main() reads it out of the
 * buffer filled by getdirentries(). Two variants are declared and the
 * preprocessor picks one depending on whether __APPLE__ is defined.
 *
 * NOTE(review): each variant is presumably meant to mirror the record layout
 * the platform writes into the buffer (this file also defines
 * _DARWIN_NO_64_BIT_INODE on Apple builds) -- confirm against the system's
 * <dirent.h> before changing any field type or order.
 */
#ifdef __APPLE__
struct _dirent {
/* Inode number; main() skips records whose d_ino is 0. */
ino_t d_ino;
/* Length of this record in bytes; main() adds it to its buffer offset to reach the next record. */
__uint16_t d_reclen;
/* Entry type; main() compares it against DT_REG. */
__uint8_t d_type;
/* Presumably the length of d_name -- not read anywhere in this file. */
__uint8_t d_namlen;
/* Entry name; main() prints it as a NUL-terminated string. */
char d_name[255 + 1];
};
#else
struct _dirent {
/* Inode number; main() skips records whose d_ino is 0. */
ino_t d_ino;
/* Not read anywhere in this file; presumably an offset cookie for the next record -- verify. */
off_t d_off;
/* Length of this record in bytes; main() adds it to its buffer offset to reach the next record. */
unsigned short d_reclen;
/* Entry type; main() compares it against DT_REG. */
unsigned char d_type;
/* Entry name; main() prints it as a NUL-terminated string. */
char d_name[256];
};
#endif
/*
 * Print the name of every regular file in a directory, one per line, on stdout.
 *
 * argv[1] names the directory to scan; "." is used when no argument is given.
 * Entries are read in raw batches with getdirentries() and walked record by
 * record, so the names are emitted in whatever order the call returns them.
 *
 * Returns EXIT_SUCCESS when the whole directory was listed. Any failure
 * (open, getdirentries, a malformed record, a write error on stdout, close)
 * prints a diagnostic on stderr and terminates with EXIT_FAILURE.
 */
int main(int argc, const char * argv[])
{
    const char *path = argc > 1 ? argv[1] : ".";
    char buf[BUF_SIZE];
    long basep = 0;
    int fd;

    fd = open(path, O_RDONLY | O_DIRECTORY);
    if (fd == -1) {
        handle_error("open");
    }

    for (;;) {
        int nread = getdirentries(fd, buf, BUF_SIZE, &basep);
        int bpos = 0;

        if (nread == -1) {
            handle_error("getdirentries");
        }
        if (nread == 0) {
            break; /* end of directory */
        }

        while (bpos < nread) {
            struct _dirent *d = (struct _dirent *)(buf + bpos);

            /*
             * A zero-length record would never advance bpos (infinite loop),
             * and one longer than the remaining bytes would walk off the data
             * that was actually read. Treat both as a fatal error.
             */
            if (d->d_reclen == 0 || d->d_reclen > nread - bpos) {
                fprintf(stderr, "getdirentries: malformed directory record\n");
                exit(EXIT_FAILURE);
            }

            /* Skip unused slots (inode 0) and anything that is not a regular file. */
            if (d->d_ino != 0 && d->d_type == DT_REG) {
                if (printf("%s\n", d->d_name) < 0) {
                    handle_error("printf");
                }
            }
            bpos += d->d_reclen;
        }

        /* Hand each batch to the consumer promptly and notice a broken stdout. */
        if (fflush(stdout) == EOF) {
            handle_error("fflush");
        }
    }

    if (close(fd) == -1) {
        handle_error("close");
    }
    return EXIT_SUCCESS;
}
#!/usr/bin/env python
from os import renames
from os.path import basename, join
def movefiles(input_stream, base='.', perdir=100):
    """Move the files named on ``input_stream`` into a two-level bucket tree.

    Each line of ``input_stream`` is one file path. The n-th file (counting
    from zero) is moved to ``<base>/<l1>/<l2>/<basename>``, where ``l2``
    changes every ``perdir`` files and ``l1`` changes every
    ``perdir * perdir`` files, both rendered as zero-padded two-digit numbers
    starting at ``00``. Missing destination directories are created by
    ``os.renames``.

    Args:
        input_stream: iterable of lines, one file path per line.
        base: directory under which the bucket tree is created.
        perdir: number of files per leaf directory, and number of leaf
            directories per first-level directory. Must be >= 1.

    Returns:
        0, suitable for passing to ``sys.exit``.

    Raises:
        ValueError: if ``perdir`` is smaller than 1.
        OSError: propagated from ``os.renames`` when a move fails.
    """
    if perdir < 1:
        raise ValueError('perdir must be a positive integer')

    moved = 0
    for line in input_stream:
        # Drop only the line terminator: leading or trailing blanks can be a
        # genuine part of a file name and must survive.
        filepath = line.rstrip('\n')
        if not filepath:
            # A blank line names no file.
            continue

        # Derive both levels from the zero-based index so that every leaf
        # directory, including the very first one, holds exactly `perdir` files.
        bucket = moved // perdir
        l1 = '%02d' % (bucket // perdir)
        l2 = '%02d' % (bucket % perdir)
        dst = join(base, l1, l2, basename(filepath))

        moved += 1
        # Single-argument print() gives the same output on Python 2 and 3.
        print('%08d: moving %s to %s' % (moved, filepath, dst))
        renames(filepath, dst)
    return 0
if __name__ == '__main__':
    # Script entry point: treat standard input as the list of file paths and
    # use the value returned by movefiles() as the process exit status.
    import sys
    status = movefiles(sys.stdin)
    raise SystemExit(status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment