Skip to content

Instantly share code, notes, and snippets.

@relipse
Last active November 15, 2023 15:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save relipse/0fec8412b61dbe2aa40d7639c3432136 to your computer and use it in GitHub Desktop.
Save relipse/0fec8412b61dbe2aa40d7639c3432136 to your computer and use it in GitHub Desktop.
List files and some statistics in directory for huge directories.
/**
* dirlist
* A program to list files in a directory with some information as fast as possible.
* Useful for directories with 500,000 files.
*
* @date 2023-10-23
* @author relipse
*/
#include <sys/stat.h>
#include <time.h>
#include <stdio.h>
#include <cstring>
#include <dirent.h>
#include <locale.h>
#include <string>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
using namespace std;
/**
 * Write the usage screen to stdout: a synopsis line built from the program
 * name, followed by a fixed description, the option list and two sample runs.
 *
 * @param argc Number of command-line arguments (not used).
 * @param argv Command-line arguments; argv[0] is shown as the program name.
 */
void printHelp(int argc, char *argv[]){
    (void)argc; // kept in the signature for callers; nothing here reads it

    // Fixed part of the help screen, one output line per entry.
    static const char *const kHelpLines[] = {
        "List number of files and each file (if -f,--hide-files is not set)\n",
        "in a directory as fast as possible,\n",
        "also list alphabetical min and max (by filename)\n",
        "\nOptions:\n",
        "-f,--hide-files Do not show each file\n",
        "-d,--dots Use 1 period (dot) to represent each file\n",
        "-s,--file-size Show file sizes and min/max (much slower)\n",
        "-c,--created Show created/modified dates and min/max\n",
        "-g,--grep <match> Only show file names matching <match>\n",
        "-h,--help Show this page\n",
        "\nExamples:\n",
        "# ./dirlist\n",
        "1. dirlist\n",
        "2. dirlist.cpp\n",
        "3. Makefile\n",
        "Directory: . contains 3 files\n",
        "Alphabetical Max: dirlist.cpp\n",
        "Alphabetical Min: Makefile\n",
        "\n# ./dirlist -d\n",
        "...\n",
        "Directory: . contains 3 files\n",
        "Alphabetical Max: dirlist.cpp\n",
        "Alphabetical Min: Makefile\n"
    };
    const unsigned int lineCount = sizeof(kHelpLines) / sizeof(kHelpLines[0]);

    // Synopsis is the only line that depends on how the program was invoked.
    printf("%s [Options] [directory]\n", argv[0]);

    for (unsigned int n = 0; n < lineCount; ++n){
        fputs(kHelpLines[n], stdout);
    }
}
/**
 * Print every extension and how many files carried it, one per line, in the
 * map's key order. The extension is shown in double quotes so the empty
 * extension is still visible.
 *
 * @param m Map from extension text to the number of files seen with it.
 */
void printExtensionMap(std::map<std::string, unsigned long>& m){
    typedef std::map<std::string, unsigned long>::const_iterator ExtensionIter;

    std::cout << "Extensions List (extension: count):" << std::endl;

    ExtensionIter cursor = m.begin();
    const ExtensionIter finish = m.end();
    while (cursor != finish){
        std::cout << '"' << cursor->first << '"' << ": " << cursor->second << std::endl;
        ++cursor;
    }
}
/**
 * Build the path of a directory entry so it can be stat()'ed no matter what
 * the current working directory is.
 */
static std::string entryPath(const std::string& dir, const char *name){
    if (dir.empty()){
        return name;
    }
    if (dir[dir.size() - 1] == '/'){
        return dir + name;
    }
    return dir + "/" + name;
}

/**
 * Format a broken-down time as "YYYY-MM-DD HH:MM:SS". In this form plain
 * string comparison orders timestamps chronologically.
 */
static std::string sortableTime(const struct tm& when){
    char buffer[80];
    if (strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", &when) == 0){
        return "";
    }
    return buffer;
}

/**
 * Format a broken-down time the way asctime() does, without the trailing
 * line break.
 */
static std::string displayTime(const struct tm& when){
    const char *raw = asctime(&when);
    if (raw == NULL){
        return "";
    }
    std::string text = raw; // copy at once: asctime() reuses a static buffer
    text.erase(std::strcspn(text.c_str(), "\r\n"));
    return text;
}

/**
 * dirlist entry point.
 *
 * Walks one directory (default ".") with readdir(), counts the entries
 * (skipping "." and ".."), tracks the alphabetically smallest/largest names
 * and the number of entries per extension, and prints a summary.
 *
 * Options (see printHelp): -f/--hide-files, -d/--dots, -s/--file-size,
 * -c/--created, -g/--grep <match> (repeatable; every term must match),
 * -h/--help. Any other argument is taken as the directory to list.
 *
 * Size and date statistics are gathered whenever -s / -c is given, even when
 * the per-file listing is hidden with -f or -d.
 *
 * @return 0 on success; 1 when --grep has no argument or the directory
 *         cannot be opened.
 */
int main(int argc, char *argv[]) {
    std::string theDir = ".";
    bool printAllFiles = true;
    bool printDots = false;
    bool printSize = false;
    bool printMoreFileStats = false;
    std::vector<std::string> aryGrep;

    // Enables the thousands separator requested by the %' printf flag.
    setlocale(LC_NUMERIC, "");

    for (int i = 1; i < argc; ++i){
        const std::string arg = argv[i];
        if (arg == "--hide-files" || arg == "-f"){
            printAllFiles = false;
        }else if (arg == "-c" || arg == "--created"){
            printMoreFileStats = true;
        }else if (arg == "-s" || arg == "--file-size"){
            printSize = true;
        }else if (arg == "-h" || arg == "--help"){
            printHelp(argc, argv);
            return 0;
        }else if (arg == "--dots" || arg == "-d"){
            printDots = true;
            printAllFiles = false;
        }else if (arg == "--grep" || arg == "-g"){
            if (i + 1 >= argc){
                std::cout << "--grep|-g needs next argument (missing)." << std::endl;
                printHelp(argc, argv);
                return 1; // usage error
            }
            aryGrep.push_back(argv[i + 1]);
            ++i; // the search term has been consumed
        }else{
            theDir = arg.empty() ? std::string(".") : arg;
        }
    }

    if (aryGrep.size() > 0){
        std::cout << "Grep on, -g,--grep matching all terms: ";
        for (unsigned int i = 0; i < aryGrep.size(); ++i){
            if (i > 0){ std::cout << ", "; }
            std::cout << aryGrep[i];
        }
        std::cout << std::endl;
    }

    DIR *dir = opendir(theDir.c_str());
    if (dir == NULL){
        // Without this check readdir() would be handed a null stream.
        perror(theDir.c_str());
        return 1;
    }

    unsigned long count = 0;
    std::string alphaMax;
    std::string alphaMin;
    std::map<std::string, unsigned long> foundExts;

    unsigned long fileSizeSum = 0;
    unsigned long maxSize = 0;
    unsigned long minSize = 0;
    bool haveSize = false; // explicit flag so a 0-byte file can be the minimum
    std::string maxSizeFileName;
    std::string minSizeFileName;

    std::string earliestCreatedDate;
    std::string latestCreatedDate;
    std::string earliestCreatedFile;
    std::string latestCreatedFile;

    // stat() is only paid for when an option actually needs it.
    const bool needStat = printSize || printMoreFileStats;

    struct dirent *ent;
    while ((ent = readdir(dir)) != NULL){
        const std::string file = ent->d_name;
        if (file == "." || file == ".."){
            continue;
        }

        // Every grep term must occur in the name, otherwise skip the entry.
        bool matchesAll = true;
        for (unsigned int g = 0; g < aryGrep.size(); ++g){
            if (file.find(aryGrep[g]) == std::string::npos){
                matchesAll = false;
                break;
            }
        }
        if (!matchesAll){
            continue;
        }

        // Extension = text after the last '.', or "" when there is no dot.
        const std::string::size_type dot = file.rfind('.');
        const std::string extension =
            (dot == std::string::npos) ? std::string() : file.substr(dot + 1);
        ++foundExts[extension]; // operator[] starts a new key at 0

        ++count;
        if (alphaMax.empty() || file > alphaMax){
            alphaMax = file;
        }
        if (alphaMin.empty() || file < alphaMin){
            alphaMin = file;
        }

        struct stat info;
        bool haveInfo = false;
        if (needStat){
            // d_name is relative to theDir, not to the working directory.
            haveInfo = (stat(entryPath(theDir, ent->d_name).c_str(), &info) == 0);
        }

        bool haveTimes = false;
        std::string createdtime;
        std::string modtime;
        if (printMoreFileStats && haveInfo){
            // localtime() hands back one shared static buffer, so two calls
            // would alias each other; localtime_r() fills separate structs.
            // NOTE: POSIX defines st_ctime as the last status change, not a
            // true creation time; the "created" label is kept for output
            // compatibility.
            struct tm createdTm;
            struct tm modifiedTm;
            if (localtime_r(&info.st_ctime, &createdTm) != NULL
                && localtime_r(&info.st_mtime, &modifiedTm) != NULL){
                haveTimes = true;
                const std::string ct = sortableTime(createdTm);
                if (earliestCreatedDate.empty() || ct < earliestCreatedDate){
                    earliestCreatedDate = ct;
                    earliestCreatedFile = file;
                }
                if (latestCreatedDate.empty() || ct > latestCreatedDate){
                    latestCreatedDate = ct;
                    latestCreatedFile = file;
                }
                if (printAllFiles){
                    createdtime = displayTime(createdTm);
                    modtime = displayTime(modifiedTm);
                }
            }
        }

        unsigned long fileSize = 0;
        if (printSize && haveInfo){
            // st_size is whatever stat() reports for the entry (symlinks are
            // followed); no need to open the file.
            fileSize = static_cast<unsigned long>(info.st_size);
            fileSizeSum += fileSize;
            if (!haveSize || fileSize > maxSize){
                maxSize = fileSize;
                maxSizeFileName = file;
            }
            if (!haveSize || fileSize < minSize){
                minSize = fileSize;
                minSizeFileName = file;
            }
            haveSize = true;
        }

        if (printAllFiles){
            printf("%'lu. %s", count, ent->d_name);
            if (printMoreFileStats){
                if (haveTimes){
                    if (createdtime == modtime){
                        std::cout << " (created/modified: " << createdtime;
                    }else{
                        std::cout << " (created: " << createdtime << ", modified: " << modtime;
                    }
                    std::cout << ")";
                }else{
                    std::cout << " (dates unavailable)";
                }
            }
            if (printSize){ // -s, --file-size
                if (haveInfo){
                    printf(" %'lu bytes", fileSize);
                }else{
                    printf(" (size unavailable)");
                }
            }
            printf("\n");
        }
        if (printDots){
            printf(".");
        }
    }
    closedir(dir);

    if (printDots){
        printf("\n");
    }
    if (aryGrep.size() > 0){
        std::cout << "Directory " << theDir << " contains " << count << " matching files." << std::endl;
    }else{
        printf("Directory: %s contains %'lu files\n", theDir.c_str(), count);
    }
    printf("Alphabetical Max: %s\n", alphaMax.c_str());
    printf("Alphabetical Min: %s\n", alphaMin.c_str());
    if (printSize){
        printf("Largest File: %s %'lu bytes\n", maxSizeFileName.c_str(), maxSize);
        printf("Smallest File: %s %'lu bytes\n", minSizeFileName.c_str(), minSize);
        printf("Total File Size Summation: %'lu bytes\n", fileSizeSum);
    }
    if (printMoreFileStats){
        std::cout << "Earliest File: " << earliestCreatedFile << " (created: " << earliestCreatedDate << ")" << std::endl;
        std::cout << "Latest File: " << latestCreatedFile << " (created: " << latestCreatedDate << ")" << std::endl;
    }
    printExtensionMap(foundExts);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment