Skip to content

Instantly share code, notes, and snippets.

@apainintheneck
Created June 4, 2022 22:19
Show Gist options
  • Save apainintheneck/21c5a1eb2c1fb4f8b0afa7d67bfdf114 to your computer and use it in GitHub Desktop.
Save apainintheneck/21c5a1eb2c1fb4f8b0afa7d67bfdf114 to your computer and use it in GitHub Desktop.
A simple CLI app that lets you search for files older than a given age.
#include <glob.h>
#include <sys/stat.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
// Parsed command-line options: the key is the option name (e.g. "-v", "--last"),
// the value is the text after '=' or "" when the option was given without one.
using opt_map = std::map<std::string, std::string>;
//
// HELP
//
// Returns the usage/help text shown for -h, --help, or when no arguments are given.
auto help() {
    static constexpr const char* usage_text = R"EOS(
[old]
Find files and dirs by their age.
[usage]
old [-dfv] [-o=oldest-elems] [-m=months-old] [-y=years-old] [-l=ACCESS|MODIFY|CHANGE] [file ...]
[options]
-d / --dirs :
Only include dirs in search.
-f / --files :
Only include files in search.
-l= / --last= :
Time mode used for age comparison.
ACCESS is the last access time(default).
MODIFY is the last modification time.
CHANGE is the last status change time.
-m= / --months-old= :
Show files or dirs older than n months.
-o= / --oldest= :
Show the oldest n files or dirs.
Has priority over -m and -y.
-v / --verbose :
Print timestamp before each path.
-y= / --years-old= :
Show files or dirs older than n years.
)EOS";
    return usage_text;
}
//
// UTILS
//
// Bounded heap that retains at most `capacity` (T1, T2) pairs.
//
// The comparator decides which elements survive once the list is full:
//   - a less-than comparator keeps the n smallest elements,
//   - a greater-than comparator keeps the n largest elements.
// top() exposes the element that would be evicted next, i.e. the largest of
// the retained elements under a less-than comparator.
template <typename T1, typename T2>
class top_n_list {
public:
    using pair = std::pair<T1, T2>;
    // Use a less than function to get n smallest.
    // Use a greater than function to get n largest.
    // Arguments are taken by const reference so heap operations do not copy pairs.
    using cmp_func = std::function<bool(const pair&, const pair&)>;

    // Storage grows on demand instead of being reserved up front: `capacity`
    // may come straight from user input, and reserving a huge value would
    // throw before a single element is inserted.
    top_n_list(size_t capacity, cmp_func cmp) : capacity_(capacity), cmp_(std::move(cmp)) {}

    // Offers `elem` to the list. Returns true if it was stored, false if it
    // was rejected (list is full and `elem` ranks after the current top, or
    // the capacity is zero).
    bool insert(pair elem);
    // Returns the current heap top. Throws std::out_of_range when empty.
    const pair& top() const;
    // Removes the current heap top. Throws std::out_of_range when empty.
    void pop();
    // True when no elements are stored.
    bool empty() const;

private:
    const size_t capacity_;
    std::vector<pair> list;  // heap ordered by cmp_
    cmp_func cmp_;
};

template <typename T1, typename T2>
bool top_n_list<T1, T2>::insert(pair value) {
    // A zero-capacity list can never hold anything; without this guard the
    // "full" branch below would read front() of an empty vector.
    if(capacity_ == 0) return false;

    if(list.size() < capacity_) {
        list.push_back(std::move(value));
        std::push_heap(list.begin(), list.end(), cmp_);
        return true;
    }

    // Full: the top is the worst retained element. Reject anything that
    // ranks after it; otherwise replace it with the new value.
    if(cmp_(list.front(), value)) return false;
    std::pop_heap(list.begin(), list.end(), cmp_);
    list.back() = std::move(value);
    std::push_heap(list.begin(), list.end(), cmp_);
    return true;
}

template <typename T1, typename T2>
const typename top_n_list<T1, T2>::pair& top_n_list<T1, T2>::top() const {
    if(empty()) throw std::out_of_range("Cannot look at top of empty container");
    return list.front();
}

template <typename T1, typename T2>
void top_n_list<T1, T2>::pop() {
    if(empty()) throw std::out_of_range("Cannot pop off of empty container");
    std::pop_heap(list.begin(), list.end(), cmp_);
    list.pop_back();
}

template <typename T1, typename T2>
bool top_n_list<T1, T2>::empty() const {
    return list.empty();
}
//
// GLOB
//
// Expands every shell pattern in `patterns` and returns all matches in one list.
// Directory matches carry a trailing '/' (GLOB_MARK) so callers can tell them
// apart from files. Return codes of the underlying glob() calls are ignored.
std::vector<std::string> glob(const std::vector<std::string>& patterns) {
    std::vector<std::string> matches;
    if(patterns.empty()) return matches;

    glob_t results;
    results.gl_offs = 0;
    const int base_flags = GLOB_DOOFFS | GLOB_TILDE | GLOB_MARK;
    for(size_t idx = 0; idx < patterns.size(); ++idx) {
        // Only the first call starts a fresh result set; the rest append to it.
        const int flags = (idx == 0) ? base_flags : (base_flags | GLOB_APPEND);
        glob(patterns[idx].c_str(), flags, NULL, &results);
    }

    matches.assign(results.gl_pathv, results.gl_pathv + results.gl_pathc);
    globfree(&results);
    return matches;
}
// Expands `patterns` and keeps only non-directory matches, i.e. drops every
// path that ends in '/'.
std::vector<std::string> glob_files(const std::vector<std::string>& patterns) {
    std::vector<std::string> matches = glob(patterns);
    matches.erase(
        std::remove_if(matches.begin(), matches.end(),
                       [](const std::string& path) { return path.back() == '/'; }),
        matches.end());
    return matches;
}
// Expands `patterns` and keeps only directory matches, i.e. the paths that
// end in '/'.
std::vector<std::string> glob_dirs(const std::vector<std::string>& patterns) {
    std::vector<std::string> matches = glob(patterns);
    matches.erase(
        std::remove_if(matches.begin(), matches.end(),
                       [](const std::string& path) { return path.back() != '/'; }),
        matches.end());
    return matches;
}
std::vector<std::string> glob_dispatch(const std::vector<std::string>& patterns, const opt_map& options) {
bool include_files = options.count("-f") or options.count("--files");
bool include_dirs = options.count("-d") or options.count("--dirs");
if(include_files and not include_dirs) {
return glob_files(patterns);
} else if(include_dirs and not include_files) {
return glob_dirs(patterns);
}
return glob(patterns);
}
//
// TIME
//
// Returns the local calendar time that lies `years` years and `months` months
// before the current moment.
//
// The month/year fields are simply decremented and left for mktime() to
// normalize: a negative tm_mon borrows from tm_year on its own. The previous
// manual borrow both incremented `years` and subtracted an extra 12 months,
// which pushed the cutoff two years too far into the past whenever the
// current month index was smaller than `months`.
std::time_t time_in_the_past(int months, int years) {
    const auto epoch_time = std::time(nullptr);
    auto calendar_time = *std::localtime(&epoch_time);
    calendar_time.tm_mon -= months;
    calendar_time.tm_year -= years;
    // Let mktime() decide whether DST applies on the target date instead of
    // reusing today's setting.
    calendar_time.tm_isdst = -1;
    return std::mktime(&calendar_time);
}
// True when `file_time` lies strictly before `cutoff_time`.
bool older_than(const std::time_t& file_time, const std::time_t& cutoff_time) {
    const double seconds_before_cutoff = std::difftime(cutoff_time, file_time);
    return seconds_before_cutoff > 0;
}
// Accessors that each pull one timestamp out of a stat64 result.
namespace time_func {

// Last access time.
auto atime(const struct stat64& statbuf) -> std::time_t {
    return statbuf.st_atime;
}

// Last modification time.
auto mtime(const struct stat64& statbuf) -> std::time_t {
    return statbuf.st_mtime;
}

// Last status change time.
auto ctime(const struct stat64& statbuf) -> std::time_t {
    return statbuf.st_ctime;
}

} // namespace time_func
// Callable that extracts one timestamp from a stat64 result; the functions in
// namespace time_func are the available implementations.
using time_from_stat64 = std::function<std::time_t(const struct stat64&)>;
// Maps the --last / -l option to the matching timestamp accessor.
// --last takes precedence over -l. A missing or empty mode selects the access
// time; an unrecognized mode prints an error and terminates the program.
time_from_stat64 time_func_dispatch(const opt_map& options) {
    auto found = options.find("--last");
    if(found == options.end()) found = options.find("-l");
    const std::string mode = (found != options.end()) ? found->second : std::string();

    if(mode.empty() or mode == "ACCESS") return time_func::atime;
    if(mode == "MODIFY") return time_func::mtime;
    if(mode == "CHANGE") return time_func::ctime;

    std::cout << "Invalid --last: unknown parameter: " << mode << '\n';
    exit(EXIT_FAILURE);
}
//
// PRINT
//
// Prints every path whose selected timestamp is older than `cutoff_time`,
// one per line, prefixed with the timestamp when `verbose` is set.
// Paths that cannot be stat'ed are skipped. Returns true if anything was printed.
bool print_older_than_n(const std::vector<std::string>& paths, const std::time_t& cutoff_time, const time_from_stat64 get_time, const bool verbose) {
    bool printed_any = false;
    struct stat64 info;
    for(const auto& candidate : paths) {
        if(stat64(candidate.c_str(), &info) == 0) {
            const std::time_t stamp = get_time(info);
            if(older_than(stamp, cutoff_time)) {
                if(verbose) {
                    // ctime() output ends in a newline; cut it off so the
                    // path stays on the same line.
                    char* stamp_text = ctime(&stamp);
                    stamp_text[strcspn(stamp_text, "\n")] = '\0';
                    std::cout << stamp_text << " | ";
                }
                std::cout << candidate << '\n';
                printed_any = true;
            }
        }
    }
    return printed_any;
}
bool print_oldest_n(const std::vector<std::string>& paths, const time_from_stat64 get_time, const bool verbose, const int oldest_n) {
const auto cmp = [](const auto& a, const auto& b){ return older_than(a.first, b.first); };
top_n_list<std::time_t, std::string> oldest_n_list(oldest_n, cmp);
struct stat64 statbuf;
for(auto& path : paths) {
if(stat64(path.c_str(), &statbuf) != 0) continue;
const auto file_time = get_time(statbuf);
oldest_n_list.insert({file_time, path});
}
if(oldest_n_list.empty()) return false;
while(not oldest_n_list.empty()) {
if(verbose) {
char* time_str = ctime(&oldest_n_list.top().first);
time_str[strcspn(time_str, "\n")] = '\0';
std::cout << time_str << " | ";
}
std::cout << oldest_n_list.top().second << '\n';
oldest_n_list.pop();
}
return true;
}
//
// PARSE
//
// Collects the positional arguments: every non-empty argv entry after the
// program name that does not start with '-'.
std::vector<std::string> parse_args(const int argc, const char* argv[]) {
    std::vector<std::string> positional;
    for(int idx = 1; idx < argc; ++idx) {
        const char* arg = argv[idx];
        const bool is_empty = arg[0] == '\0';
        if(is_empty or arg[0] == '-') continue;
        positional.push_back(arg);
    }
    return positional;
}
// Collects every argv entry that starts with '-' into an option map.
//
//   "-x" / "--name"        -> key with an empty value
//   "-x=val" / "--name=val" -> key with value "val"
//   "-x=" (nothing after =) -> ignored
//   "-dfv"                  -> expanded to "-d", "-f", "-v" (each with an
//                              empty value), matching the grouped form the
//                              usage text advertises
//
// A later occurrence of the same key overwrites an earlier one.
opt_map parse_options(const int argc, const char* argv[]) {
    std::map<std::string,std::string> options;
    for(int i = 1; i < argc; ++i) {
        const std::string arg(argv[i]);
        if(arg.empty() or arg.front() != '-') continue;  // positional argument

        const auto eq_pos = arg.find('=');
        if(eq_pos != std::string::npos) {
            // Skip options with trailing equals signs.
            if(eq_pos + 1 < arg.size()) {
                options[arg.substr(0, eq_pos)] = arg.substr(eq_pos + 1);
            }
            continue;
        }

        // A single dash followed by several letters is a group of short
        // flags; store each letter as its own flag so lookups for "-d",
        // "-f", "-v" succeed.
        const bool is_short_group = arg.size() > 2 and arg[1] != '-';
        if(is_short_group) {
            for(size_t pos = 1; pos < arg.size(); ++pos) {
                options[std::string("-") + arg[pos]] = "";
            }
        } else {
            options[arg] = "";
        }
    }
    return options;
}
std::string parse_option(const std::string short_opt, const std::string long_opt, const opt_map& options) {
std::string value;
if(options.count(long_opt)) value = options.at(long_opt);
else if(options.count(short_opt)) value = options.at(short_opt);
return value;
}
// Converts `str` to a non-negative int. The whole string must be consumed by
// the conversion; otherwise, or when the value is negative or out of range,
// `err_msg` is printed and the program exits with a failure status.
int parse_positive_int(const std::string& str, const char* err_msg) {
    int value = 0;
    bool valid = false;
    try {
        size_t consumed = 0;
        value = std::stoi(str, &consumed);
        valid = consumed == str.size() and value >= 0;
    } catch(...) {
        // Conversion failures are reported below together with range errors.
    }
    if(not valid) {
        std::cout << err_msg << '\n';
        exit(EXIT_FAILURE);
    }
    return value;
}
// Builds the cutoff time from the -y/--years-old and -m/--months-old options.
// Missing options count as zero; whole years contained in the month count are
// moved into the year count before the cutoff is computed.
time_t parse_time(const opt_map& options) {
    const auto read_count = [&options](const char* short_opt, const char* long_opt, const char* err_msg) {
        const std::string text = parse_option(short_opt, long_opt, options);
        return text.empty() ? 0 : parse_positive_int(text, err_msg);
    };

    const int years = read_count("-y", "--years-old", "Invalid --years-old: requires a positive integer");
    const int months = read_count("-m", "--months-old", "Invalid --months-old: requires a positive integer");

    return time_in_the_past(months % 12, years + months / 12);
}
// Reads the -o/--oldest option; returns 0 when it is absent or has no value.
int parse_oldest(const opt_map& options) {
    const std::string requested = parse_option("-o", "--oldest", options);
    if(requested.empty()) return 0;
    return parse_positive_int(requested, "Invalid --oldest: requires a positive integer");
}
// Entry point: prints help when asked (or when run without arguments),
// otherwise expands the given patterns and prints either the oldest n matches
// (-o) or every match older than the -m/-y cutoff.
// Exits with failure when nothing matched or nothing was printed.
int main(const int argc, const char * argv[]) {
    const bool wants_help = argc < 2 or strcmp(argv[1], "-h") == 0 or strcmp(argv[1], "--help") == 0;
    if(wants_help) {
        std::cout << help();
        return EXIT_SUCCESS;
    }

    const auto options = parse_options(argc, argv);
    const auto paths = glob_dispatch(parse_args(argc, argv), options);
    if(paths.empty()) return EXIT_FAILURE;

    const auto get_time = time_func_dispatch(options);
    const bool verbose = options.count("-v") or options.count("--verbose");
    const auto oldest = parse_oldest(options);

    // -o takes priority over the -m / -y cutoff.
    const bool success = oldest
        ? print_oldest_n(paths, get_time, verbose, oldest)
        : print_older_than_n(paths, parse_time(options), get_time, verbose);

    return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment