Skip to content

Instantly share code, notes, and snippets.

@declank
Last active August 29, 2015 14:10
Show Gist options
  • Save declank/cf545c563d46cb643d22 to your computer and use it in GitHub Desktop.
Save declank/cf545c563d46cb643d22 to your computer and use it in GitHub Desktop.
Exploring boost::filesystem and C++11 features, a program that indexes and searches (ANSI) text files in a directory.
// File built using the following: (Boost required)
// g++ -std=c++11 indexer.cpp -o indexer -lboost_system -lboost_filesystem
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <forward_list>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <boost/filesystem.hpp>
using namespace std;
using namespace boost::filesystem;
typedef string word;
typedef string filename;
typedef map<word, map<filename, int>> index_type;
// Looks a term up in an index file and prints every file that contains it.
//
// Expected command line: <program> search <index_file> <term>
//   argv[2] - path of the index file to read
//   argv[3] - term to look for
//
// Each index line has the form  word///file///count  (the layout produced by
// write_index_to_file). For every line whose word equals the term, the file
// field is printed on its own line to stdout. If no line matches, a notice is
// printed instead.
//
// Returns EXIT_FAILURE when arguments are missing or the index file cannot be
// opened, EXIT_SUCCESS otherwise (including when the term is not found).
int search(int argc, char** argv) {
    if (argc < 4) {
        std::cerr << "Index file and term not specified.\n";
        return EXIT_FAILURE;
    }

    const std::string delimiter("///");

    // Indexed words are stored lowercased, so fold the term the same way;
    // otherwise a term containing capitals could never match.
    std::string term(argv[3]);
    for (char& c : term) {
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }

    std::ifstream index_file(argv[2]);
    if (!index_file) {
        // Without this check a missing index would be reported as
        // "term not found" with a success exit code.
        std::cerr << "Could not open index file: " << argv[2] << '\n';
        return EXIT_FAILURE;
    }

    bool found = false;
    std::string line;
    while (std::getline(index_file, line)) {
        const std::string::size_type word_end = line.find(delimiter);
        if (word_end == std::string::npos) {
            continue;  // Not an index record; ignore it.
        }
        if (line.compare(0, word_end, term) != 0) {
            continue;
        }
        found = true;

        const std::string::size_type name_begin = word_end + delimiter.size();
        // The count is the last field, so locate its delimiter from the back:
        // a file path may itself contain "///".
        const std::string::size_type count_delim = line.rfind(delimiter);
        if (count_delim == std::string::npos || count_delim < name_begin) {
            // No count field present: print the remainder of the line.
            std::cout << line.substr(name_begin) << '\n';
        } else {
            std::cout << line.substr(name_begin, count_delim - name_begin) << '\n';
        }
    }

    if (!found) {
        std::cout << "Search term not found in files.\n";
    }
    return EXIT_SUCCESS;
}
forward_list<filename> build_file_list(const path& directory) {
forward_list<filename> l;
for (directory_iterator itr(directory); itr != directory_iterator(); ++itr) {
l.push_front(itr->path().native());
}
return l;
}
// Splits a character stream into lowercase words.
//
// A word is a maximal run of characters for which isalpha() is true; every
// other character acts as a separator. The iterator keeps a reference to the
// stream, so the stream must outlive the iterator.
class word_iterator {
public:
    // Accepts any input stream (an ifstream binds to this as before).
    word_iterator(std::istream& file) : file_(file) {}

    // Reads the next word into `word`, lowercased.
    //
    // Returns true when a word was read. Returns false, leaving `word`
    // empty, when the stream holds no further word or is in a failed state.
    // The separator that follows a word is consumed as well.
    bool next(std::string& word) {
        word.clear();
        const int end_of_input = std::istream::traits_type::eof();

        // get() returns either eof() or the character as an unsigned value,
        // which is exactly the domain isalpha()/tolower() are defined for.
        // It also returns eof() on a failed stream, so this cannot spin.
        int current = file_.get();
        while (current != end_of_input && !std::isalpha(current)) {
            current = file_.get();
        }
        if (current == end_of_input) {
            return false;
        }

        do {
            word += static_cast<char>(std::tolower(current));
            current = file_.get();
        } while (current != end_of_input && std::isalpha(current));
        return true;
    }

    // True once nothing more can be read: end of input was reached or the
    // stream is in a failed state.
    inline bool end() const {
        return !file_.good();
    }

private:
    std::istream& file_;
};
// Counts how often each word occurs in each of the given files.
//
// Returns a map from word to (file name -> number of occurrences), with
// words as produced by word_iterator. Files that cannot be opened are
// reported on stderr and skipped.
index_type build_index(std::forward_list<filename> files_to_be_indexed) {
    index_type word_counts;
    for (const auto& file_name : files_to_be_indexed) {
        std::ifstream file(file_name);
        if (!file) {
            // Skip the file instead of handing a failed stream to
            // word_iterator.
            std::cerr << "Skipping unreadable file: " << file_name << '\n';
            continue;
        }

        word_iterator words(file);
        std::string current_word;
        while (words.next(current_word)) {
            ++word_counts[current_word][file_name];
        }
        // `file` is closed by its destructor at the end of the iteration.
    }
    return word_counts;
}
// Writes the index to `filename`, one record per line:
//
//     word///file///count
//
// Any existing file of that name is overwritten. Problems (no file name,
// file cannot be opened, write failure) are reported on stderr; the function
// has no return value to signal them.
void write_index_to_file(index_type index, char* filename) {
    if (filename == nullptr) {
        // Constructing an ofstream from a null pointer is undefined.
        std::cerr << "No index file name given; index was not written.\n";
        return;
    }

    std::ofstream index_file(filename);
    if (!index_file) {
        std::cerr << "Could not open index file for writing: " << filename << '\n';
        return;
    }

    for (const auto& word_entry : index) {
        for (const auto& file_entry : word_entry.second) {
            index_file << word_entry.first << "///"
                       << file_entry.first << "///"
                       << file_entry.second << '\n';
        }
    }

    // Flush explicitly so a write error (e.g. disk full) is noticed here
    // rather than silently dropped in the destructor.
    if (!index_file.flush()) {
        std::cerr << "Failed while writing index file: " << filename << '\n';
    }
}
int index(int argc, char** argv) {
if(argc < 3) {
cerr << "Need to specifiy a directory and index file to write create." << '\n';
return EXIT_FAILURE;
}
path directory(argv[2]);
char* index_file = argv[3];
if(!is_directory(directory)) {
cerr << "Specified directory is invalid." << '\n';
return EXIT_FAILURE;
}
forward_list<filename> files_to_be_indexed = build_file_list(absolute(directory));
index_type words_in_files = build_index(files_to_be_indexed);
write_index_to_file(words_in_files, index_file);
return EXIT_SUCCESS;
}
// Prints the command-line synopsis for both sub-commands to stderr.
// `program_name` is inserted after "Usage: " on each synopsis line.
void error_usage(char* program_name) {
    static const char* const synopses[] = {
        " index directory_to_index index_file\n",
        " search index_file term\n",
    };

    std::cerr << "Incorrect command line usage\n";
    for (const char* synopsis : synopses) {
        std::cerr << "Usage: " << program_name << synopsis;
    }
}
// Entry point: dispatches on the sub-command in argv[1].
//   "search" -> search(argc, argv)
//   "index"  -> index(argc, argv)
// A missing or unknown sub-command prints the usage text and fails.
int main(int argc, char** argv) {
    const bool has_command = argc >= 2;
    if (has_command) {
        const std::string command(argv[1]);
        if (command == "search") {
            return search(argc, argv);
        }
        if (command == "index") {
            return index(argc, argv);
        }
    }

    error_usage(argv[0]);
    return EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment