Last active
August 29, 2015 14:10
-
-
Save declank/cf545c563d46cb643d22 to your computer and use it in GitHub Desktop.
A program that indexes and searches (ANSI) text files in a directory, written to explore boost::filesystem and C++11 features.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build with the following command (Boost is required):
// g++ -std=c++11 indexer.cpp -o indexer -lboost_system -lboost_filesystem
#include <cstring> | |
#include <forward_list> | |
#include <fstream> | |
#include <iostream> | |
#include <map> | |
#include <sstream> | |
#include <vector> | |
#include <boost/filesystem.hpp> | |
using namespace std; | |
using namespace boost::filesystem; | |
// A single word taken from an indexed file.
using word = std::string;

// Path of a file that has been indexed.
using filename = std::string;

// For every word, the files it occurs in and the number of occurrences
// in each of those files.
using index_type = std::map<word, std::map<filename, int>>;
// Looks up a term in an index file and prints every file that contains it.
//
// argv[2] is the path of the index file and argv[3] is the search term.
// Each index line is expected to look like `word///filename///count`; for
// every line whose word field equals the term exactly, the filename field is
// written to stdout on its own line. Lines that do not contain both
// delimiters are ignored.
//
// Returns EXIT_FAILURE when the arguments are missing or the index file
// cannot be opened, EXIT_SUCCESS otherwise (also when nothing matched, in
// which case a notice is printed instead).
int search(int argc, char** argv) {
    if (argc < 4) {
        std::cerr << "Index file and term not specified.\n";
        return EXIT_FAILURE;
    }

    const std::string delimiter("///");
    const std::string given_search_term(argv[3]);

    std::ifstream index_file(argv[2]);
    if (!index_file) {
        std::cerr << "Unable to open index file.\n";
        return EXIT_FAILURE;
    }

    bool found = false;
    std::string line;
    while (std::getline(index_file, line)) {
        const std::string::size_type first_delimiter = line.find(delimiter);
        // Without a delimiter there is no word field to compare against;
        // continuing would make `first_delimiter + 3` wrap around.
        if (first_delimiter == std::string::npos)
            continue;

        if (line.compare(0, first_delimiter, given_search_term) != 0)
            continue;

        // The count field holds only digits, so the last delimiter on the
        // line is the one that separates the filename from the count.
        const std::string::size_type name_begin = first_delimiter + delimiter.size();
        const std::string::size_type second_delimiter = line.rfind(delimiter);
        if (second_delimiter < name_begin)
            continue;  // only one delimiter present: malformed line

        found = true;
        std::cout << line.substr(name_begin, second_delimiter - name_begin) << '\n';
    }

    if (!found) {
        std::cout << "Search term not found in files.\n";
    }
    return EXIT_SUCCESS;
}
forward_list<filename> build_file_list(const path& directory) { | |
forward_list<filename> l; | |
for (directory_iterator itr(directory); itr != directory_iterator(); ++itr) { | |
l.push_front(itr->path().native()); | |
} | |
return l; | |
} | |
// Splits the contents of an input file stream into lower-cased words, where a
// word is a maximal run of alphabetic characters (as classified by isalpha).
//
// The iterator only borrows the stream: the caller must keep it alive for as
// long as the iterator is used.
class word_iterator {
public:
    word_iterator(std::ifstream& file) : file_(file) {}

    // Reads the next word into `word`.
    //
    // Returns true when a word was extracted. Returns false, leaving `word`
    // empty, once no further word can be read; this covers end of file as
    // well as a stream that failed to open or hit a read error.
    bool next(std::string& word) {
        typedef std::ifstream::traits_type traits;
        const traits::int_type eof = traits::eof();

        word.clear();

        // Keep the value returned by get() as an int: it is either EOF or an
        // unsigned-char value, which is exactly what isalpha/tolower require.
        // EOF is also what get() returns on a failed stream, so testing it
        // ends the loop in cases where eof() alone would stay false forever.
        traits::int_type c = file_.get();
        while (c != eof && !std::isalpha(c))
            c = file_.get();  // skip non-alphabetic characters
        if (c == eof)
            return false;

        do {
            word += static_cast<char>(std::tolower(c));
            c = file_.get();
        } while (c != eof && std::isalpha(c));
        return true;
    }

    // True once the underlying stream has reported end of file.
    inline bool end() {
        return file_.eof();
    }

private:
    std::ifstream& file_;
};
// Builds the word index for the given files.
//
// Every file is opened and split into words with word_iterator; each word
// found increments the counter stored under index[word][file path].
// A file that cannot be opened is reported on stderr and skipped, so a single
// unreadable entry does not stall or abort the whole run.
//
// The list is taken by const reference since it is only read.
index_type build_index(const std::forward_list<filename>& files_to_be_indexed) {
    index_type index;
    for (const auto& file_path : files_to_be_indexed) {
        std::ifstream file(file_path);
        if (!file) {
            std::cerr << "Skipping unreadable file: " << file_path << '\n';
            continue;
        }

        word_iterator words(file);
        std::string current_word;
        while (words.next(current_word)) {
            ++index[current_word][file_path];
        }
        // `file` is closed by its destructor at the end of each iteration.
    }
    return index;
}
void write_index_to_file(index_type index, char* filename) { | |
string line; | |
ofstream index_file(filename); | |
for (const auto& outer_pair : index) { | |
for (const auto& inner_pair : outer_pair.second) { | |
stringstream ss; | |
ss << outer_pair.first << "///" << inner_pair.first << "///" << inner_pair.second << '\n'; | |
line = ss.str(); | |
index_file << line; | |
} | |
} | |
} | |
int index(int argc, char** argv) { | |
if(argc < 3) { | |
cerr << "Need to specifiy a directory and index file to write create." << '\n'; | |
return EXIT_FAILURE; | |
} | |
path directory(argv[2]); | |
char* index_file = argv[3]; | |
if(!is_directory(directory)) { | |
cerr << "Specified directory is invalid." << '\n'; | |
return EXIT_FAILURE; | |
} | |
forward_list<filename> files_to_be_indexed = build_file_list(absolute(directory)); | |
index_type words_in_files = build_index(files_to_be_indexed); | |
write_index_to_file(words_in_files, index_file); | |
return EXIT_SUCCESS; | |
} | |
// Prints the command line usage summary to stderr.
//
// `program_name` is the name shown in the usage lines (normally argv[0]).
// argv[0] may be a null pointer when the program is started with an empty
// argument vector; streaming a null char* is undefined behaviour, so the
// executable name from the build instructions is used as a fallback.
void error_usage(char* program_name) {
    const char* shown_name = (program_name != nullptr) ? program_name : "indexer";

    std::cerr << "Incorrect command line usage\n"
              << "Usage: " << shown_name << " index directory_to_index index_file\n"
              << "Usage: " << shown_name << " search index_file term\n";
}
int main(int argc, char** argv) { | |
if(argc < 2) { | |
error_usage(argv[0]); | |
return EXIT_FAILURE; | |
} | |
if(strcmp(argv[1], "search") == 0) | |
return search(argc, argv); | |
else if(strcmp(argv[1], "index") == 0) | |
return index(argc, argv); | |
error_usage(argv[0]); | |
return EXIT_FAILURE; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment