Skip to content

Instantly share code, notes, and snippets.

@Agnishom
Last active September 14, 2023 18:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Agnishom/4ce98a7e7165fe9cdd06965a16962a94 to your computer and use it in GitHub Desktop.
Save Agnishom/4ce98a7e7165fe9cdd06965a16962a94 to your computer and use it in GitHub Desktop.
Regex Execution
// g++ -o boostregex-test boostregex-test.cpp -lboost_regex
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/regex.hpp>
// Reads the file `fileName` and returns its contents split on '\n', one
// element per line, in file order (the '\n' delimiter itself is dropped).
// When the file cannot be opened, a diagnostic goes to stderr and the
// returned vector is empty.
std::vector<std::string> readLines(const std::string& fileName) {
    std::vector<std::string> lines;
    std::ifstream in(fileName);
    if (in) {
        // std::getline stops returning a good stream at end of file.
        for (std::string current; std::getline(in, current);) {
            lines.push_back(current);
        }
    } else {
        std::cerr << "Error in read_lines: Unable to open file " << fileName << std::endl;
    }
    return lines;
}
// Plain record describing the outcome of one regex run; all members are public.
struct MatchRecord {
    int regexId;     // identifier of the regex this record belongs to
    double durMsec;  // measured duration, in milliseconds
    int output;      // 0 if no match, 1 if there is a match

    // Stores the three values as given; no validation is performed.
    MatchRecord(int regexId, double durMsec, int output)
        : regexId{regexId}, durMsec{durMsec}, output{output} {}
};
int main(int argc, char **argv){
if (argc < 2) {
std::cerr << "Not enough arguments." << std::endl;
return 1;
}
std::vector<char> buffer;
char ch;
while (std::cin.get(ch)) {
buffer.push_back(ch);
}
if (!std::cin.eof() && std::cin.fail()) {
std::cerr << "Error reading input" << std::endl;
return 1;
}
std::string inputText(buffer.begin(), buffer.end());
int textLen = inputText.length();
std::cout << "input loaded, length = " << std::setprecision(2) << textLen << " bytes" << std::endl;
std::string patternFile = argv[1];
std::vector<std::string> patterns = readLines(patternFile);
std::vector<MatchRecord> results;
for (int i = 0; i < patterns.size(); i++){
int regexId = i;
std::string pattern_str = patterns[i];
std::cout << "[" << regexId << "] " << patternFile << std::endl;
std::cout << "regex: " << pattern_str << std::endl;
// Create a regular expression object from the pattern string
boost::regex pattern;
boost::smatch match;
try {
pattern = boost::regex(pattern_str);
} catch (boost::regex_error& e) {
std::cerr << "Error compiling regex: " << e.what() << std::endl;
std::cout << std::endl;
continue;
}
// Measure the time it takes to perform the match
auto start_time = std::chrono::steady_clock::now();
bool match_found = false;
try {
match_found = boost::regex_search(inputText, match, pattern);
} catch (boost::regex_error& e) {
std::cerr << "Error matching regex: " << e.what() << std::endl;
std::cout << std::endl;
continue;
}
auto end_time = std::chrono::steady_clock::now();
// Print the results
if (match_found) {
std::cout << "Match found at [" << match.position() << "," << match.position() + match.length() << "]" << std::endl;
} else {
std::cout << "No match found" << std::endl;
}
std::cout << "text length = " << textLen << " bytes" << std::endl;
auto durNano = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count();
std::cout << "duration (msec) = " << durNano / 1000000.0 << std::endl;
double throughput = (double) inputText.length() / durNano * 1000000000.0;
std::cout << "throughput (bytes/second) = " << throughput << std::endl;
std::cout << std::endl;
// add to the results
double durMsec = ((double) durNano) / 1000000.0;
int output = match_found ? 1 : 0;
MatchRecord outputRecord(regexId, durMsec, output);
results.push_back(outputRecord);
}
std::cout << "HEADER: id,text_len,duration,output" << std::endl;
for (MatchRecord r : results) {
std::cout << r.regexId << "," << textLen << "," << r.durMsec << "," << r.output << std::endl;
}
return 0;
}
// g++ -o stdregex stdregex-test.cpp
#include <fstream>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <regex>
#include <chrono>
// Reads the file `fileName` and returns its contents split on '\n', one
// element per line, in file order (the '\n' delimiter itself is dropped).
// When the file cannot be opened, a diagnostic goes to stderr and the
// returned vector is empty.
std::vector<std::string> readLines(const std::string& fileName) {
    std::vector<std::string> lines;
    std::ifstream in(fileName);
    if (in) {
        // std::getline stops returning a good stream at end of file.
        for (std::string current; std::getline(in, current);) {
            lines.push_back(current);
        }
    } else {
        std::cerr << "Error in read_lines: Unable to open file " << fileName << std::endl;
    }
    return lines;
}
// Plain record describing the outcome of one regex run; all members are public.
struct MatchRecord {
    int regexId;     // identifier of the regex this record belongs to
    double durMsec;  // measured duration, in milliseconds
    int output;      // 0 if no match, 1 if there is a match

    // Stores the three values as given; no validation is performed.
    MatchRecord(int regexId, double durMsec, int output)
        : regexId{regexId}, durMsec{durMsec}, output{output} {}
};
int main(int argc, char **argv){
if (argc < 2) {
std::cerr << "Not enough arguments." << std::endl;
return 1;
}
std::vector<char> buffer;
char ch;
while (std::cin.get(ch)) {
buffer.push_back(ch);
}
if (!std::cin.eof() && std::cin.fail()) {
std::cerr << "Error reading input" << std::endl;
return 1;
}
std::string inputText(buffer.begin(), buffer.end());
int textLen = inputText.length();
std::cout << "input loaded, length = " << std::setprecision(2) << textLen << " bytes" << std::endl;
std::string patternFile = argv[1];
std::vector<std::string> patterns = readLines(patternFile);
std::vector<MatchRecord> results;
for (int i = 0; i < patterns.size(); i++){
int regexId = i;
std::string pattern_str = patterns[i];
std::cout << "[" << regexId << "] " << patternFile << std::endl;
std::cout << "regex: " << pattern_str << std::endl;
// Create a regular expression object from the pattern string
std::regex pattern;
std::smatch match;
try {
pattern = std::regex(pattern_str, std::regex::ECMAScript | std::regex::optimize);
} catch (std::regex_error& e) {
std::cerr << "Error compiling regex: " << e.what() << std::endl;
std::cout << std::endl;
continue;
}
// Measure the time it takes to perform the match
auto start_time = std::chrono::steady_clock::now();
bool match_found = false;
try {
match_found = std::regex_search(inputText, match, pattern);
} catch (std::regex_error& e) {
std::cerr << "Error matching regex: " << e.what() << std::endl;
std::cout << std::endl;
continue;
}
auto end_time = std::chrono::steady_clock::now();
// Print the results
if (match_found) {
std::cout << "Match found at [" << match.position() << "," << match.position() + match.length() << "]" << std::endl;
} else {
std::cout << "No match found" << std::endl;
}
std::cout << "text length = " << textLen << " bytes" << std::endl;
auto durNano = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count();
std::cout << "duration (msec) = " << durNano / 1000000.0 << std::endl;
double throughput = (double) inputText.length() / durNano * 1000000000.0;
std::cout << "throughput (bytes/second) = " << throughput << std::endl;
std::cout << std::endl;
// add to the results
double durMsec = ((double) durNano) / 1000000.0;
int output = match_found ? 1 : 0;
MatchRecord outputRecord(regexId, durMsec, output);
results.push_back(outputRecord);
}
std::cout << "HEADER: id,text_len,duration,output" << std::endl;
for (MatchRecord r : results) {
std::cout << r.regexId << "," << textLen << "," << r.durMsec << "," << r.output << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment