Created
August 29, 2020 11:08
-
-
Save Gnomorian/92c5350fbbaa450864ef471c79905135 to your computer and use it in GitHub Desktop.
Benchmark of different built-in ways to split a string in C++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <regex> | |
#include <vector> | |
#include <iostream> | |
#include <algorithm> | |
#include <sstream> | |
#include <chrono> | |
#include <assert.h> | |
using namespace std; | |
using Splits = vector<string>; | |
// Splits textToSplit on every occurrence of token by reading delimited
// fields from a string stream with std::getline.
//
// textToSplit: the text to break apart.
// token:       the single-character delimiter; it is not included in the output.
// Returns the fields in order of appearance. Adjacent delimiters produce empty
// fields. An empty input yields one empty field and a trailing delimiter yields
// a final empty field, so the result agrees with findSplit and regexSplit.
Splits stringstreamSplit(const string& textToSplit, char token)
{
    Splits splits;
    istringstream stream(textToSplit); // the stream is only read, never written
    string field;
    while (getline(stream, field, token))
        splits.push_back(field);

    // getline treats token as a field terminator rather than a separator, so
    // it reports nothing for an empty input and nothing after a trailing
    // delimiter. Add the final empty field those cases are missing.
    if (textToSplit.empty() || textToSplit.back() == token)
        splits.emplace_back();
    return splits;
}
// Splits textToSplit on every occurrence of token using std::regex_search.
//
// textToSplit: the text to break apart.
// token:       the single-character delimiter; it is always matched literally,
//              even when it is a regex metacharacter such as '.', '*' or '|'.
// Returns the fields in order of appearance. Adjacent delimiters produce empty
// fields, and the text after the last delimiter (possibly empty) is always
// appended, so the result is never empty.
Splits regexSplit(const string& textToSplit, char token)
{
    // Characters with a special meaning in the default (ECMAScript) grammar.
    // Used unescaped they would match the wrong thing, make the regex
    // constructor throw, or (for '|') match the empty string forever.
    static const string metacharacters{"^$\\.*+?()[]{}|"};

    string pattern;
    if (metacharacters.find(token) != string::npos)
        pattern += '\\';
    pattern += token;
    const regex re(pattern);

    Splits splits;
    smatch match;
    // Walk the input with iterators instead of re-copying the remaining text
    // into a buffer after every match.
    auto cursor = textToSplit.cbegin();
    const auto last = textToSplit.cend();
    while (regex_search(cursor, last, match, re))
    {
        splits.emplace_back(cursor, match[0].first); // text before the delimiter
        cursor = match[0].second;                    // resume just past it
    }
    splits.emplace_back(cursor, last);
    return splits;
}
// Splits textToSplit on every occurrence of token using std::string::find.
//
// textToSplit: the text to break apart.
// token:       the single-character delimiter; it is not included in the output.
// Returns the fields in order of appearance. Adjacent delimiters produce empty
// fields, and the text after the last delimiter (possibly empty) is always
// appended, so the result is never empty.
Splits findSplit(const string& textToSplit, char token)
{
    Splits pieces;
    size_t start = 0;
    for (size_t hit = textToSplit.find(token, start);
         hit != string::npos;
         hit = textToSplit.find(token, start))
    {
        pieces.emplace_back(textToSplit.substr(start, hit - start));
        start = hit + 1; // skip over the delimiter itself
    }
    // Whatever follows the final delimiter (or the whole input if none was found).
    pieces.emplace_back(textToSplit.substr(start));
    return pieces;
}
// Asserts that a split result matches the expected fields exactly: same
// number of fields, same contents, same order.
//
// expected: the fields the splitter should have produced.
// actual:   the fields it did produce.
// The check uses assert, so it is compiled out when NDEBUG is defined.
void assertValidSplits(const Splits& expected, const Splits& actual)
{
    // vector's operator== compares sizes before elements. The three-iterator
    // std::equal would read past the end of a shorter `actual` and would
    // accept a longer one whose prefix happens to match.
    assert(expected == actual);
    // Keep the parameters "used" when assert expands to nothing.
    (void)expected;
    (void)actual;
}
// Times `iterations` calls of a split function on a fixed sample sentence.
//
// func:       callable invoked as func(text, ' '); its result is compared
//             against the expected word list via assertValidSplits.
// iterations: number of times func is called inside the timed region.
// Returns the total wall-clock time of the loop in milliseconds. The
// validation call is inside the timed region, so its cost is included.
template<typename Fn>
std::chrono::duration<double, std::milli> split_benchmark(Fn func, size_t iterations)
{
    const string a{"this is a string of text to split"};
    const Splits expected{"this", "is", "a", "string", "of", "text", "to", "split"};
    // steady_clock is monotonic; high_resolution_clock may alias system_clock,
    // which can be adjusted while the measurement is running.
    using Clock = std::chrono::steady_clock;
    const auto begin{Clock::now()};
    for (size_t i = 0; i < iterations; ++i)
    {
        const auto result{func(a, ' ')};
        assertValidSplits(expected, result);
    }
    const auto end{Clock::now()};
    return std::chrono::duration<double, std::milli>(end - begin);
}
int main() | |
{ | |
const auto iterationCount{1000000}; | |
const auto sstreamTime{split_benchmark(stringstreamSplit, iterationCount)}; | |
const auto regexTime{split_benchmark(regexSplit, iterationCount)}; | |
const auto findTime{split_benchmark(findSplit, iterationCount)}; | |
cout << "std::stringstream took " << sstreamTime.count() << "ms" << endl; | |
cout << "std::regex took " << regexTime.count() << "ms" << endl; | |
cout << "std::string.find() took " << findTime.count() << "ms" << endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
MinGW x64 windows (Debug binaries):
std::stringstream took 1671ms
std::regex took 15994.1ms
std::string.find() took 1189.95ms
MinGW x64 windows (Release binaries):
std::stringstream took 907.998ms
std::regex took 3199.08ms
std::string.find() took 366.002ms
MSVC 14 (2017) x64 (Release binaries):
std::stringstream took 1117.02ms
std::regex took 6823.93ms
std::string.find() took 536.398ms
MSVC 14 (2017) x64 (Debug binaries):
std::stringstream took 74282.3ms
std::regex took 660995ms
std::string.find() took 76841.8ms