Skip to content

Instantly share code, notes, and snippets.

@Gnomorian
Created August 29, 2020 11:08
Show Gist options
  • Save Gnomorian/92c5350fbbaa450864ef471c79905135 to your computer and use it in GitHub Desktop.
Save Gnomorian/92c5350fbbaa450864ef471c79905135 to your computer and use it in GitHub Desktop.
Benchmark of different built-in ways to split a string in C++
#include <string>
#include <regex>
#include <vector>
#include <iostream>
#include <algorithm>
#include <sstream>
#include <chrono>
#include <assert.h>
using namespace std;
using Splits = vector<string>;
/// Splits `textToSplit` on every occurrence of `token` using a string stream.
///
/// Empty fields are preserved, including a trailing one, so the result always
/// holds (number of delimiters + 1) entries -- the same contract as
/// regexSplit and findSplit.
///
/// @param textToSplit text to break apart
/// @param token       single-character delimiter
/// @return the fields in order of appearance
Splits stringstreamSplit(const string& textToSplit, char token)
{
    Splits splits;
    istringstream stream(textToSplit); // the stream is only ever read from
    string split;
    while (getline(stream, split, token))
        splits.emplace_back(split);
    // getline() reports end-of-input instead of yielding the empty field that
    // follows a trailing delimiter (or that makes up an empty input), so add
    // that field explicitly to match the other split implementations.
    if (textToSplit.empty() || textToSplit.back() == token)
        splits.emplace_back();
    return splits;
}
/// Splits `textToSplit` on every occurrence of `token` using std::regex.
///
/// The delimiter is always matched literally: characters that have a special
/// meaning in the (default ECMAScript) regex grammar are escaped before the
/// pattern is compiled. Empty fields are preserved, so the result always
/// holds (number of delimiters + 1) entries.
///
/// @param textToSplit text to break apart
/// @param token       single-character delimiter
/// @return the fields in order of appearance
Splits regexSplit(const string& textToSplit, char token)
{
    // Without escaping, a token such as '.' would match every character and
    // tokens such as '*' or '(' would make the regex constructor throw.
    static const string metaCharacters{"^$\\.*+?()[]{}|"};
    string pattern;
    if (metaCharacters.find(token) != string::npos)
        pattern += '\\';
    pattern += token;
    const regex re(pattern);

    Splits splits;
    smatch match;
    // Walk the input with iterators instead of copying the remaining suffix
    // into a fresh buffer after every match.
    auto searchBegin = textToSplit.cbegin();
    const auto searchEnd = textToSplit.cend();
    while (regex_search(searchBegin, searchEnd, match, re))
    {
        splits.emplace_back(searchBegin, match[0].first);
        searchBegin = match[0].second; // resume right after the delimiter
    }
    // Whatever follows the last delimiter (possibly nothing) is the final field.
    splits.emplace_back(searchBegin, searchEnd);
    return splits;
}
/// Splits `textToSplit` on every occurrence of `token` using string::find.
///
/// Empty fields are kept, so the result always holds
/// (number of delimiters + 1) entries.
///
/// @param textToSplit text to break apart
/// @param token       single-character delimiter
/// @return the fields in order of appearance
Splits findSplit(const string& textToSplit, char token)
{
    Splits pieces;
    size_t start = 0;
    for (size_t hit = textToSplit.find(token);
         hit != string::npos;
         hit = textToSplit.find(token, start))
    {
        pieces.push_back(textToSplit.substr(start, hit - start));
        start = hit + 1; // continue just past the delimiter
    }
    // Remainder after the last delimiter (may be empty).
    pieces.push_back(textToSplit.substr(start));
    return pieces;
}
/// Asserts that `actual` holds exactly the same fields as `expected`.
///
/// Vector equality compares the sizes as well as the elements, so a result
/// that is shorter or longer than expected is rejected instead of being read
/// out of bounds or silently accepted.
///
/// Note: like every assert, the check is compiled out when NDEBUG is defined.
///
/// @param expected the reference split result
/// @param actual   the split result under test
void assertValidSplits(const Splits& expected, const Splits& actual)
{
    assert(expected == actual);
}
/// Measures how long `iterations` calls of `func` take on a fixed sample
/// sentence split on spaces.
///
/// Every result is checked with assertValidSplits inside the timed region, so
/// the cost of that check is part of the reported time.
///
/// @tparam Fn        callable invoked as func(const string&, char) whose
///                   result can be passed to assertValidSplits
/// @param func       the split implementation to time
/// @param iterations number of times `func` is invoked
/// @return the elapsed wall time in milliseconds
template<typename Fn>
std::chrono::duration<double, std::milli> split_benchmark(Fn func, size_t iterations)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by system clock adjustments; high_resolution_clock gives no such guarantee.
    using Clock = std::chrono::steady_clock;

    const string a{"this is a string of text to split"};
    const Splits expected{"this", "is", "a", "string", "of", "text", "to", "split"};
    const auto begin{Clock::now()};
    for (size_t i = 0; i < iterations; ++i)
    {
        const auto result{func(a, ' ')};
        assertValidSplits(expected, result);
    }
    const auto end{Clock::now()};
    return std::chrono::duration<double, std::milli>(end - begin);
}
/// Runs each split implementation through split_benchmark with the same
/// iteration count and prints the elapsed milliseconds for each.
int main()
{
    const int runs = 1000000;

    // Benchmarks run in this fixed order: stringstream, regex, find.
    const auto viaStream = split_benchmark(stringstreamSplit, runs);
    const auto viaRegex = split_benchmark(regexSplit, runs);
    const auto viaFind = split_benchmark(findSplit, runs);

    cout << "std::stringstream took " << viaStream.count() << "ms" << endl
         << "std::regex took " << viaRegex.count() << "ms" << endl
         << "std::string.find() took " << viaFind.count() << "ms" << endl;
}
@Gnomorian
Copy link
Author

MinGW x64 windows (Debug binaries):
std::stringstream took 1671ms
std::regex took 15994.1ms
std::string.find() took 1189.95ms

MinGW x64 windows (Release binaries):
std::stringstream took 907.998ms
std::regex took 3199.08ms
std::string.find() took 366.002ms

MSVC 14 (2017) x64 (Release binaries):
std::stringstream took 1117.02ms
std::regex took 6823.93ms
std::string.find() took 536.398ms

MSVC 14 (2017) x64 (Debug binaries):
std::stringstream took 74282.3ms
std::regex took 660995ms
std::string.find() took 76841.8ms

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment