Skip to content

Instantly share code, notes, and snippets.

@malkia
Created July 30, 2018 16:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save malkia/15bd4936d29c9a40f55b4959c048ee88 to your computer and use it in GitHub Desktop.
Save malkia/15bd4936d29c9a40f55b4959c048ee88 to your computer and use it in GitHub Desktop.
reddit_cpp_speeding_up_string_view_string_split
// https://www.reddit.com/r/cpp/comments/934r63/speeding_up_string_view_string_split/
// StringViewTestss.cpp : performance experiments for string_view
//
#include <string_view>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <chrono>
#include <fstream>
#include <sstream>
// initialize this, as otherwise we'll have
// system effects here.
char global[1024*1024*1024] = {};
size_t global_ptr;
static inline void* dumb_alloc(size_t size)
{
void* p = global + global_ptr;
global_ptr = (global_ptr + size + 7) & ~7;
if (global_ptr >= sizeof(global)) {
printf("panic at the disco\n");
abort();
}
return p;
}
void* operator new(size_t size)
{
return dumb_alloc(size);
}
void* operator new[](size_t size)
{
return dumb_alloc(size);
}
void operator delete(void*) _NOEXCEPT
{
}
void operator delete[](void*) _NOEXCEPT
{
}
using namespace std::literals;
// code based on examples from https://tristanbrindle.com/posts/a-quicker-study-on-tokenising/
// and from https://marcoarena.wordpress.com/2017/01/03/string_view-odi-et-amo/
size_t numAllocations = 0;
size_t sizeAllocations = 0;
// uncomment to count allocations...
/*void* operator new(std::size_t sz) {
numAllocations++;
sizeAllocations += sz;
return std::malloc(sz);
}*/
// uses string::find_first_of
std::vector<std::string>
split(const std::string& str, const std::string& delims = " ")
{
std::vector<std::string> output;
//output.reserve(str.length() / 4);
size_t first = 0;
while (first < str.size())
{
const auto second = str.find_first_of(delims, first);
if (first != second)
{
output.emplace_back(str.data() + first, str.data() + (second == std::string::npos ? str.size() : second));
}
if (second == std::string::npos)
break;
first = second + 1;
}
return output;
}
// uses std::find_first_of
std::vector<std::string>
splitStd(const std::string& str, const std::string& delims = " ")
{
std::vector<std::string> output;
//output.reserve(str.length() / 4);
auto first = std::cbegin(str);
while (first != std::cend(str))
{
const auto second = std::find_first_of(first, std::cend(str),
std::cbegin(delims), std::cend(delims));
if (first != second)
{
output.emplace_back(first, second);
}
if (second == std::cend(str))
break;
first = std::next(second);
}
return output;
}
// strings, but works on pointers rather than iterators
// code by JFT
std::vector<std::string> splitPtr(const std::string& str, const std::string& delims = " ")
{
std::vector<std::string> output;
//output.reserve(str.size() / 2);
for (auto first = str.data(), second = str.data(), last = first + str.size(); second != last && first != last; first = second + 1) {
second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims));
if (first != second)
output.emplace_back(first, second);
}
return output;
}
// uses string_view::find_first_of
std::vector<std::string_view>
splitSV(std::string_view strv, std::string_view delims = " ")
{
std::vector<std::string_view> output;
//output.reserve(strv.length() / 4);
size_t first = 0;
while (first < strv.size())
{
const auto second = strv.find_first_of(delims, first);
//std::cout << first << ", " << second << '\n';
if (first != second)
{
output.emplace_back(strv.substr(first, second-first));
}
if (second == std::string_view::npos)
break;
first = second + 1;
}
return output;
}
// uses std::find_first_of rather than string_view::find_first_of
std::vector<std::string_view>
splitSVStd(std::string_view strv, std::string_view delims = " ")
{
std::vector<std::string_view> output;
//output.reserve(strv.length() / 4);
auto first = strv.begin();
while (first != strv.end())
{
const auto second = std::find_first_of(first, std::cend(strv),
std::cbegin(delims), std::cend(delims));
//std::cout << first << ", " << second << '\n';
if (first != second)
{
output.emplace_back(strv.substr(std::distance(strv.begin(), first), std::distance(first, second)));
}
if (second == strv.end())
break;
first = std::next(second);
}
return output;
}
// based on the JFT's comment:
std::vector<std::string_view> splitSVPtr(std::string_view str, std::string_view delims = " ")
{
std::vector<std::string_view> output;
//output.reserve(str.size() / 2);
for (auto first = str.data(), second = str.data(), last = first + str.size(); second != last && first != last; first = second + 1) {
second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims));
if (first != second)
output.emplace_back(first, second - first);
}
return output;
}
const std::string_view LoremIpsumStrv{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, "
"sed do eiusmod tempor incididuntsuperlongwordsuper ut labore et dolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute "
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. "
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." };
template <typename TFunc> void RunAndMeasure(const char* title, TFunc func)
{
numAllocations = 0;
sizeAllocations = 0;
const auto start = std::chrono::steady_clock::now();
auto ret = func();
const auto end = std::chrono::steady_clock::now();
std::cout << title << ": " <<
std::chrono::duration <double, std::milli>(end - start).count()
<< " ms, res " << ret << " Allocation count: " << numAllocations << ", size " << sizeAllocations << "\n";
global_ptr = 0;
}
int main(int argc, const char** argv)
{
std::cout << sizeof(std::string_view) << '\n';
std::cout << sizeof(std::string) << '\n';
std::string testString{ LoremIpsumStrv };
if (argc > 1 && "nofile"s != argv[1])
{
std::ifstream inFile(argv[1]);
std::stringstream strStream;
strStream << inFile.rdbuf();
testString = strStream.str();
}
std::cout << "string length: " << testString.length() << '\n';
const int ITERS = argc > 2 ? atoi(argv[2]) : 10000;
std::cout << "test iterations: " << ITERS << '\n';
RunAndMeasure("string split", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = split(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
RunAndMeasure("string split std", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = splitStd(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
RunAndMeasure("string split ptr", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = splitPtr(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
RunAndMeasure("string_view split", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = splitSV(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
RunAndMeasure("string_view split std", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = splitSVStd(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
RunAndMeasure("string_view split ptr", [ITERS, &testString]()
{
std::size_t sizes = 0;
for (int i = 0; i < ITERS; ++i)
{
auto v = splitSVStd(testString);
sizes += v.size();
global_ptr = 0;
}
return sizes;
});
}
@malkia
Copy link
Author

malkia commented Jul 30, 2018

Results after using "StackAllocator" and forcing it to "deallocate" all at end of each ITER cycle - e.g. global_ptr = 0.

`
clang++ -O3 -std=c++17 -stdlib=libc++ StringViewTest.cpp
(bionic)malkia@localhost:~/p/StringViewTests$ time -p operf -g -l ./a.out nofile 10000000
Kernel profiling is not possible with current system config.
Set /proc/sys/kernel/kptr_restrict to 0 to collect kernel samples.
operf: Profiler started
16
24
string length: 489
test iterations: 10000000
string split: 5759.3 ms, res 10000077 Allocation count: 0, size 0
string split std: 6059.83 ms, res 10000000 Allocation count: 0, size 0
string split ptr: 7337.7 ms, res 10000000 Allocation count: 0, size 0
string_view split: 5700.79 ms, res 10000000 Allocation count: 0, size 0
string_view split std: 5769.85 ms, res 10000000 Allocation count: 0, size 0
string_view split ptr: 4813.97 ms, res 10000000 Allocation count: 0, size 0

Profiling done.
`

actual oprofile info:

`
(bionic)malkia@localhost:~/p/StringViewTests$ opreport -g -l
Using /home/malkia/p/StringViewTests/oprofile_data/samples/ for samples directory.

WARNING: Lost samples detected! See /home/malkia/p/StringViewTests/oprofile_data/samples/operf.log for details.
CPU: Intel Skylake microarchitecture, speed 3600 MHz (estimated)
Counted cpu_clk_unhalted events () with a unit mask of 0x00 (Core cycles when at least one thread on the physical core is not in halt state) count 30000045
samples % linenr info image name symbol name
214 35.6073 (no location information) a.out splitStd(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
176 29.2845 (no location information) a.out splitPtr(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
172 28.6190 (no location information) a.out split(std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&, std::__1::basic_string<char, std::__1::char_traits, std::__1::allocator > const&)
14 2.3295 (no location information) a.out splitSV(std::__1::basic_string_view<char, std::__1::char_traits >, std::__1::basic_string_view<char, std::__1::char_traits >)
14 2.3295 (no location information) a.out splitSVStd(std::__1::basic_string_view<char, std::__1::char_traits >, std::__1::basic_string_view<char, std::__1::char_traits >)
11 1.8303 (no location information) no-vmlinux /no-vmlinux
`

e.g. the goal is to eliminate new/delete/malloc/free from the picture.
For previous results with them: https://www.reddit.com/r/cpp/comments/934r63/speeding_up_string_view_string_split/e3ao6e2/

Roughly - x2, x3 speed loss due to malloc/free - without going into details (like we have 6 individual benchmarks here, and they use quite differently malloc/free/new/delete).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment