Created
January 9, 2020 13:00
-
-
Save insooth/6cf3029dfcf930611bc190d6b6da4f9e to your computer and use it in GitHub Desktop.
Parsing URI's query string using zero-copy with token filtration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// see LICENSE on insooth.github.io | |
// parse key-value pairs concatenated with ampersand: a=b&c=d&u=v | |
// look for selected keys | |
#include <iostream> | |
#include <cassert> | |
#include <cstring> // strlen | |
#include <cstddef> // size_t | |
#include <array> | |
#include <numeric> // accumulate | |
#include <optional> // make_optional, nullopt, optional | |
#include <type_traits> // {remove,}extent, decay, add_const | |
#include <utility> // pair | |
using stringee = std::pair<std::size_t, std::size_t>; // no trailing \0 | |
using kv = std::pair<stringee, stringee>; // key-value | |
constexpr std::size_t N = 3; // max kv items in a string | |
using parsed = std::array<kv, N>; | |
// size of a token and its number chosen arbitrary | |
using filter = std::array<char[3], 3>; | |
// convert indices stored in stringee to a string part extracted from s | |
template<std::size_t N> | |
auto materialise(const char* const s, std::size_t n, const stringee& x) -> std::array<char, N> | |
{ | |
assert((n >= x.first) && (n >= x.second) && (x.second >= x.first)); | |
std::array<char, N> r = { 0 }; | |
const std::size_t m = x.second - x.first; | |
assert(N >= m); // fits into result? | |
assert((x.first + m) <= n); | |
for (std::size_t i = 0; i < m; ++i) | |
{ | |
r[i] = s[x.first + i]; | |
} | |
return r; | |
} | |
// any of allowed keys matches? | |
bool expected(std::decay_t<std::add_const_t<filter::value_type>> x, std::size_t n = std::extent_v<filter::value_type>) | |
{ | |
constexpr filter ff = | |
{{ | |
{'f', 's', 't' } | |
, {'s', 'n', 'd' } | |
, {'3', 'r', 'd' } | |
}}; | |
assert(n <= ff.size()); | |
return std::accumulate(ff.begin(), ff.end() | |
, false | |
, [x, n](bool r, const auto& f) | |
{ return r || (0 == std::memcmp(f, x, n)); } | |
); | |
} | |
// array<T, N> -> (T*, N) | |
bool expected(const std::array<std::remove_extent_t<filter::value_type>, std::extent_v<filter::value_type>>& x) | |
{ | |
return expected(x.data(), x.size()); | |
} | |
// explode('=', explode('&', s)) | |
parsed parse(const char* const s, std::size_t n) | |
{ | |
[[maybe_unused]] constexpr auto max = std::numeric_limits<std::size_t>::max(); | |
assert(n < max); | |
// TODO: fill this regardless the size of parsed (N) | |
constexpr auto invalid = kv{stringee{max, max}, stringee{max, max}}; | |
parsed p = {invalid, invalid, invalid}; | |
bool is_key = true; // URI starts with a key | |
kv current = invalid; | |
auto& k = current.first; | |
auto& v = current.second; | |
for (std::size_t i = 0, item = 0; (i < n) && (item < p.size()); ++i) | |
{ | |
if ((0 == i) || ('&' == s[i])) // new key-value pairs starts | |
{ | |
assert(! is_key || (0 == i)); | |
is_key = true; | |
if (0 == i) // we start with a key | |
{ | |
k.first = i; // start of a new kv pair | |
} | |
else | |
{ | |
v.second = i; // end of value | |
// next key-value pair | |
if (expected(materialise<std::extent_v<filter::value_type>>(s, n, k))) | |
{ | |
p[item] = current; | |
current = invalid; | |
++item; | |
} | |
k.first = i + 1; // start of a new kv pair | |
} | |
} | |
else if ('=' == s[i]) // found a value | |
{ | |
assert(true == is_key); | |
is_key = false; | |
k.second = i; // end of key | |
v.first = i + 1; // start of a new value | |
} | |
else // collect key or value data | |
{ | |
// +1: past-the-end element is a last one (like in iterators) | |
if (is_key) { k.second = i + 1; } else { v.second = i + 1; }; | |
} | |
} | |
return p; | |
} | |
#define INVALIDATE false | |
const char* validate(const char* const s, std::size_t n) | |
{ | |
return INVALIDATE ? nullptr : s; | |
} | |
void print(const parsed& ps, const char* const s, std::size_t n) | |
{ | |
for (const auto& p : ps) | |
{ | |
const auto& [k, v] = p; | |
const auto valid = [s, n](std::size_t offset) { return offset <= n; }; // <=: past-the-end is the "last" iterator | |
std::cout << ((valid(k.first) && valid(k.second) && (k.second >= k.first)) // >=: empty string case | |
? std::string(s + k.first, s + k.second) | |
: "?") | |
<< " = " | |
<< ((valid(v.first) && valid(v.second) && (v.second >= v.first)) // >=: empty string case | |
? std::string(s + v.first, s + v.second) | |
: "?") | |
<< '\n'; | |
} | |
} | |
template<class T, class F, class... Ts> | |
// requires Callable<F, T, std::optional<T>> | |
constexpr auto bind(std::optional<T>&& v, F&& f, Ts&&... ts) | |
{ | |
// CTAD to prevent from type nesting | |
return v ? std::optional{std::forward<F>(f)(*v, std::forward<Ts>(ts)...)} : std::nullopt; | |
} | |
int main() | |
{ | |
const char* const s = "fst=OK&xyz=NOK&snd=OK&3rd=OK&uvw=NOK"; | |
const std::size_t n = std::strlen(s); | |
// with error handling using monad | |
auto m1 = bind( | |
[s, n] | |
{ | |
auto ss = validate(s, n); | |
return (nullptr == ss) | |
? std::nullopt | |
: std::make_optional(ss); | |
}(), parse, n); | |
if (m1) print(m1.value(), s, n); // cannot compose (? -- different "monad", cannot deduce type) | |
std::cout << "\n\n"; | |
#ifndef INVALIDATE | |
print(parse(validate(s, n), n), s, n); | |
#endif | |
return 0; | |
} | |
/* | |
// http://coliru.stacked-crooked.com/a/48b25de00aad3b6c | |
g++ -std=c++17 -O2 -Wall -pedantic -pthread main.cpp && ./a.out | |
fst = OK | |
snd = OK | |
3rd = OK | |
*/ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment