insooth/uri-query-string-parser.cpp

## uri-query-string-parser.cpp
// see LICENSE on insooth.github.io

// parse key-value pairs concatenated with ampersand: a=b&c=d&u=v
// look for selected keys


#include <iostream>

#include <cassert>
#include <cstring>  // strlen
#include <cstddef>  // size_t
#include <array>
#include <numeric>  // accumulate
#include <optional>  // make_optional, nullopt, optional
#include <type_traits> // {remove,}extent, decay, add_const
#include <utility>  // pair


using stringee = std::pair<std::size_t, std::size_t>; // no trailing \0

using kv = std::pair<stringee, stringee>; // key-value

constexpr std::size_t N = 3;  // max kv items in a string

using parsed = std::array<kv, N>;

// size of a token and its number chosen arbitrary
using filter = std::array<char[3], 3>;


// convert indices stored in stringee to a string part extracted from s
template<std::size_t N>
auto materialise(const char* const s, std::size_t n, const stringee& x) -> std::array<char, N>
{
    assert((n >= x.first) && (n >= x.second) && (x.second >= x.first));

    std::array<char, N> r = { 0 };

    const std::size_t m = x.second - x.first;
    assert(N >= m);  // fits into result?
    assert((x.first + m) <= n);

    for (std::size_t i = 0; i < m; ++i)
    {
        r[i] = s[x.first + i];
    }

    return r;
}

// any of allowed keys matches?
bool expected(std::decay_t<std::add_const_t<filter::value_type>> x, std::size_t n = std::extent_v<filter::value_type>)
{
    constexpr filter ff =
    {{
      {'f', 's', 't' }
    , {'s', 'n', 'd' }
    , {'3', 'r', 'd' }
    }};

    assert(n <= ff.size());

    return std::accumulate(ff.begin(), ff.end()
                , false
                , [x, n](bool r, const auto& f)
                  { return r || (0 == std::memcmp(f, x, n)); }
                );
}

// array<T, N> -> (T*, N)
bool expected(const std::array<std::remove_extent_t<filter::value_type>, std::extent_v<filter::value_type>>& x)
{
    return expected(x.data(), x.size());
}

// explode('=', explode('&', s))
parsed parse(const char* const s, std::size_t n)
{
    [[maybe_unused]] constexpr auto max = std::numeric_limits<std::size_t>::max();
    assert(n < max);

    // TODO: fill this regardless the size of parsed (N)
    constexpr auto invalid = kv{stringee{max, max}, stringee{max, max}};
    parsed p = {invalid, invalid, invalid};

    bool is_key = true;  // URI starts with a key
    kv current = invalid;

    auto& k = current.first;
    auto& v = current.second;

    for (std::size_t i = 0, item = 0; (i < n) && (item < p.size()); ++i)
    {
        if ((0 == i) || ('&' == s[i])) // new key-value pairs starts
        {
            assert(! is_key || (0 == i));

            is_key = true;

            if (0 == i)  // we start with a key
            {
                k.first = i;  // start of a new kv pair
            }
            else
            {
                v.second = i;  // end of value

                // next key-value pair
                if (expected(materialise<std::extent_v<filter::value_type>>(s, n, k)))
                {
                    p[item] = current;
                    current = invalid;
                    ++item;
                }

                k.first = i + 1;  // start of a new kv pair
            }
        }
        else if ('=' == s[i])  // found a value
        {
            assert(true == is_key);

            is_key = false;

            k.second = i;  // end of key
            v.first = i + 1;  // start of a new value

        }
        else // collect key or value data
        {
            // +1: past-the-end element is a last one (like in iterators)
            if (is_key) { k.second = i + 1; } else { v.second = i + 1; };
        }
    }

    return p;
}

#define INVALIDATE false
const char* validate(const char* const s, std::size_t n)
{
    return INVALIDATE ? nullptr : s;
}


void print(const parsed& ps, const char* const s, std::size_t n)
{
    for (const auto& p : ps)
    {
        const auto& [k, v] = p;

        const auto valid = [s, n](std::size_t offset) { return offset <= n; };  // <=: past-the-end is the "last" iterator

        std::cout << ((valid(k.first) && valid(k.second) && (k.second >= k.first))  // >=: empty string case
                        ? std::string(s + k.first, s + k.second)
                        : "?")
                  << " = "
                  << ((valid(v.first) && valid(v.second) && (v.second >= v.first))  // >=: empty string case
                        ? std::string(s + v.first, s + v.second)
                        : "?")
                  << '\n';
    }
}


template<class T, class F, class... Ts>
//  requires Callable<F, T, std::optional<T>>
constexpr auto bind(std::optional<T>&& v, F&& f, Ts&&... ts)
{
    // CTAD to prevent from type nesting
    return v ? std::optional{std::forward<F>(f)(*v, std::forward<Ts>(ts)...)} : std::nullopt;
}


int main()
{
    const char* const s = "fst=OK&xyz=NOK&snd=OK&3rd=OK&uvw=NOK";
    const std::size_t n = std::strlen(s);

    // with error handling using monad
    auto m1 = bind(
            [s, n]
            {
                auto ss = validate(s, n);

                return (nullptr == ss)
                    ? std::nullopt
                    : std::make_optional(ss);
            }(), parse, n);

    if (m1) print(m1.value(), s, n);  // cannot compose (? -- different "monad", cannot deduce type)


    std::cout << "\n\n";

#ifndef INVALIDATE
    print(parse(validate(s, n), n), s, n);
#endif

    return 0;
}


/*

// http://coliru.stacked-crooked.com/a/48b25de00aad3b6c

g++ -std=c++17 -O2 -Wall -pedantic -pthread main.cpp && ./a.out
fst = OK
snd = OK
3rd = OK


*/
	// see LICENSE on insooth.github.io

	// parse key-value pairs concatenated with ampersand: a=b&c=d&u=v
	// look for selected keys


	#include <iostream>

	#include <cassert>
	#include <cstring> // strlen
	#include <cstddef> // size_t
	#include <array>
	#include <numeric> // accumulate
	#include <optional> // make_optional, nullopt, optional
	#include <type_traits> // {remove,}extent, decay, add_const
	#include <utility> // pair





	using stringee = std::pair<std::size_t, std::size_t>; // no trailing \0

	using kv = std::pair<stringee, stringee>; // key-value

	constexpr std::size_t N = 3; // max kv items in a string

	using parsed = std::array<kv, N>;

	// size of a token and its number chosen arbitrary
	using filter = std::array<char[3], 3>;




	// convert indices stored in stringee to a string part extracted from s
	template<std::size_t N>
	auto materialise(const char* const s, std::size_t n, const stringee& x) -> std::array<char, N>
	{
	assert((n >= x.first) && (n >= x.second) && (x.second >= x.first));

	std::array<char, N> r = { 0 };

	const std::size_t m = x.second - x.first;
	assert(N >= m); // fits into result?
	assert((x.first + m) <= n);

	for (std::size_t i = 0; i < m; ++i)
	{
	r[i] = s[x.first + i];
	}

	return r;
	}

	// any of allowed keys matches?
	bool expected(std::decay_t<std::add_const_t<filter::value_type>> x, std::size_t n = std::extent_v<filter::value_type>)
	{
	constexpr filter ff =
	{{
	{'f', 's', 't' }
	, {'s', 'n', 'd' }
	, {'3', 'r', 'd' }
	}};

	assert(n <= ff.size());

	return std::accumulate(ff.begin(), ff.end()
	, false
	, [x, n](bool r, const auto& f)
	{ return r \|\| (0 == std::memcmp(f, x, n)); }
	);
	}

	// array<T, N> -> (T*, N)
	bool expected(const std::array<std::remove_extent_t<filter::value_type>, std::extent_v<filter::value_type>>& x)
	{
	return expected(x.data(), x.size());
	}

	// explode('=', explode('&', s))
	parsed parse(const char* const s, std::size_t n)
	{
	[[maybe_unused]] constexpr auto max = std::numeric_limits<std::size_t>::max();
	assert(n < max);

	// TODO: fill this regardless the size of parsed (N)
	constexpr auto invalid = kv{stringee{max, max}, stringee{max, max}};
	parsed p = {invalid, invalid, invalid};

	bool is_key = true; // URI starts with a key
	kv current = invalid;

	auto& k = current.first;
	auto& v = current.second;

	for (std::size_t i = 0, item = 0; (i < n) && (item < p.size()); ++i)
	{
	if ((0 == i) \|\| ('&' == s[i])) // new key-value pairs starts
	{
	assert(! is_key \|\| (0 == i));

	is_key = true;

	if (0 == i) // we start with a key
	{
	k.first = i; // start of a new kv pair
	}
	else
	{
	v.second = i; // end of value

	// next key-value pair
	if (expected(materialise<std::extent_v<filter::value_type>>(s, n, k)))
	{
	p[item] = current;
	current = invalid;
	++item;
	}

	k.first = i + 1; // start of a new kv pair
	}
	}
	else if ('=' == s[i]) // found a value
	{
	assert(true == is_key);

	is_key = false;

	k.second = i; // end of key
	v.first = i + 1; // start of a new value

	}
	else // collect key or value data
	{
	// +1: past-the-end element is a last one (like in iterators)
	if (is_key) { k.second = i + 1; } else { v.second = i + 1; };
	}
	}

	return p;
	}

	#define INVALIDATE false
	const char* validate(const char* const s, std::size_t n)
	{
	return INVALIDATE ? nullptr : s;
	}



	void print(const parsed& ps, const char* const s, std::size_t n)
	{
	for (const auto& p : ps)
	{
	const auto& [k, v] = p;

	const auto valid = [s, n](std::size_t offset) { return offset <= n; }; // <=: past-the-end is the "last" iterator

	std::cout << ((valid(k.first) && valid(k.second) && (k.second >= k.first)) // >=: empty string case
	? std::string(s + k.first, s + k.second)
	: "?")
	<< " = "
	<< ((valid(v.first) && valid(v.second) && (v.second >= v.first)) // >=: empty string case
	? std::string(s + v.first, s + v.second)
	: "?")
	<< '\n';
	}
	}



	template<class T, class F, class... Ts>
	// requires Callable<F, T, std::optional<T>>
	constexpr auto bind(std::optional<T>&& v, F&& f, Ts&&... ts)
	{
	// CTAD to prevent from type nesting
	return v ? std::optional{std::forward<F>(f)(*v, std::forward<Ts>(ts)...)} : std::nullopt;
	}


	int main()
	{
	const char* const s = "fst=OK&xyz=NOK&snd=OK&3rd=OK&uvw=NOK";
	const std::size_t n = std::strlen(s);

	// with error handling using monad
	auto m1 = bind(
	[s, n]
	{
	auto ss = validate(s, n);

	return (nullptr == ss)
	? std::nullopt
	: std::make_optional(ss);
	}(), parse, n);

	if (m1) print(m1.value(), s, n); // cannot compose (? -- different "monad", cannot deduce type)


	std::cout << "\n\n";

	#ifndef INVALIDATE
	print(parse(validate(s, n), n), s, n);
	#endif

	return 0;
	}



	/*

	// http://coliru.stacked-crooked.com/a/48b25de00aad3b6c

	g++ -std=c++17 -O2 -Wall -pedantic -pthread main.cpp && ./a.out
	fst = OK
	snd = OK
	3rd = OK


	*/