Skip to content

Instantly share code, notes, and snippets.

@sehe

sehe/Makefile Secret

Created September 20, 2011 13:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sehe/bcfbe2b5f071c7d153a0 to your computer and use it in GitHub Desktop.
Save sehe/bcfbe2b5f071c7d153a0 to your computer and use it in GitHub Desktop.
# Builds the `test` demo executable from test.cpp against a local Boost checkout.
# `all` names no file, so mark it phony to keep a stray file called "all"
# from suppressing the build.
.PHONY: all
all: test

# NOTE: compiler options are accumulated in CPPFLAGS here (not CXXFLAGS).
# CPPFLAGS+=--std=c++0x
CPPFLAGS+=-g -O2
CPPFLAGS+=-I ~/custom/boost_1_47_0/

# Pattern rule: build an executable directly from the same-named .cpp file.
# $(CXX) defaults to g++ in GNU make and can be overridden on the command line.
# The recipe line MUST start with a TAB character.
%: %.cpp
	$(CXX) $(CPPFLAGS) $< -o $@
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 80
// YAGNI #4 - support boost ranges in addition to containers as input (e.g. char[])
#define SUPPORT_BOOST_RANGE // our own define for splitInto
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp> // for pre 1.47.0 boost only
#include <boost/spirit/version.hpp>
#include <boost/range/as_literal.hpp>
#include <iostream>
#include <list>
#include <set>
#include <sstream>
#include <string>
#include <vector>
namespace /*anon*/
{
    namespace phx   = boost::phoenix;
    namespace qi    = boost::spirit::qi;
    namespace karma = boost::spirit::karma;

    /// Qi grammar that splits text into lines (separated by `eol`) and each
    /// line into fields (separated by a caller-supplied delimiter parser).
    ///
    /// Field syntax:
    ///  - plain text: one or more characters that are not a double quote, not
    ///    an end-of-line, and not the start of a delimiter match;
    ///  - quoted text: `"..."`, which may contain delimiters and line breaks;
    ///    a doubled quote (`""`) inside the quotes yields one literal `"`;
    ///  - a field may freely mix quoted and plain pieces (e.g. `q"uote"d`),
    ///    and may be empty.
    ///
    /// A quoted piece without a closing quote trips the `>` expectation
    /// operator, which throws qi::expectation_failure<Iterator>.
    ///
    /// @tparam Iterator  iterator type over the input characters
    /// @tparam Output    attribute type exposed by the grammar; it must be
    ///                   compatible with what `mixed % delim % eol` produces
    template <typename Iterator, typename Output>
    struct my_grammar : qi::grammar<Iterator, Output()>
    {
        /// Type of the delimiter parser: an attribute-less rule.
        typedef qi::rule<Iterator> delim_t;

        /// @param _delim  parser that recognises one field delimiter; taken by
        ///                value and copied into the grammar's own `delim` rule.
        // The base class is named first: bases are always initialized before
        // members, and listing them in that order avoids -Wreorder warnings.
        my_grammar(delim_t _delim)
            : my_grammar::base_type(rule, "quoted_delimited"),
              delim(_delim)
        {
            using namespace qi;

            noquote = char_ - '"';
            plain   = +((!delim) >> (noquote - eol));
            quoted  = lit('"') > *(noquote | '"' >> char_('"')) > '"';

#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
            mixed   = *(quoted|plain);
#else
            // manual folding: concatenate the pieces in a std::string local
            // (_a) and publish it as the rule's attribute once the repetition
            // is done. (A Phoenix placeholder has no .str() member, so an
            // ostringstream local cannot be read back with `_a.str()`.)
            mixed   = (*( (quoted|plain) [_a += _1] )) [_val = _a];
#endif

            // you gotta love simple truths:
            rule    = mixed % delim % eol;

            BOOST_SPIRIT_DEBUG_NODE(rule);
            BOOST_SPIRIT_DEBUG_NODE(plain);
            BOOST_SPIRIT_DEBUG_NODE(quoted);
            BOOST_SPIRIT_DEBUG_NODE(noquote);
            BOOST_SPIRIT_DEBUG_NODE(delim);
        }

      private:
        qi::rule<Iterator>                delim;   // one field delimiter
        qi::rule<Iterator, char()>        noquote; // any character except '"'
#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
        qi::rule<Iterator, std::string()> plain, quoted, mixed;
#else
        qi::rule<Iterator, std::string()> plain, quoted;
        qi::rule<Iterator, std::string(), qi::locals<std::string> > mixed;
#endif
        qi::rule<Iterator, Output()>      rule;    // start rule: lines of fields
    };
}
/// Splits `input` into fields/lines with my_grammar and stores them in `result`.
///
/// @param input   character sequence to parse. With SUPPORT_BOOST_RANGE defined
///                any Boost range is accepted (including character arrays such
///                as string literals); otherwise a container exposing
///                const_iterator / begin() / end() is required.
/// @param result  receives the parsed attribute. NOTE(review): on failure it
///                may already hold partial output - confirm before relying on
///                its contents when false is returned.
/// @param delim   anything convertible to qi::rule<It>: a character or a Qi
///                parser expression matching ONE delimiter.
/// @return true only if the grammar matched AND the whole input was consumed.
///         On failure (including a qi::expectation_failure, which is caught
///         here) a diagnostic is written to std::cerr and false is returned.
template <typename Input, typename Container, typename Delim>
bool splitInto(const Input& input, Container& result, Delim delim)
{
#ifdef SUPPORT_BOOST_RANGE
    typedef typename boost::range_const_iterator<Input>::type It;
    // as_literal() views character arrays (e.g. string literals) as
    // null-terminated strings and passes every other range through unchanged.
    // Plain boost::end() on a char[N] points past the terminating '\0', which
    // would make that NUL part of the last parsed field.
    const boost::iterator_range<It> chars(boost::as_literal(input));
    It first(chars.begin()), last(chars.end());
#else
    typedef typename Input::const_iterator It;
    It first(input.begin()), last(input.end());
#endif

    try
    {
        my_grammar<It, Container> parser(delim);

        bool r = qi::parse(first, last, parser, result);
        r = r && (first == last); // trailing unparsed input counts as failure

        if (!r)
            std::cerr << "parsing failed at: \"" << std::string(first, last) << "\"\n";
        return r;
    }
    catch (const qi::expectation_failure<It>& e)
    {
        // thrown by the grammar's `>` operators, e.g. for an unterminated quote
        std::cerr << "FIXME: expected " << e.what_ << ", got '";
        std::cerr << std::string(e.first, e.last) << "'" << std::endl;
        return false;
    }
}
/// Convenience overload of splitInto() that uses a single space character as
/// the field delimiter.
///
/// @param input   character sequence to parse
/// @param result  receives the parsed fields
/// @return whatever the three-argument splitInto() returns for a ' ' delimiter
template <typename Input, typename Container>
bool splitInto(const Input& input, Container& result)
{
    const char defaultDelimiter = ' '; // default: space delimited
    return splitInto(input, result, defaultDelimiter);
}
/// Demo-output helper: overwrites a line-break character ('\n' or '\r') with
/// '?' so that, in the printed results, a line break that was part of the
/// parsed data can be told apart from one the demo emitted deliberately.
/// Every other character is left untouched.
///
/// @param ch  character to sanitise in place
void safechars(char& ch)
{
    const bool isLineBreak = (ch == '\n') || (ch == '\r');
    if (isLineBreak)
        ch = '?';
}
/// Writes every set on its own line in the form set['a', 'b', ...] to
/// std::cout, followed by a final line break; line-break characters inside
/// the elements are shown as '?' (via safechars).
static void printSets(const std::list<std::set<std::string> >& sets)
{
    using namespace karma;
    std::cout << format(( "set[" << ("'" << *char_[safechars] << "'") % ", " << "]") % '\n', sets) << std::endl;
}

/// Demo driver: runs splitInto() on a handful of sample inputs and prints
/// each successfully parsed result.
int main()
{
    using namespace karma; // demo output generators only :)
    typedef std::list<std::set<std::string> > Lines;

    std::string input;

#if SPIRIT_VERSION >= 0x2050 // boost 1.47.0
    // sample invocation: simple vector of elements in order - flattened across lines
    std::vector<std::string> flattened;
    input = "actually on\ntwo lines";
    const bool flattenedOk = splitInto(input, flattened);
    if (flattenedOk)
        std::cout << format(*char_[safechars] % '|', flattened) << std::endl;
#endif

    Lines linewise, custom;

    // YAGNI #1 - now supports partially quoted columns
    input = "partially q\"oute\"d columns";
    if (splitInto(input, linewise))
        printSets(linewise);

    // YAGNI #2 - now supports custom delimiter expressions
    // Both parses store into the same `custom` container; the second one is
    // only attempted when the first succeeded.
    input = "custom delimiters: 1997-03-14 10:13am";
    bool customOk = splitInto(input, custom, +qi::char_("- 0-9:"));
    customOk = customOk && splitInto(input, custom, +(qi::char_ - qi::char_("0-9")));
    if (customOk)
        printSets(custom);

    // YAGNI #3 - now supports quotes ("") inside quoted values (instead of just making them disappear)
    input = "would like ne\"\"sted \"quotes like \"\"\n\"\" that\"";
    custom.clear();
    if (splitInto(input, custom, qi::char_("() ")))
        printSets(custom);

    return 0;
}
@sehe
Copy link
Author

sehe commented Sep 20, 2011

  • YAGNI #1 - now supports partially quoted columns

This is the problem you reported: e.g. with `,` as the delimiter, only `test,"one,two",three` would be valid, not `test,one","two","three`

Now both are accepted

  • YAGNI #2 - now supports custom delimiter expressions

You could only specify single characters as delimiters. Now you can specify any Spirit Qi parser expression as the delimiter rule. E.g

  splitInto(input, output, ' ');                                 // single space
  splitInto(input, output, +qi::lit(' '));                       // one or more spaces
  splitInto(input, output, +qi::char_(" \t"));                   // one or more spaces or tabs
  splitInto(input, output, qi::double_ >> !qi::lit('#'));        // -- anything at all :) (any floating point number that is not followed by a _hash_ sign)

Note this changes behaviour for the default overload

The old version treated repeated spaces as a single delimiter by default. You now have to explicitly specify that (2nd example) if you want it.

  • YAGNI #3 - now supports quotes ("") inside quoted values (instead of just making them disappear)

See the code sample. Quite simple of course. Note that the sequence "" outside a quoted construct still represents the empty string (for compatibility with e.g. existing CSV output formats which quote empty strings redundantly)

  • YAGNI #4 - support boost ranges in addition to containers as input (e.g. char[])

Well, you ain't gonna need it (but it was rather handy for me in order to just be able to write splitInto("a char array", ...) :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment