// NOTE (hosting-page banner, not part of the program): This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
//#define BOOST_SPIRIT_DEBUG
#include <algorithm>
#include <cstddef>
#include <ctime>
#include <deque>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/container/flat_set.hpp>
struct StringTable { | |
typedef boost::string_ref Atom; | |
typedef boost::container::flat_set<Atom> Index; | |
typedef std::deque<char> Store; | |
/* An insert in the middle of the deque invalidates all the iterators and | |
* references to elements of the deque. An insert at either end of the | |
* deque invalidates all the iterators to the deque, but has no effect on | |
* the validity of references to elements of the deque. | |
*/ | |
Store backing; | |
Index index; | |
Atom intern(boost::string_ref const& key) { | |
Index::const_iterator it = index.find(key); | |
if (it == index.end()) { | |
Store::const_iterator match = std::search( | |
backing.begin(), backing.end(), | |
key.begin(), key.end()); | |
if (match == backing.end()) { | |
size_t offset = backing.size(); | |
backing.insert(backing.end(), key.begin(), key.end()); | |
match = backing.begin() + offset; | |
} | |
it = index.insert(Atom(&*match, key.size())).first; | |
} | |
// return the Atom from backing store | |
return *it; | |
} | |
}; | |
namespace MyEvents { | |
/// Kind of event carried by a log record.
enum Kind {
    LOCATION, ///< assigned by the log parser when a line has no " - KIND: value" suffix
    SLOPE,
    GEAR,
    DIR
};
/// Point in time split into whole seconds (`_rep`) and a sub-second part.
struct Timestamp {
    std::time_t _rep;            ///< whole seconds, as produced by std::mktime
    double _fractional_seconds;  ///< number read immediately after the seconds field

    /// Reads text of the form "YYYY-Mon-DD HH:MM:SS" directly followed by a
    /// floating-point number (e.g. "2012-Mar-05 12:34:56.25").
    ///
    /// On success both members of `o` are assigned; on failure the stream is
    /// left in a failed state and `o._rep` is not modified.
    friend std::istream& operator>>(std::istream& is, Timestamp& o) {
        // std::get_time only fills the fields named in the format string.
        // Start from an all-zero struct and mark the DST state as unknown so
        // std::mktime never reads indeterminate values.
        std::tm tmp = std::tm();
        tmp.tm_isdst = -1;
        if (is >> std::get_time(&tmp, "%Y-%b-%d %H:%M:%S") >> o._fractional_seconds)
            o._rep = std::mktime(&tmp);
        return is;
    }

    /// Writes the timestamp as seconds plus fraction in fixed notation.
    /// Note: std::fixed is left set on `os` afterwards.
    friend std::ostream& operator<<(std::ostream& os, Timestamp const& o) {
        return os << std::fixed << (o._rep + o._fractional_seconds);
    }
};
typedef StringTable::Atom Atom; | |
struct LogRecord { | |
int driver; | |
double time; | |
double vel; | |
double km; | |
Timestamp date; | |
Atom road; | |
Kind kind; | |
double value; | |
}; | |
typedef std::vector<LogRecord> LogRecords; | |
} | |
// Expose LogRecord to Boost.Fusion so the Spirit grammar can fill it member
// by member. The members are listed in the order in which the fields appear
// on a log line (which differs from their declaration order in the struct):
// the n-th attribute produced by the line grammar lands in the n-th member
// listed here.
BOOST_FUSION_ADAPT_STRUCT(MyEvents::LogRecord,
        (MyEvents::Timestamp, date)
        (double, time)
        (int, driver)
        (double, vel)
        (MyEvents::Atom, road)
        (double, km)
        (MyEvents::Kind, kind)
        (double, value))
namespace qi = boost::spirit::qi; | |
namespace QiParsers { | |
template <typename It> | |
struct LogParser : qi::grammar<It, MyEvents::LogRecords()> { | |
LogParser(StringTable& strings) : LogParser::base_type(start), intern_(intern_f(strings)) { | |
using namespace qi; | |
kind.add | |
("SLOPE", MyEvents::SLOPE) | |
("GEAR", MyEvents::GEAR) | |
("DIR", MyEvents::DIR); | |
values.add("G1", 1.0) | |
("G2", 2.0) | |
("REVERSE", -1.0) | |
("NORTH", 1.0) | |
("EAST", 2.0) | |
("WEST", 3.0) | |
("SOUTH", 4.0); | |
atom = raw[+graph][_val = intern_(_1)]; | |
line_record | |
= '[' >> stream >> ']' | |
>> " - " >> double_ >> " s" | |
>> " => Driver: " >> int_ | |
>> " - Speed: " >> double_ | |
>> " - Road: " >> atom | |
>> " - Km: " >> double_ | |
>> (" - " >> kind >> ": " >> (double_ | values) | attr(MyEvents::LOCATION) >> attr(0.0)); | |
start = line_record % eol; | |
BOOST_SPIRIT_DEBUG_NODES((start)(line_record)(atom)) | |
} | |
private: | |
struct intern_f { | |
StringTable& _table; | |
typedef StringTable::Atom result_type; | |
explicit intern_f(StringTable& table) : _table(table) {} | |
StringTable::Atom operator()(boost::iterator_range<It> const& range) const { | |
return _table.intern(sequential(range)); | |
} | |
private: | |
// be more efficient if It is const char* | |
static boost::string_ref sequential(boost::iterator_range<const char*> const& range) { | |
return boost::string_ref(range.begin(), range.size()); | |
} | |
template <typename OtherIt> | |
static std::string sequential(boost::iterator_range<OtherIt> const& range) { | |
return std::string(range.begin(), range.end()); | |
} | |
}; | |
boost::phoenix::function<intern_f> intern_; | |
qi::rule<It, MyEvents::LogRecords()> start; | |
qi::rule<It, MyEvents::LogRecord()> line_record; | |
qi::rule<It, MyEvents::Atom()> atom; | |
qi::symbols<char, MyEvents::Kind> kind; | |
qi::symbols<char, double> values; | |
}; | |
} | |
template <typename It> | |
bool parse_spirit(It b, It e, MyEvents::LogRecords& into, StringTable& strings) { | |
QiParsers::LogParser<It> parser(strings); // TODO optimize by not reconstructing all parser rules each time | |
return parse(b, e, parser, into); | |
} | |
bool parse_logfile(char const* fname, MyEvents::LogRecords& into, StringTable& strings) { | |
boost::iostreams::filtering_istream is; | |
is.push(boost::iostreams::gzip_decompressor()); | |
std::ifstream ifs(fname, std::ios::binary); | |
is.push(ifs); | |
boost::spirit::istream_iterator f(is >> std::noskipws), l; | |
return parse_spirit(f, l, into, strings); | |
} | |
namespace MyEvents { // for debug/demo | |
using boost::fusion::operator<<; | |
static inline std::ostream& operator<<(std::ostream& os, Kind k) { | |
switch(k) { | |
case LOCATION: return os << "LOCATION"; | |
case SLOPE: return os << "SLOPE"; | |
case GEAR: return os << "GEAR"; | |
case DIR: return os << "DIR"; | |
} | |
return os; | |
} | |
} | |
int main(int argc, char **argv) { | |
StringTable strings; | |
MyEvents::LogRecords records; | |
for (char** arg = argv+1; *arg && (argv+argc != arg); ++arg) { | |
bool ok = parse_logfile(*arg, records, strings); | |
std::cout | |
<< "Parsing " << *arg << (ok?" - success" : " - errors") | |
<< " (" << records.size() << " records total)\n"; | |
} | |
for (MyEvents::LogRecords::const_iterator it = records.begin(); it != records.end(); ++it) | |
std::cout << *it << "\n"; | |
std::cout << "Interned strings: " << strings.index.size() << "\n"; | |
std::cout << "Table backing: '"; | |
std::copy(strings.backing.begin(), strings.backing.end(), std::ostreambuf_iterator<char>(std::cout)); | |
std::cout << "'\n"; | |
for (StringTable::Index::const_iterator it = strings.index.begin(); it != strings.index.end(); ++it) { | |
std::cout << " entry - " << *it << "\n"; | |
} | |
} |
// (Hosting-page footer, not part of the program:) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment