Skip to content

Instantly share code, notes, and snippets.

@sehe
Last active April 1, 2018 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sehe/212ce5e3086eb3b26a6e6f806002f967 to your computer and use it in GitHub Desktop.
Save sehe/212ce5e3086eb3b26a6e6f806002f967 to your computer and use it in GitHub Desktop.
//#define BOOST_SPIRIT_DEBUG
#include <algorithm>
#include <cstddef>
#include <ctime>
#include <deque>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/container/flat_set.hpp>
/**
 * String interning table.
 *
 * Every distinct string handed to intern() is stored once; callers receive a
 * non-owning view (Atom) that remains valid for as long as this table is alive
 * and is not copied or moved.
 */
struct StringTable {
    typedef boost::string_ref Atom;
    typedef boost::container::flat_set<Atom> Index;
    typedef std::deque<char> Store;

    /* An insert in the middle of the deque invalidates all the iterators and
     * references to elements of the deque. An insert at either end of the
     * deque invalidates all the iterators to the deque, but has no effect on
     * the validity of references to elements of the deque.
     *
     * Note that a deque is NOT one contiguous array: it is made of separately
     * allocated blocks. An Atom is a (pointer, length) pair, so it may only
     * refer to characters that lie within adjacent addresses.
     */
    Store backing;

    /* Holds the strings whose appended copy in `backing` would have straddled
     * a block boundary. Each one lives in its own std::string; push_back on a
     * deque never relocates existing elements, so their buffers stay put.
     */
    std::deque<std::string> overflow;

    /* All atoms handed out so far, ordered by string value. */
    Index index;

    /**
     * Returns the canonical Atom for `key`, storing the characters first if
     * this is a string the table has not seen before.
     *
     * @param key  characters to intern; only read during the call.
     * @return     a view with the same contents as `key`, backed by this table.
     */
    Atom intern(boost::string_ref const& key) {
        Index::const_iterator known = index.find(key);
        if (known != index.end())
            return *known;

        // Nothing to store for an empty key; never dereference into an empty deque.
        if (key.empty())
            return *index.insert(Atom("", 0)).first;

        // Reuse an existing run of characters, but only one that is contiguous in memory.
        Store const& stored = backing;
        Store::const_iterator match = std::search(stored.begin(), stored.end(), key.begin(), key.end());
        while (match != stored.end() && !contiguous(match, key.size()))
            match = std::search(match + 1, stored.end(), key.begin(), key.end());
        if (match != stored.end())
            return *index.insert(Atom(&*match, key.size())).first;

        // Not present yet: append at the end (keeps references to older characters valid).
        std::size_t const offset = backing.size();
        backing.insert(backing.end(), key.begin(), key.end());
        match = stored.begin() + offset;
        if (contiguous(match, key.size()))
            return *index.insert(Atom(&*match, key.size())).first;

        // The appended copy crosses a block boundary. Drop it again (erasing at
        // the end only invalidates the erased elements) and park the string in
        // its own buffer instead.
        backing.erase(backing.begin() + offset, backing.end());
        overflow.push_back(std::string(key.begin(), key.end()));
        std::string const& parked = overflow.back();
        return *index.insert(Atom(parked.data(), parked.size())).first;
    }

private:
    /* True when the `n` (>= 1) characters starting at `first` occupy adjacent
     * addresses, i.e. can be described by a single (pointer, length) pair.
     */
    static bool contiguous(Store::const_iterator first, std::size_t n) {
        char const* expected = &*first;
        for (std::size_t i = 1; i < n; ++i) {
            ++first;
            ++expected;
            if (&*first != expected)
                return false;
        }
        return true;
    }
};
namespace MyEvents {
// Type of event carried by a log line. SLOPE, GEAR and DIR are matched by name
// in the parser's `kind` symbol table; LOCATION is what the parser assigns when
// a line has no trailing " - <KIND>: <value>" clause.
enum Kind { LOCATION, SLOPE, GEAR, DIR };
/**
 * Timestamp of a log line, split into whole seconds and the sub-second remainder.
 */
struct Timestamp {
    std::time_t _rep;            // whole seconds, as returned by std::mktime
    double _fractional_seconds;  // number read immediately after the seconds field

    /**
     * Extracts text shaped like "2015-Mar-04 05:06:07.25": a date/time in the
     * format "%Y-%b-%d %H:%M:%S" followed by a floating-point fraction.
     * `o` is only modified when the whole extraction succeeds.
     */
    friend std::istream& operator>>(std::istream& is, Timestamp& o) {
        // std::get_time only fills in the fields named by the format, so start
        // from a zeroed struct instead of handing indeterminate values to mktime.
        std::tm tmp = {};
        tmp.tm_isdst = -1;  // let mktime determine whether DST is in effect
        double fraction = 0.0;
        if (is >> std::get_time(&tmp, "%Y-%b-%d %H:%M:%S") >> fraction) {
            o._rep = std::mktime(&tmp);
            o._fractional_seconds = fraction;
        }
        return is;
    }

    /**
     * Prints the timestamp as one number of seconds (whole + fractional part).
     * std::fixed is a sticky manipulator: the stream remains in fixed notation
     * for any floating-point values written afterwards.
     */
    friend std::ostream& operator<<(std::ostream& os, Timestamp const& o) {
        return os << std::fixed << (o._rep + o._fractional_seconds);
    }
};
// Interned string handle: a non-owning view whose characters are kept by a StringTable.
typedef StringTable::Atom Atom;
// One parsed log line. The members are filled by the Spirit grammar through the
// Fusion adaptation of this struct; the parse order is the one listed in the
// adaptation, which differs from the declaration order here.
struct LogRecord {
int driver; // number following " => Driver: "
double time; // number preceding " s"
double vel; // number following " - Speed: "
double km; // number following " - Km: "
Timestamp date; // bracketed timestamp at the start of the line
Atom road; // interned token following " - Road: "
Kind kind; // event type; LOCATION when the line has no event clause
double value; // event value: a number or a mapped symbol; 0.0 for LOCATION
};
// Sequence of parsed records; this is the attribute type of the grammar's start rule.
typedef std::vector<LogRecord> LogRecords;
}
// Exposes LogRecord as a Fusion sequence so the grammar can fill it member by
// member. The order below is the order in which the fields appear in a log line
// (see the line_record rule), not the declaration order of the struct, so it
// must be kept in sync with the grammar.
BOOST_FUSION_ADAPT_STRUCT(MyEvents::LogRecord,
(MyEvents::Timestamp, date)
(double, time)
(int, driver)
(double, vel)
(MyEvents::Atom, road)
(double, km)
(MyEvents::Kind, kind)
(double, value))
// Shorthand for the Spirit Qi namespace used throughout the parser.
namespace qi = boost::spirit::qi;
namespace QiParsers {
// Grammar that turns an iterator range of log text into MyEvents::LogRecords.
// The rules are declared without a skipper type, so the spaces inside the
// string literals below have to match the input exactly.
//
// A line is expected to look like
//   [<timestamp>] - <time> s => Driver: <int> - Speed: <num> - Road: <token> - Km: <num>
// optionally followed by
//    - <SLOPE|GEAR|DIR>: <number or symbolic value>
template <typename It>
struct LogParser : qi::grammar<It, MyEvents::LogRecords()> {
// `strings` is the table that road names are interned into; the grammar keeps
// a reference to it (inside intern_), so it must outlive the parser.
LogParser(StringTable& strings) : LogParser::base_type(start), intern_(intern_f(strings)) {
using namespace qi;
// Event names accepted after " - "; LOCATION is deliberately absent because
// it is only produced by the fallback branch of line_record.
kind.add
("SLOPE", MyEvents::SLOPE)
("GEAR", MyEvents::GEAR)
("DIR", MyEvents::DIR);
// Symbolic event values and the numbers they are stored as.
values.add("G1", 1.0)
("G2", 2.0)
("REVERSE", -1.0)
("NORTH", 1.0)
("EAST", 2.0)
("WEST", 3.0)
("SOUTH", 4.0);
// One or more graphic (non-space) characters; the matched range is handed to
// the string table and the resulting Atom becomes the rule's attribute.
atom = raw[+graph][_val = intern_(_1)];
// The attributes are produced in the order of the Fusion adaptation of
// LogRecord: date, time, driver, vel, road, km, kind, value.
line_record
= '[' >> stream >> ']' // timestamp, read through the attribute's stream extraction (Timestamp::operator>>)
>> " - " >> double_ >> " s"
>> " => Driver: " >> int_
>> " - Speed: " >> double_
>> " - Road: " >> atom
>> " - Km: " >> double_
// Either an explicit event clause, or the defaults LOCATION / 0.0.
>> (" - " >> kind >> ": " >> (double_ | values) | attr(MyEvents::LOCATION) >> attr(0.0));
// Records are separated by end-of-line; at least one record is required.
start = line_record % eol;
// Expands to nothing useful unless BOOST_SPIRIT_DEBUG is defined before the includes.
BOOST_SPIRIT_DEBUG_NODES((start)(line_record)(atom))
}
private:
// Function object wrapped in a Phoenix lazy function: interns the matched
// character range into the referenced StringTable.
struct intern_f {
StringTable& _table;
typedef StringTable::Atom result_type;
explicit intern_f(StringTable& table) : _table(table) {}
StringTable::Atom operator()(boost::iterator_range<It> const& range) const {
return _table.intern(sequential(range));
}
private:
// be more efficient if It is const char*
// (the range is already contiguous, so it can be viewed without copying)
static boost::string_ref sequential(boost::iterator_range<const char*> const& range) {
return boost::string_ref(range.begin(), range.size());
}
// Any other iterator type: copy the characters into a temporary std::string,
// which is what gets passed to StringTable::intern.
template <typename OtherIt>
static std::string sequential(boost::iterator_range<OtherIt> const& range) {
return std::string(range.begin(), range.end());
}
};
boost::phoenix::function<intern_f> intern_; // lazy wrapper used in the atom rule's semantic action
qi::rule<It, MyEvents::LogRecords()> start; // whole input: list of records
qi::rule<It, MyEvents::LogRecord()> line_record; // a single log line
qi::rule<It, MyEvents::Atom()> atom; // interned token (road name)
qi::symbols<char, MyEvents::Kind> kind; // event name -> Kind
qi::symbols<char, double> values; // symbolic event value -> number
};
}
template <typename It>
bool parse_spirit(It b, It e, MyEvents::LogRecords& into, StringTable& strings) {
QiParsers::LogParser<It> parser(strings); // TODO optimize by not reconstructing all parser rules each time
return parse(b, e, parser, into);
}
bool parse_logfile(char const* fname, MyEvents::LogRecords& into, StringTable& strings) {
boost::iostreams::filtering_istream is;
is.push(boost::iostreams::gzip_decompressor());
std::ifstream ifs(fname, std::ios::binary);
is.push(ifs);
boost::spirit::istream_iterator f(is >> std::noskipws), l;
return parse_spirit(f, l, into, strings);
}
namespace MyEvents { // for debug/demo
    // Makes the Fusion sequence printer visible here, so adapted structs of this
    // namespace can be written with operator<<.
    using boost::fusion::operator<<;

    // Writes the enumerator's name; a value that is not one of the four
    // enumerators produces no output.
    static inline std::ostream& operator<<(std::ostream& os, Kind k) {
        char const* label = nullptr;
        switch (k) {
            case LOCATION: label = "LOCATION"; break;
            case SLOPE:    label = "SLOPE";    break;
            case GEAR:     label = "GEAR";     break;
            case DIR:      label = "DIR";      break;
        }
        if (label)
            os << label;
        return os;
    }
}
int main(int argc, char **argv) {
StringTable strings;
MyEvents::LogRecords records;
for (char** arg = argv+1; *arg && (argv+argc != arg); ++arg) {
bool ok = parse_logfile(*arg, records, strings);
std::cout
<< "Parsing " << *arg << (ok?" - success" : " - errors")
<< " (" << records.size() << " records total)\n";
}
for (MyEvents::LogRecords::const_iterator it = records.begin(); it != records.end(); ++it)
std::cout << *it << "\n";
std::cout << "Interned strings: " << strings.index.size() << "\n";
std::cout << "Table backing: '";
std::copy(strings.backing.begin(), strings.backing.end(), std::ostreambuf_iterator<char>(std::cout));
std::cout << "'\n";
for (StringTable::Index::const_iterator it = strings.index.begin(); it != strings.index.end(); ++it) {
std::cout << " entry - " << *it << "\n";
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment