Skip to content

Instantly share code, notes, and snippets.

@amyinorbit
Created December 10, 2019 17:04
Show Gist options
  • Save amyinorbit/ac3b401c78c4fbe74838fcbedb54f0e8 to your computer and use it in GitHub Desktop.
Save amyinorbit/ac3b401c78c4fbe74838fcbedb54f0e8 to your computer and use it in GitHub Desktop.
//
// main.cpp
// formatter
//
// Created by Amy Parent on 12/10/2019.
// Copyright © 2019 Amy Parent. All rights reserved.
//
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <filesystem>
#include <iterator>
#include <set>
#include <vector>
#include <apfun/maybe.hpp>
#include <apfun/ranges.hpp>
using namespace amyinorbit;
using amyinorbit::maybe;
struct Token {
enum Kind {
select,
where,
braceL,
braceR,
star,
constant,
is,
isnot,
logAnd,
logOr,
field,
keyword,
resource
} kind;
std::string_view value;
};
constexpr bool operator==(const Token& left, const Token& right) {
return left.kind == right.kind && left.value == right.value;
}
constexpr bool operator==(const Token& left, const std::string& right) {
return left.value == right;
}
const char* name(const Token& token) {
switch (token.kind) {
case Token::select: return "kw_select";
case Token::where: return "kw_where";
case Token::braceL: return "l_brace";
case Token::braceR: return "r_brace";
case Token::star: return "star";
case Token::constant: return "constant";
case Token::is: return "op_is";
case Token::isnot: return "op_is_not";
case Token::logAnd: return "op_and";
case Token::logOr: return "op_or";
case Token::field: return "field_name";
case Token::keyword: return "keyword";
case Token::resource: return "resource";
}
return "INVALID";
}
std::ostream& operator<<(std::ostream& out, const Token& token) {
out << "[" << name(token) << "] '" << token.value << "'";
return out;
}
struct Record {
std::string ident;
std::string region;
maybe<std::string> name;
maybe<std::string> airport;
};
constexpr static struct end_tag_t {} end_tag;
constexpr static struct begin_tag_t {} begin_tag;
class Scanner {
public:
using value_type = maybe<Token>;
using reference = value_type;
using pointer = value_type*;
using difference_type = int;
using iterator_category = std::forward_iterator_tag;
explicit Scanner(const std::string& source) : source_(source) {}
Scanner(begin_tag_t, const std::string& source) : source_(source), index_(0) { next(); }
Scanner(end_tag_t, const std::string& source) : source_(source), index_(source.size()+1) {}
bool operator==(const Scanner& other) const {
return other.source_.data() == source_.data() && other.index_ == index_;
}
bool operator!=(const Scanner& other) const {
return !(*this == other);
}
value_type operator*() const {
return current_;
}
Scanner operator++() {
Scanner r = *this;
next();
return r;
}
Scanner& operator++(int) {
next();
return *this;
}
maybe<Token> next() {
while(index_ < source_.size()) {
auto start = index_;
char c = source_[index_++];
switch(c) {
// whitespace, we just loop
case 0x0020:
case 0x000d:
case 0x0009:
case 0x000b:
case 0x000c:
case 0x000a:
break;
case '(': return produce(Token::braceL);
case ')': return produce(Token::braceL);
case '!': return withNext('=', Token::isnot);
case '<': return withNext('>', Token::isnot);
case '=': return withNext('=', Token::is);
case '&': return withNext('&', Token::logAnd);
case '|': return withNext('|', Token::logOr);
case '*': return produce(Token::star);
case '"': return string(start);
default: return identifier(start);
}
}
index_ = source_.size()+1;
return nothing();
}
maybe<Token> current() const {
return current_;
}
private:
maybe<Token> withNext(char next, Token::Kind kind) {
if(index_ >= source_.size()) return nothing();
if(source_[index_++] != next) return nothing();
current_ = Token{kind, std::string_view(source_.data() + index_ - 2, 2)};
return current_;
}
maybe<Token> produce(Token::Kind kind, std::size_t start = -1) {
start = start == -1 ? index_ - 1 : start;
current_ = Token{kind, std::string_view(source_.data() + start, index_ - start)};
return current_;
}
maybe<Token> string(std::size_t start) {
while(index_ < source_.size() && source_[index_] != '"') {
index_++;
}
if(index_ < source_.size()) {
index_++;
auto length = (index_ - start) - 2;
current_ = Token{Token::constant, std::string_view(source_.data() + start + 1, length)};
return current_;
} else {
return nothing();
}
}
maybe<Token> identifier(std::size_t start) {
while(index_ < source_.size() && isIdentifier(source_[index_])) {
index_++;
}
auto length = index_ - start;
std::string_view ident(source_.data() + start, length);
Token::Kind kind = Token::constant;
if(ident == "select")
kind = Token::select;
else if(ident == "where")
kind = Token::where;
else if(ident == "and")
kind = Token::logAnd;
else if(ident == "or")
kind = Token::logOr;
else if(ident == "is")
kind = Token::is;
else if(ident == "airports" || ident == "waypoints" || ident == "navaids" || ident == "airways")
kind = Token::resource;
else if(ident == "name" || ident == "icao" || ident == "region" || ident == "airport")
kind = Token::field;
current_ = Token{kind, ident};
return current_;
}
bool isIdentifier(char c) {
return c == '_'
|| c == '-'
|| (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9');
}
std::string_view source_;
std::size_t index_ = 0;
maybe<Token> current_ = nothing();
};
class Parser {
public:
Parser(const std::string& source) : scanner(source) {}
bool failed() const { return failed_; }
protected:
bool is(Token::Kind kind) const {
if(!scanner.current()) return false;
return scanner.current()->kind == kind;
}
bool is(const std::set<Token::Kind>& kind) const {
if(!scanner.current()) return false;
return kind.count(scanner.current()->kind);
}
maybe<Token> match(Token::Kind kind) {
if(is(kind)) {
auto t = *scanner;
scanner.next();
return t;
}
return nothing();
}
maybe<Token> match(const std::set<Token::Kind>& kind) {
if(is(kind)) {
auto t = *scanner;
scanner.next();
return t;
}
return nothing();
}
void eat() {
scanner.next();
}
maybe<Token> expect(Token::Kind kind, const std::string& message = "unexpected token") {
if(failed_) {
while(*scanner && !is(kind)) ++scanner;
auto t = *scanner;
scanner.next();
return t;
} else {
if(is(kind)) {
auto t = *scanner;
scanner.next();
return t;
}
error(message);
return nothing();
}
}
maybe<Token> expect(const std::set<Token::Kind> kind, const std::string& message = "unexpected token") {
if(failed_) {
while(*scanner && !is(kind)) ++scanner;
auto t = *scanner;
scanner.next();
return t;
} else {
if(is(kind)) {
auto t = *scanner;
scanner.next();
return t;
}
error(message);
return nothing();
}
}
bool any() {
return scanner.next().has_value();
}
// MARK: - parsing stuff
protected:
void error(const std::string& message) {
if(failed_) return;
std::cerr << "error: " << message << "\n";
std::cerr << " > " << scanner.current() << "\n";
failed_ = true;
}
Scanner scanner;
private:
bool failed_ = false;
};
struct Predicate {
enum opcode {
op_eq,
op_neq,
op_and,
op_or,
op_halt,
op_fname,
op_ficao,
op_fregion,
op_fairport,
};
bool run(const Record& rec) {
ip = 0;
sp = 0;
bool a, b;
while(ip < bytecode.size()) {
opcode inst = (opcode)read();
switch (inst) {
case op_eq:
push(is(rec, (opcode)read(), read()));
break;
case op_neq:
push(!is(rec, (opcode)read(), read()));
break;
case op_and:
a = pop();
b = pop();
push(a && b);
break;
case op_or:
a = pop();
b = pop();
push(a || b);
break;
case op_halt:
return false;
break;
default:
break;
}
}
assert(sp < 64);
return pop();
}
void emitOp(opcode op) {
bytecode.push_back(op);
}
void emitConst(const std::string_view& string) {
auto it = std::find(constants.begin(), constants.end(), string);
if(it == constants.end()) {
bytecode.push_back((std::uint8_t)constants.size());
constants.push_back(std::string(string.begin(), string.end()));
} else {
bytecode.push_back((std::uint8_t)(it - constants.begin()));
}
}
private:
bool is(const Record& rec, opcode field, std::uint8_t idx) {
switch (field) {
case op_fname: return rec.name == constants[idx];
case op_ficao: return rec.ident == constants[idx];
case op_fregion: return rec.region == constants[idx];
case op_fairport: return rec.airport == constants[idx];
default: abort();
}
return false;
}
std::vector<std::string> constants;
std::vector<std::uint8_t> bytecode;
void push(bool value) {
stack[sp++] = value;
}
bool pop() {
return stack[--sp];
}
std::uint8_t read() {
return bytecode[ip++];
}
bool stack[64];
int sp, ip;
};
class QueryParser : public Parser {
public:
using Parser::Parser;
maybe<Predicate> statement() {
scanner.next();
select();
if(failed()) return nothing();
return pred;
}
private:
void select() {
expect(Token::select);
auto res = expect(Token::resource);
if(match(Token::star)) {
// flip a flag for detailed records
}
if(match(Token::where)) whereClause();
}
void whereClause() {
expression();
}
void expression() {
factor();
maybe<Token> op;
while((op = match({Token::logAnd, Token::logOr}))) {
factor();
pred.emitOp(makeLogOp(op));
}
}
void factor() {
if(match(Token::braceL)) {
expression();
expect(Token::braceR);
} else {
auto field = expect(Token::field);
auto op = expect({Token::is, Token::isnot});
auto constant = expect(Token::constant);
pred.emitOp(makeRelOp(op));
pred.emitOp(makeField(field));
pred.emitConst(constant->value);
}
}
static Predicate::opcode makeRelOp(const maybe<Token>& tok) {
if(!tok) return Predicate::op_halt;
if(tok->kind == Token::is) return Predicate::op_eq;
if(tok->kind == Token::isnot) return Predicate::op_neq;
return Predicate::op_halt;
}
static Predicate::opcode makeLogOp(const maybe<Token>& tok) {
if(!tok) return Predicate::op_halt;
if(tok->kind == Token::logAnd) return Predicate::op_and;
if(tok->kind == Token::logOr) return Predicate::op_or;
return Predicate::op_halt;
}
static Predicate::opcode makeField(const maybe<Token>& tok) {
if(!tok) return Predicate::op_halt;
if(tok->kind != Token::field) return Predicate::op_halt;
if(*tok == "icao") return Predicate::op_ficao;
if(*tok == "region") return Predicate::op_fregion;
if(*tok == "name") return Predicate::op_fname;
if(*tok == "airport") return Predicate::op_fairport;
return Predicate::op_halt;
}
Predicate pred;
};
int main() {
std::vector<Record> records = {
{"EGPN", "EG", "Dundee"},
{"EGPH", "EG", "Dundee"},
{"EGPF", "EG", "Glasgow"},
{"LSGG", "LS", "Geneve Cointrin"},
{"LFPG", "LF", "Paris Charles-de-Gaulle"},
{"LFPO", "LF", "Paris Orly"}
};
std::string source;
while(std::getline(std::cin, source, '\n')) {
// std::cout << "source: " << source << "\n";
QueryParser p(source);
auto pred = p.statement();
if(!pred) continue;
for(const auto& rec: records) {
if(!pred->run(rec)) continue;
std::cout << "match: " << rec.ident << ", " << rec.region << "\n";
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment