Skip to content

Instantly share code, notes, and snippets.

@vladfaust
Last active September 10, 2019 20:21
Show Gist options
  • Save vladfaust/3a73e639cdbdd40746f44aa73e30c5d6 to your computer and use it in GitHub Desktop.
Save vladfaust/3a73e639cdbdd40746f44aa73e30c5d6 to your computer and use it in GitHub Desktop.
---
# clang-format configuration for the lexer sources.
# Start from the LLVM preset; every key below overrides one of its defaults.
BasedOnStyle: LLVM
# Drop blank lines that directly follow an opening brace.
KeepEmptyLinesAtTheStartOfBlocks: false
# Call arguments go either all on one line or one per line.
BinPackArguments: false
# Declaration parameters go either all on one line or one per line.
BinPackParameters: false
# A declaration that does not fit may still move all of its parameters
# together onto the next line, despite BinPackParameters being off.
AllowAllParametersOfDeclarationOnNextLine: true
# Break constructor initializer lists after the colon and after each comma.
BreakConstructorInitializers: AfterColon
# When the contents of a bracket do not fit on one line, break right after the
# opening bracket instead of aligning to it.
AlignAfterOpenBracket: AlwaysBreak
# Constructor initializers that do not fit on one line get one line each.
ConstructorInitializerAllOnOneLineOrOnePerLine: true
# Maximum line width.
ColumnLimit: 100
# Keep #include directives in the order they were written.
SortIncludes: false
-IC:/LLVM/Source/include
-IC:/LLVM/Built/include
#pragma once
#include "./lexeme.cpp"
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <deque>
#include <iostream>
#include <memory>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
namespace Compiler {
// Reads input and yields lexemes one by one.
// This class is not thread-safe:
// the input stream must not be accessed by anything else while a Lexer is reading from it.
class Lexer {
istream *_input;
char _lastChar;
stack<char> _rpnOutput;
stack<char> _rpnBuffer;
bool _rpnMaybeCall;
bool _rpnWaitingForArg;
public:
Lexer(istream *input) : _input(input) { readChar(); }
// Parse the next lexeme. It uses RPN.
unique_ptr<Lexeme::Base> next() {
// Skip spaces
while (isspace(_lastChar) && _lastChar != '\n')
readChar();
// Read a number.
// It could start from the minus sign
if (isdigit(_lastChar) || _lastChar == '-' || _lastChar == '+') {
bool negative = false;
if (_lastChar == '-') {
negative = true;
readChar(); // Consume the minus sign
} else if (_lastChar == '+') {
readChar(); // Consume the plus sign
}
const bool firstZero = (_lastChar == '0');
string significand = readNumber();
// `0X` as a whole is an invalid number
if (significand.size() > 1 && firstZero)
throw Error("Numbers can not begin with zero");
int exponent = 0;
if (_lastChar == '.') {
readChar(); // Consume the dot
string digits = readNumber();
if (digits.empty())
throw Error("Expected digits after dot for a floating point number");
// For x.123, -3 is the exponent size
exponent = -1 * (int)digits.size();
// Special case `0.x`
if (significand.front() == '0')
significand = "";
// Shift digits until it is not zero
while (!digits.empty() && digits.front() == '0')
digits.erase(0, 1);
// Append the after-dot digits
significand += digits;
}
bool isExplicitLiteral = false;
if (_lastChar == '_') {
// Thats an explicit literal!
isExplicitLiteral = true;
readChar(); // Consume the `_`
}
char suffix = _lastChar;
switch (suffix) {
case 'u':
if (negative)
throw Error("Unsigned literals can not be negative");
case 'i': {
readChar(); // Consume the literal prefix
const string sizeString = readNumber();
if (sizeString.empty())
return make_unique<Lexeme::Int>(
Lexeme::Int(suffix == 'i', suffix == 'u', negative, NULL, significand));
else {
constexpr int sizes[] = {1, 2, 4, 8, 16, 32, 64, 128};
const int size = stoi(sizeString);
bool allowed = (find(begin(sizes), end(sizes), size) != end(sizes));
if (allowed)
return make_unique<Lexeme::Int>(
Lexeme::Int(suffix == 'i', suffix == 'u', negative, size, significand));
else
throw Error(
"Invalid integer bitsize " + to_string(size) +
", expected 1, 2, 4, 8, 16, 32, 64, 128");
}
}
case 'f': {
readChar(); // Consume the literal prefix
const string sizeString = readNumber();
if (sizeString.empty())
return make_unique<Lexeme::Float>(Lexeme::Float(negative, NULL, significand, exponent));
else {
constexpr int sizes[] = {16, 32, 64, 128};
const int size = stoi(sizeString);
bool allowed = (find(begin(sizes), end(sizes), size) != end(sizes));
if (allowed)
return make_unique<Lexeme::Float>(Lexeme::Float(negative, size, significand, exponent));
else
throw Error(
"Invalid floating point bitsize " + to_string(size) + ", expected 16, 32, 64,
128");
}
}
// Neither of the known literal suffixes matched
default:
if (isExplicitLiteral)
// Error if it's an explicit literal
throw Error("Malformed number literal. Expected suffixes i, u, f");
else {
if (isalpha(_lastChar))
// If a number is immediately followed by some a-zA-Z char, that's a error!
throw Error("Malformed number literal");
else if (exponent)
// That is an implicit float
return make_unique<Lexeme::Float>(Lexeme::Float(negative, NULL, significand, exponent));
else
// That is an implicit integer
return make_unique<Lexeme::Int>(Lexeme::Int(false, false, negative, NULL, significand));
}
}
}
// Read an alphanumeric identifier starting with alpha (letter) or "_"
if (isalpha(_lastChar) || _lastChar == '_') {
string value = readIndentifier();
auto id = make_unique<Lexeme::Identifier>(value);
while (isspace(_lastChar) && _lastChar != '\n')
readChar();
// Is it an assignment?
if (_lastChar == '=') {
readChar(); // Consume the '=' sign
auto assigned = next();
return make_unique<Lexeme::Assignment>(move(id), move(assigned));
}
return id;
}
if (_lastChar == '\n')
return make_unique<Lexeme::Newline>();
if (_lastChar == EOF)
return make_unique<Lexeme::Eof>();
throw Error("Unknown token");
};
private:
// Read the next char from the input.
char readChar() { return _lastChar = _input->get(); }
// Read an arbitrary size number from the input,
// starting from the `_lastChar`.
string readNumber() {
string buffer;
while (isdigit(_lastChar)) {
buffer += _lastChar;
readChar();
};
return buffer;
}
// Read an arbitrary alphanumeric identifier.
string readIndentifier() {
string buffer;
while (isalnum(_lastChar) || _lastChar == '_') {
buffer += _lastChar;
readChar();
};
return buffer;
}
}; // namespace Compiler
} // namespace Compiler
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment