Last active
September 10, 2019 20:21
-
-
Save vladfaust/3a73e639cdbdd40746f44aa73e30c5d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
BasedOnStyle: LLVM
KeepEmptyLinesAtTheStartOfBlocks: false
BinPackArguments: false
BinPackParameters: false
AllowAllParametersOfDeclarationOnNextLine: true
BreakConstructorInitializers: AfterColon
AlignAfterOpenBracket: AlwaysBreak
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ColumnLimit: 100
SortIncludes: false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-IC:/LLVM/Source/include
-IC:/LLVM/Built/include
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include "./lexeme.cpp"

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <deque>
#include <iostream>
#include <memory>
#include <sstream>
#include <stack>
#include <string>
#include <vector>
using namespace std; | |
namespace Compiler { | |
// Reads input and yields tokens one-by-one. | |
// It's not a threadsafe class. | |
// The input must not be accessed simultaneously. | |
class Lexer { | |
istream *_input; | |
char _lastChar; | |
stack<char> _rpnOutput; | |
stack<char> _rpnBuffer; | |
bool _rpnMaybeCall; | |
bool _rpnWaitingForArg; | |
public: | |
Lexer(istream *input) : _input(input) { readChar(); } | |
// Parse the next lexeme. It uses RPN. | |
unique_ptr<Lexeme::Base> next() { | |
// Skip spaces | |
while (isspace(_lastChar) && _lastChar != '\n') | |
readChar(); | |
// Read a number. | |
// It could start from the minus sign | |
if (isdigit(_lastChar) || _lastChar == '-' || _lastChar == '+') { | |
bool negative = false; | |
if (_lastChar == '-') { | |
negative = true; | |
readChar(); // Consume the minus sign | |
} else if (_lastChar == '+') { | |
readChar(); // Consume the plus sign | |
} | |
const bool firstZero = (_lastChar == '0'); | |
string significand = readNumber(); | |
// `0X` as a whole is an invalid number | |
if (significand.size() > 1 && firstZero) | |
throw Error("Numbers can not begin with zero"); | |
int exponent = 0; | |
if (_lastChar == '.') { | |
readChar(); // Consume the dot | |
string digits = readNumber(); | |
if (digits.empty()) | |
throw Error("Expected digits after dot for a floating point number"); | |
// For x.123, -3 is the exponent size | |
exponent = -1 * (int)digits.size(); | |
// Special case `0.x` | |
if (significand.front() == '0') | |
significand = ""; | |
// Shift digits until it is not zero | |
while (!digits.empty() && digits.front() == '0') | |
digits.erase(0, 1); | |
// Append the after-dot digits | |
significand += digits; | |
} | |
bool isExplicitLiteral = false; | |
if (_lastChar == '_') { | |
// Thats an explicit literal! | |
isExplicitLiteral = true; | |
readChar(); // Consume the `_` | |
} | |
char suffix = _lastChar; | |
switch (suffix) { | |
case 'u': | |
if (negative) | |
throw Error("Unsigned literals can not be negative"); | |
case 'i': { | |
readChar(); // Consume the literal prefix | |
const string sizeString = readNumber(); | |
if (sizeString.empty()) | |
return make_unique<Lexeme::Int>( | |
Lexeme::Int(suffix == 'i', suffix == 'u', negative, NULL, significand)); | |
else { | |
constexpr int sizes[] = {1, 2, 4, 8, 16, 32, 64, 128}; | |
const int size = stoi(sizeString); | |
bool allowed = (find(begin(sizes), end(sizes), size) != end(sizes)); | |
if (allowed) | |
return make_unique<Lexeme::Int>( | |
Lexeme::Int(suffix == 'i', suffix == 'u', negative, size, significand)); | |
else | |
throw Error( | |
"Invalid integer bitsize " + to_string(size) + | |
", expected 1, 2, 4, 8, 16, 32, 64, 128"); | |
} | |
} | |
case 'f': { | |
readChar(); // Consume the literal prefix | |
const string sizeString = readNumber(); | |
if (sizeString.empty()) | |
return make_unique<Lexeme::Float>(Lexeme::Float(negative, NULL, significand, exponent)); | |
else { | |
constexpr int sizes[] = {16, 32, 64, 128}; | |
const int size = stoi(sizeString); | |
bool allowed = (find(begin(sizes), end(sizes), size) != end(sizes)); | |
if (allowed) | |
return make_unique<Lexeme::Float>(Lexeme::Float(negative, size, significand, exponent)); | |
else | |
throw Error( | |
"Invalid floating point bitsize " + to_string(size) + ", expected 16, 32, 64, | |
128"); | |
} | |
} | |
// Neither of the known literal suffixes matched | |
default: | |
if (isExplicitLiteral) | |
// Error if it's an explicit literal | |
throw Error("Malformed number literal. Expected suffixes i, u, f"); | |
else { | |
if (isalpha(_lastChar)) | |
// If a number is immediately followed by some a-zA-Z char, that's a error! | |
throw Error("Malformed number literal"); | |
else if (exponent) | |
// That is an implicit float | |
return make_unique<Lexeme::Float>(Lexeme::Float(negative, NULL, significand, exponent)); | |
else | |
// That is an implicit integer | |
return make_unique<Lexeme::Int>(Lexeme::Int(false, false, negative, NULL, significand)); | |
} | |
} | |
} | |
// Read an alphanumeric identifier starting with alpha (letter) or "_" | |
if (isalpha(_lastChar) || _lastChar == '_') { | |
string value = readIndentifier(); | |
auto id = make_unique<Lexeme::Identifier>(value); | |
while (isspace(_lastChar) && _lastChar != '\n') | |
readChar(); | |
// Is it an assignment? | |
if (_lastChar == '=') { | |
readChar(); // Consume the '=' sign | |
auto assigned = next(); | |
return make_unique<Lexeme::Assignment>(move(id), move(assigned)); | |
} | |
return id; | |
} | |
if (_lastChar == '\n') | |
return make_unique<Lexeme::Newline>(); | |
if (_lastChar == EOF) | |
return make_unique<Lexeme::Eof>(); | |
throw Error("Unknown token"); | |
}; | |
private: | |
// Read the next char from the input. | |
char readChar() { return _lastChar = _input->get(); } | |
// Read an arbitrary size number from the input, | |
// starting from the `_lastChar`. | |
string readNumber() { | |
string buffer; | |
while (isdigit(_lastChar)) { | |
buffer += _lastChar; | |
readChar(); | |
}; | |
return buffer; | |
} | |
// Read an arbitrary alphanumeric identifier. | |
string readIndentifier() { | |
string buffer; | |
while (isalnum(_lastChar) || _lastChar == '_') { | |
buffer += _lastChar; | |
readChar(); | |
}; | |
return buffer; | |
} | |
}; // namespace Compiler | |
} // namespace Compiler |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment