abenkovskii/main.cpp

## main.cpp
#include <string>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <cctype>

using std::string;
using std::istream;
using std::find;
using std::begin;
using std::end;

// A single lexical token: its kind (Type) plus an optional payload.
// Which payload field is meaningful depends on the constructor that was used:
// text for the string constructor, an integer for the int constructor, a
// real value for the double constructor. Unused payload fields are
// zero/empty rather than indeterminate.
class Token {
	public:
		// Every kind of token the lexer distinguishes.
		enum class Type
		{
			// kw
			PROGRAM,
			INT,
			STRING,
			BOOL,
			REAL,
			IF,
			ELSE,
			CASE,
			OF,
			END,
			DO,
			WHILE,
			READ,
			WRITE,
			BREAK,
			NOT,
			AND,
			OR,
			TRUE,
			FALSE,

			// delim (single char)
			OPEN_CURLY,
			CLOSE_CURLY,
			SEMICOLON,
			COMMA,
			OPEN_ROUND,
			CLOSE_ROUND,
			ASSIGN,
			COLON,
			MUL,
			DIV,
			MOD,
			PLUS,
			MINUS,
			LT,
			GT,

			// delim (multiple char)
			LE,
			GE,
			EQ,
			NEQ,

			// other
			ID,
			INT_CONST,
			STRING_CONST,
			REAL_CONST,
			SPACE,
			END_OF_FILE
		};

		// Payload-free token. Deliberately not explicit: next_token returns
		// bare Type values and relies on this implicit conversion.
		Token(Type t):type_(t) {}
		// Token carrying text.
		Token(Type t, string s):type_(t), str_(s) {}
		// Token carrying an integer value.
		Token(Type t, int i):type_(t), int_(i) {}
		// Token carrying a real value.
		Token(Type t, double d):type_(t), double_(d) {}

		// Kind of this token.
		Type type() const { return type_; }
		// Integer payload; 0 unless built with the int constructor.
		int int_value() const { return int_; }
		// Real payload; 0.0 unless built with the double constructor.
		double real_value() const { return double_; }
		// Text payload; empty unless built with the string constructor.
		const string &str() const { return str_; }

	private:
		Type type_;
		// Default initializers keep the fields a constructor does not set
		// from holding indeterminate values.
		int int_ = 0;
		double double_ = 0.0;
		string str_;
};

namespace {
	Token::Type kw_tok[] = {
		Token::Type::PROGRAM,
		Token::Type::INT,
		Token::Type::STRING,
		Token::Type::BOOL,
		Token::Type::REAL,
		Token::Type::IF,
		Token::Type::ELSE,
		Token::Type::CASE,
		Token::Type::OF,
		Token::Type::END,
		Token::Type::DO,
		Token::Type::WHILE,
		Token::Type::READ,
		Token::Type::WRITE,
		Token::Type::BREAK,
		Token::Type::NOT,
		Token::Type::AND,
		Token::Type::OR,
		Token::Type::TRUE,
		Token::Type::FALSE
	};

	string kw[] = {
		"program",
		"int",
		"string",
		"bool",
		"real",
		"if",
		"else",
		"case",
		"of",
		"end",
		"do",
		"while",
		"read",
		"write",
		"break",
		"not",
		"and",
		"or",
		"true",
		"false"
	};
}

// XXX: placeholder error type -- the offending character (or EOF) is thrown
// as a plain int.
using UnexpectedCharacter = int;

// Passes c through unchanged unless it is EOF, in which case
// UnexpectedCharacter(EOF) is thrown. Wrap a read with this wherever end of
// input is an error.
int no_eof(int c) {
	if(c != EOF)
		return c;
	throw UnexpectedCharacter(c);
}

// True when c may appear in an identifier: a decimal digit or a letter in
// 'a'..'z' / 'A'..'Z'.
bool is_id_char(int c) {
	if(isdigit(c))
		return true;
	const bool lower = c >= 'a' && c <= 'z';
	const bool upper = c >= 'A' && c <= 'Z';
	return lower || upper;
}

// True for the four characters the lexer treats as whitespace:
// space, tab, line feed and carriage return.
bool my_isspace(int c) {
	switch(c) {
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			return true;
		default:
			return false;
	}
}

// XXX: can't find proof that one character putback is guarantied after reading
// XXX: are all this chars in the basic source char set?
Token next_token(istream &is) {
	int c = is.get();
	switch(c) {
		case '{': return Token::Type::OPEN_CURLY;
		case '}': return Token::Type::CLOSE_CURLY;
		case ';': return Token::Type::SEMICOLON;
		case ',': return Token::Type::COMMA;
		case '(': return Token::Type::OPEN_ROUND;
		case ')': return Token::Type::CLOSE_ROUND;
		case ':': return Token::Type::COLON;
		case '*': return Token::Type::MUL;
		case '%': return Token::Type::MOD;
		case '+': return Token::Type::PLUS;
		case '-': return Token::Type::MINUS;
		case EOF: return Token::Type::END_OF_FILE;
		case '=':
			if((c = no_eof(is.get())) == '=')
				return Token::Type::EQ;
			is.unget();
			return Token::Type::ASSIGN;
		case '<':
			if((c = no_eof(is.get())) == '=')
				return Token::Type::LE;
			is.unget();
			return Token::Type::LT;
		case '>':
			if((c = no_eof(is.get())) == '=')
				return Token::Type::GE;
			is.unget();
			return Token::Type::GT;
		case '!':
			if((c = no_eof(is.get())) == '=')
				return Token::Type::NEQ;
			throw UnexpectedCharacter(c);


		// "/*/" -- not a comment
		// "/* * */" -- comment
		// "/* * / */" -- comment
		// "/*   **/" -- comment
		case '/':
			if((c = no_eof(is.get())) != '*') {
				is.unget();
				return Token::Type::DIV;
			}

			goto comment;
comment:
			if(no_eof(is.get()) == '*')
				goto asterix_found;
			else
				goto comment;
asterix_found:
			switch(no_eof(is.get())) {
				case '/':
					goto done;
				case '*':
					goto asterix_found;
				default:
					goto comment;
			}
done:
			return Token::Type::SPACE;

		case '"':
			{
				string s;
				while((c = no_eof(is.get())) != '"')
					s.push_back(c);
				return Token(Token::Type::STRING_CONST, s);
			}

		default:
			if(my_isspace(c)) {
				while(my_isspace(c = is.get()))
					continue;
				if(c != EOF)
					is.unget();
				return Token::Type::SPACE;
			} else if(isdigit(c)) {
				// TODO: when writing parser don't forget that constants can have signs
				// bonus points: "- /* comment */ 3.14" is technically not a constant
				string s;
				for(;isdigit(c); c=no_eof(is.get()))
					s.push_back(c);
				if(c != '.') {
					is.unget();
					return Token(Token::Type::INT_CONST, stoi(s));
				}
				s.push_back('.');
				// XXX "314." should not be valid
				for(c=no_eof(is.get()); isdigit(c); c=no_eof(is.get()))
					s.push_back(c);
				is.unget();
				return Token(Token::Type::REAL_CONST, stod(s));
			} else if (is_id_char(c)) {
				string s;
				for(;is_id_char(c); c=no_eof(is.get()))
					s.push_back(c);
				is.unget();
				auto k = find(begin(kw), end(kw), s);
				if(k != end(kw))
					return Token(kw_tok[k - begin(kw)]);
				return Token(Token::Type::ID, s);
			}
			throw UnexpectedCharacter(c);
	}
}

// Program entry point. Nothing is wired up yet; exits with status 0.
int main() {
	return 0;
}
	#include <string>
	#include <iostream>
	#include <algorithm>
	#include <iterator>
	#include <cctype>

	using std::string;
	using std::istream;
	using std::find;
	using std::begin;
	using std::end;

	// A single lexical token: its kind (Type) plus an optional payload.
	// Which payload field is meaningful depends on the constructor that was used:
	// text for the string constructor, an integer for the int constructor, a
	// real value for the double constructor. Unused payload fields are
	// zero/empty rather than indeterminate.
	class Token {
		public:
			// Every kind of token the lexer distinguishes.
			enum class Type
			{
				// kw
				PROGRAM,
				INT,
				STRING,
				BOOL,
				REAL,
				IF,
				ELSE,
				CASE,
				OF,
				END,
				DO,
				WHILE,
				READ,
				WRITE,
				BREAK,
				NOT,
				AND,
				OR,
				TRUE,
				FALSE,

				// delim (single char)
				OPEN_CURLY,
				CLOSE_CURLY,
				SEMICOLON,
				COMMA,
				OPEN_ROUND,
				CLOSE_ROUND,
				ASSIGN,
				COLON,
				MUL,
				DIV,
				MOD,
				PLUS,
				MINUS,
				LT,
				GT,

				// delim (multiple char)
				LE,
				GE,
				EQ,
				NEQ,

				// other
				ID,
				INT_CONST,
				STRING_CONST,
				REAL_CONST,
				SPACE,
				END_OF_FILE
			};

			// Payload-free token. Deliberately not explicit: next_token returns
			// bare Type values and relies on this implicit conversion.
			Token(Type t):type_(t) {}
			// Token carrying text.
			Token(Type t, string s):type_(t), str_(s) {}
			// Token carrying an integer value.
			Token(Type t, int i):type_(t), int_(i) {}
			// Token carrying a real value.
			Token(Type t, double d):type_(t), double_(d) {}

			// Kind of this token.
			Type type() const { return type_; }
			// Integer payload; 0 unless built with the int constructor.
			int int_value() const { return int_; }
			// Real payload; 0.0 unless built with the double constructor.
			double real_value() const { return double_; }
			// Text payload; empty unless built with the string constructor.
			const string &str() const { return str_; }

		private:
			Type type_;
			// Default initializers keep the fields a constructor does not set
			// from holding indeterminate values.
			int int_ = 0;
			double double_ = 0.0;
			string str_;
	};

	namespace {
	Token::Type kw_tok[] = {
	Token::Type::PROGRAM,
	Token::Type::INT,
	Token::Type::STRING,
	Token::Type::BOOL,
	Token::Type::REAL,
	Token::Type::IF,
	Token::Type::ELSE,
	Token::Type::CASE,
	Token::Type::OF,
	Token::Type::END,
	Token::Type::DO,
	Token::Type::WHILE,
	Token::Type::READ,
	Token::Type::WRITE,
	Token::Type::BREAK,
	Token::Type::NOT,
	Token::Type::AND,
	Token::Type::OR,
	Token::Type::TRUE,
	Token::Type::FALSE
	};

	string kw[] = {
	"program",
	"int",
	"string",
	"bool",
	"real",
	"if",
	"else",
	"case",
	"of",
	"end",
	"do",
	"while",
	"read",
	"write",
	"break",
	"not",
	"and",
	"or",
	"true",
	"false"
	};
	}

	// XXX: placeholder error type -- the offending character (or EOF) is thrown
	// as a plain int.
	using UnexpectedCharacter = int;

	// Passes c through unchanged unless it is EOF, in which case
	// UnexpectedCharacter(EOF) is thrown. Wrap a read with this wherever end of
	// input is an error.
	int no_eof(int c) {
		if(c != EOF)
			return c;
		throw UnexpectedCharacter(c);
	}

	// True when c may appear in an identifier: a decimal digit or a letter in
	// 'a'..'z' / 'A'..'Z'.
	bool is_id_char(int c) {
		// The "\|\|" sequences here were an escaping artifact, not valid C++;
		// they are restored to the logical-or operator.
		return isdigit(c) || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
	}

	// True for the four characters the lexer treats as whitespace:
	// space, tab, line feed and carriage return.
	bool my_isspace(int c) {
		// The "\|\|" sequences here were an escaping artifact, not valid C++;
		// they are restored to the logical-or operator.
		return c == ' ' || c == '\t' || c == '\n' || c == '\r';
	}

	// XXX: can't find proof that one character putback is guarantied after reading
	// XXX: are all this chars in the basic source char set?
	Token next_token(istream &is) {
	int c = is.get();
	switch(c) {
	case '{': return Token::Type::OPEN_CURLY;
	case '}': return Token::Type::CLOSE_CURLY;
	case ';': return Token::Type::SEMICOLON;
	case ',': return Token::Type::COMMA;
	case '(': return Token::Type::OPEN_ROUND;
	case ')': return Token::Type::CLOSE_ROUND;
	case ':': return Token::Type::COLON;
	case '*': return Token::Type::MUL;
	case '%': return Token::Type::MOD;
	case '+': return Token::Type::PLUS;
	case '-': return Token::Type::MINUS;
	case EOF: return Token::Type::END_OF_FILE;
	case '=':
	if((c = no_eof(is.get())) == '=')
	return Token::Type::EQ;
	is.unget();
	return Token::Type::ASSIGN;
	case '<':
	if((c = no_eof(is.get())) == '=')
	return Token::Type::LE;
	is.unget();
	return Token::Type::LT;
	case '>':
	if((c = no_eof(is.get())) == '=')
	return Token::Type::GE;
	is.unget();
	return Token::Type::GT;
	case '!':
	if((c = no_eof(is.get())) == '=')
	return Token::Type::NEQ;
	throw UnexpectedCharacter(c);


	// "/*/" -- not a comment
	// "/* * */" -- comment
	// "/* * / */" -- comment
	// "/* **/" -- comment
	case '/':
	if((c = no_eof(is.get())) != '*') {
	is.unget();
	return Token::Type::DIV;
	}

	goto comment;
	comment:
	if(no_eof(is.get()) == '*')
	goto asterix_found;
	else
	goto comment;
	asterix_found:
	switch(no_eof(is.get())) {
	case '/':
	goto done;
	case '*':
	goto asterix_found;
	default:
	goto comment;
	}
	done:
	return Token::Type::SPACE;

	case '"':
	{
	string s;
	while((c = no_eof(is.get())) != '"')
	s.push_back(c);
	return Token(Token::Type::STRING_CONST, s);
	}

	default:
	if(my_isspace(c)) {
	while(my_isspace(c = is.get()))
	continue;
	if(c != EOF)
	is.unget();
	return Token::Type::SPACE;
	} else if(isdigit(c)) {
	// TODO: when writing parser don't forget that constants can have signs
	// bonus points: "- /* comment */ 3.14" is technically not a constant
	string s;
	for(;isdigit(c); c=no_eof(is.get()))
	s.push_back(c);
	if(c != '.') {
	is.unget();
	return Token(Token::Type::INT_CONST, stoi(s));
	}
	s.push_back('.');
	// XXX "314." should not be valid
	for(c=no_eof(is.get()); isdigit(c); c=no_eof(is.get()))
	s.push_back(c);
	is.unget();
	return Token(Token::Type::REAL_CONST, stod(s));
	} else if (is_id_char(c)) {
	string s;
	for(;is_id_char(c); c=no_eof(is.get()))
	s.push_back(c);
	is.unget();
	auto k = find(begin(kw), end(kw), s);
	if(k != end(kw))
	return Token(kw_tok[k - begin(kw)]);
	return Token(Token::Type::ID, s);
	}
	throw UnexpectedCharacter(c);
	}
	}

	// Program entry point. Nothing is wired up yet; exits with status 0.
	int main() {
		return 0;
	}