Skip to content

Instantly share code, notes, and snippets.

@ichaos
Last active December 29, 2015 15:29
Show Gist options
  • Save ichaos/7691277 to your computer and use it in GitHub Desktop.
Save ichaos/7691277 to your computer and use it in GitHub Desktop.
Google protocol buffer code reading
// Character-class predicates, meant to be supplied as template arguments.
// For example, Tokenizer::ConsumeZeroOrMore<Whitespace>() eats a run of
// whitespace.
// Note: '\0' must never be a member of any class, because it marks
// end-of-input and is handled specially.
//
// CHARACTER_CLASS(CLASS_NAME, PREDICATE) defines a type CLASS_NAME with a
// single static member, InClass(char c), that returns PREDICATE evaluated
// on `c`.
#define CHARACTER_CLASS(CLASS_NAME, PREDICATE) \
  struct CLASS_NAME { \
    static inline bool InClass(char c) { \
      return (PREDICATE); \
    } \
  }

// Space, tab, newline, carriage return, vertical tab and form feed.
CHARACTER_CLASS(Whitespace,
                c == ' ' || c == '\t' || c == '\n' ||
                c == '\v' || c == '\f' || c == '\r');
// Every Whitespace character except the newline.
CHARACTER_CLASS(WhitespaceNoNewline,
                c != '\n' && Whitespace::InClass(c));
// Characters strictly between '\0' and the space character.
CHARACTER_CLASS(Unprintable, '\0' < c && c < ' ');
CHARACTER_CLASS(Digit, c >= '0' && c <= '9');
CHARACTER_CLASS(OctalDigit, c >= '0' && c <= '7');
CHARACTER_CLASS(HexDigit,
                Digit::InClass(c) ||
                (c >= 'a' && c <= 'f') ||
                (c >= 'A' && c <= 'F'));
// ASCII letters plus the underscore.
CHARACTER_CLASS(Letter,
                c == '_' ||
                (c >= 'a' && c <= 'z') ||
                (c >= 'A' && c <= 'Z'));
// Letters (including the underscore) plus decimal digits.
CHARACTER_CLASS(Alphanumeric,
                Letter::InClass(c) || Digit::InClass(c));
// Characters accepted directly after a backslash as a simple escape.
CHARACTER_CLASS(Escape,
                c == '\\' || c == '\'' || c == '\"' || c == '?' ||
                c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
                c == 'r' || c == 't' || c == 'v');

#undef CHARACTER_CLASS
/**
* Lessons learned:
* 1. Encapsulate basic, frequently used operations so that
* the calling code reads like an English sentence.
* 2. Use templates and metaprogramming.
* 3. Write readable code!
*/
// Consumes the current character if it belongs to CharacterClass.
// Returns true after calling NextChar() when CharacterClass::InClass()
// accepts current_char_; otherwise returns false without calling NextChar().
template<typename CharacterClass>
inline bool Tokenizer::TryConsumeOne() {
  const bool matched = CharacterClass::InClass(current_char_);
  if (matched) {
    NextChar();
  }
  return matched;
}
// Consumes the current character if it is exactly `c`.
// Returns true after calling NextChar() on a match; otherwise returns false
// without calling NextChar().
inline bool Tokenizer::TryConsume(char c) {
  if (current_char_ != c) {
    return false;
  }
  NextChar();
  return true;
}
/**
 * Consumes the characters of a numeric literal from the input stream.
 * Handles decimal integers, floats, hex literals and octal literals.
 *
 * Built from these basic operations:
 *   TryConsume
 *   LookingAt<typename>
 *   ConsumeOneOrMore
 *   ConsumeZeroOrMore
 *
 * started_with_zero: the literal began with '0' (presumably already
 *                    consumed by the caller -- verify against the caller).
 * started_with_dot:  the literal began with '.' (same assumption).
 *
 * Returns TYPE_FLOAT if a decimal point, an exponent, or (when
 * allow_f_after_float_ is set) an 'f'/'F' suffix was seen; TYPE_INTEGER
 * otherwise. Malformed input is reported through AddError().
 */
Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
                                              bool started_with_dot) {
  bool saw_float_syntax = false;

  if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
    // Hex literal: "0x" has been read, at least one hex digit must follow.
    ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
  } else if (started_with_zero && LookingAt<Digit>()) {
    // Octal literal: a leading zero followed by more digits.
    ConsumeZeroOrMore<OctalDigit>();
    if (LookingAt<Digit>()) {
      // An 8 or 9 stopped the octal run; report it and skip the rest of
      // the digits.
      AddError("Numbers starting with leading zero must be in octal.");
      ConsumeZeroOrMore<Digit>();
    }
  } else {
    // Decimal literal. A leading '.' already makes it a float.
    saw_float_syntax = started_with_dot;
    ConsumeZeroOrMore<Digit>();
    if (!started_with_dot && TryConsume('.')) {
      // Fractional part after the integer digits.
      saw_float_syntax = true;
      ConsumeZeroOrMore<Digit>();
    }

    if (TryConsume('e') || TryConsume('E')) {
      // Exponent: optional sign, then at least one digit.
      saw_float_syntax = true;
      if (!TryConsume('-')) {
        TryConsume('+');
      }
      ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
    }

    if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
      saw_float_syntax = true;
    }
  }

  // Diagnose characters that must not directly follow a number.
  if (LookingAt<Letter>()) {
    AddError("Need space between number and identifier.");
  } else if (current_char_ == '.') {
    if (!saw_float_syntax) {
      AddError("Hex and octal numbers must be integers.");
    } else {
      AddError(
          "Already saw decimal point or exponent; can't have another one.");
    }
  }

  if (saw_float_syntax) {
    return TYPE_FLOAT;
  }
  return TYPE_INTEGER;
}
/**
 * Consumes the body of a string literal from the input stream, up to and
 * including the closing `delimiter`.
 *
 * Stops early, after reporting an error through AddError(), when the
 * current character is '\0' or a newline. Escape sequences are checked
 * for well-formedness as they are passed over; malformed ones are
 * reported but scanning continues.
 */
void Tokenizer::ConsumeString(char delimiter) {
  for (;;) {
    // '\0' or a newline ends the literal with an error.
    if (current_char_ == '\0' || current_char_ == '\n') {
      AddError("String literals cannot cross line boundaries.");
      return;
    }

    // Ordinary character: consume it, and finish if it was the delimiter.
    if (current_char_ != '\\') {
      const bool at_closing_delimiter = (current_char_ == delimiter);
      NextChar();
      if (at_closing_delimiter) {
        return;
      }
      continue;
    }

    // Escape sequence: step past the backslash first.
    NextChar();

    if (TryConsumeOne<Escape>() || TryConsumeOne<OctalDigit>()) {
      // Either a simple escape, or the first digit of an octal escape.
      // Up to two more octal digits may follow, but the main loop will
      // consume them as ordinary characters, so nothing more to do here.
      continue;
    }

    if (TryConsume('x') || TryConsume('X')) {
      // At least one hex digit is required; a second one, if present, is
      // simply picked up by the main loop.
      if (!TryConsumeOne<HexDigit>()) {
        AddError("Expected hex digits for escape sequence.");
      }
      continue;
    }

    if (TryConsume('u')) {
      const bool has_four_hex_digits =
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>();
      if (!has_four_hex_digits) {
        AddError("Expected four hex digits for \\u escape sequence.");
      }
      continue;
    }

    if (TryConsume('U')) {
      // Eight hex digits are expected, but only values up to 0x10ffff are
      // legal: the first two must be '0' and the third '0' or '1'.
      const bool has_valid_code_point =
          TryConsume('0') &&
          TryConsume('0') &&
          (TryConsume('0') || TryConsume('1')) &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>();
      if (!has_valid_code_point) {
        AddError("Expected eight hex digits up to 10ffff for \\U escape "
                 "sequence");
      }
      continue;
    }

    AddError("Invalid escape sequence in string literal.");
  }
}
/**
* Like it! :)
*/
// Declares (without defining) a copy constructor and a copy-assignment
// operator for the class named Type. Use it inside a private section of the
// class so that outside code cannot copy or assign instances.
#define GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Type) \
  Type(const Type&); \
  void operator=(const Type&)

// Example use of the macro.
class CommandLineInterface {
  // Members of a `class` are private by default, so both declarations
  // below are inaccessible to outside code.
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CommandLineInterface);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment