ichaos/ConsumeNumber.cpp

## charClass.cpp
// These "character classes" are designed to be used in template methods.
// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
// whitespace.

// Note:  No class is allowed to contain '\0', since this is used to mark end-
//   of-input and is handled specially.

// Generates a stateless predicate class called NAME.  Its only member,
// the static function InClass(c), evaluates MEMBERSHIP_TEST (an expression
// written in terms of the character `c`) and returns the result.
#define CHARACTER_CLASS(NAME, MEMBERSHIP_TEST)          \
  class NAME {                                          \
   public:                                              \
    static inline bool InClass(char c) {                \
      return (MEMBERSHIP_TEST);                         \
    }                                                   \
  }

// Blank characters, with and without the line feed.
CHARACTER_CLASS(Whitespace,
                c == ' ' || c == '\t' || c == '\n' ||
                c == '\v' || c == '\f' || c == '\r');
CHARACTER_CLASS(WhitespaceNoNewline,
                c == ' ' || c == '\t' ||
                c == '\v' || c == '\f' || c == '\r');

// Everything strictly between NUL and the space character.  NUL itself is
// excluded because no class may contain it.
CHARACTER_CLASS(Unprintable, '\0' < c && c < ' ');

// Digits of the three supported numeric bases.
CHARACTER_CLASS(Digit, c >= '0' && c <= '9');
CHARACTER_CLASS(OctalDigit, c >= '0' && c <= '7');
CHARACTER_CLASS(HexDigit,
                (c >= '0' && c <= '9') ||
                (c >= 'A' && c <= 'F') ||
                (c >= 'a' && c <= 'f'));

// ASCII letters and the underscore.
CHARACTER_CLASS(Letter,
                (c == '_') ||
                (c >= 'A' && c <= 'Z') ||
                (c >= 'a' && c <= 'z'));

// ASCII letters, decimal digits and the underscore.
CHARACTER_CLASS(Alphanumeric,
                (c == '_') ||
                (c >= '0' && c <= '9') ||
                (c >= 'A' && c <= 'Z') ||
                (c >= 'a' && c <= 'z'));

// Characters accepted directly after a backslash as a one-character escape.
CHARACTER_CLASS(Escape,
                c == '\\' || c == '\'' || c == '"' || c == '?' ||
                c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
                c == 'r' || c == 't' || c == 'v');

#undef CHARACTER_CLASS

## ConsumeNumber.cpp
/**
 * Lessons Learned:
 *  1. Encapsulate basic, frequently used operations so that
 *    the code reads like English sentences
 *  2. Use templates and metaprogramming
 *  3. Write readable code!
 */

// Consumes the current character if it is a member of CharacterClass.
// Returns true when a character was consumed (NextChar() was called) and
// false when the current character is not in the class, in which case
// NextChar() is not called.
template<typename CharacterClass>
inline bool Tokenizer::TryConsumeOne() {
  const bool matched = CharacterClass::InClass(current_char_);
  if (matched) {
    NextChar();
  }
  return matched;
}

// Consumes the current character if it is exactly `c`.
// Returns true when it matched and NextChar() was called; returns false,
// without calling NextChar(), otherwise.
inline bool Tokenizer::TryConsume(char c) {
  if (current_char_ != c) {
    return false;
  }
  NextChar();
  return true;
}

/**
 * Consumes the rest of a numeric literal and reports its token type.
 *
 * Handles hexadecimal ("0x..."), octal (leading zero) and decimal literals;
 * decimals may carry a fraction, an exponent and, when allow_f_after_float_
 * is set, an 'f'/'F' suffix.
 *
 * Parameters:
 *  started_with_zero - the literal began with '0' (presumably already
 *                      consumed by the caller; verify against the call site)
 *  started_with_dot  - the literal began with '.', so it is a float
 *
 * Returns TYPE_FLOAT if a decimal point, exponent or 'f' suffix was seen,
 * TYPE_INTEGER otherwise.  Malformed input is reported through AddError().
 *
 * Building blocks used:
 *  TryConsume
 *  LookingAt<typename>
 *  ConsumeOneOrMore
 *  ConsumeZeroOrMore
 */
Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
                                              bool started_with_dot) {
  bool saw_float_syntax = false;

  // The prefix test consumes the 'x'/'X' as a side effect when it matches.
  const bool has_hex_prefix =
      started_with_zero && (TryConsume('x') || TryConsume('X'));

  if (has_hex_prefix) {
    // Hexadecimal: at least one hex digit has to follow the prefix.
    ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
  } else if (started_with_zero && LookingAt<Digit>()) {
    // Octal: a leading zero followed by more digits.
    ConsumeZeroOrMore<OctalDigit>();
    if (LookingAt<Digit>()) {
      // An 8 or 9 showed up; report it and swallow the remaining digits.
      AddError("Numbers starting with leading zero must be in octal.");
      ConsumeZeroOrMore<Digit>();
    }
  } else {
    // Decimal: both starting forms begin with a run of digits.
    ConsumeZeroOrMore<Digit>();
    if (started_with_dot) {
      saw_float_syntax = true;
    } else if (TryConsume('.')) {
      saw_float_syntax = true;
      ConsumeZeroOrMore<Digit>();
    }

    // Optional exponent with an optional sign.
    if (TryConsume('e') || TryConsume('E')) {
      saw_float_syntax = true;
      if (!TryConsume('-')) {
        TryConsume('+');
      }
      ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
    }

    // Optional float suffix, only when the tokenizer is configured for it.
    if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
      saw_float_syntax = true;
    }
  }

  // Diagnose what immediately follows the literal.
  if (LookingAt<Letter>()) {
    AddError("Need space between number and identifier.");
  } else if (current_char_ == '.') {
    if (!saw_float_syntax) {
      AddError("Hex and octal numbers must be integers.");
    } else {
      AddError(
        "Already saw decimal point or exponent; can't have another one.");
    }
  }

  if (saw_float_syntax) {
    return TYPE_FLOAT;
  }
  return TYPE_INTEGER;
}

## ConsumeString.cpp
/**
 * Consumes a string literal body up to and including the closing delimiter.
 *
 * delimiter - the quote character that ends the literal (the opening quote
 *             is presumably already consumed by the caller; verify).
 *
 * Escape sequences are checked as they are passed over.  Reaching '\0' or
 * '\n' before the delimiter reports an error and returns without consuming
 * that character.  All problems are reported through AddError().
 */

void Tokenizer::ConsumeString(char delimiter) {
  for (;;) {
    // End of input or end of line: the literal was never closed.
    if (current_char_ == '\0' || current_char_ == '\n') {
      AddError("String literals cannot cross line boundaries.");
      return;
    }

    // Ordinary character: consume it, and stop if it closed the literal.
    if (current_char_ != '\\') {
      const bool closes_literal = (current_char_ == delimiter);
      NextChar();
      if (closes_literal) {
        return;
      }
      continue;
    }

    // Escape sequence: step over the backslash and inspect what follows.
    NextChar();

    if (TryConsumeOne<Escape>()) {
      // Single-character escape; nothing more to check.
      continue;
    }

    if (TryConsumeOne<OctalDigit>()) {
      // Up to two further octal digits may follow; the outer loop consumes
      // them as ordinary characters, so they need no handling here.
      continue;
    }

    if (TryConsume('x') || TryConsume('X')) {
      // Only the first hex digit is required; a second one, if present, is
      // consumed by the outer loop.
      if (!TryConsumeOne<HexDigit>()) {
        AddError("Expected hex digits for escape sequence.");
      }
      continue;
    }

    if (TryConsume('u')) {
      const bool has_four_hex_digits =
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>();
      if (!has_four_hex_digits) {
        AddError("Expected four hex digits for \\u escape sequence.");
      }
      continue;
    }

    if (TryConsume('U')) {
      // Eight hex digits are expected, but values above 0x10ffff are not
      // legal, so the first three must be "000" or "001".
      const bool has_valid_code_point =
          TryConsume('0') &&
          TryConsume('0') &&
          (TryConsume('0') || TryConsume('1')) &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>() &&
          TryConsumeOne<HexDigit>();
      if (!has_valid_code_point) {
        AddError("Expected eight hex digits up to 10ffff for \\U escape "
                 "sequence");
      }
      continue;
    }

    // Unknown escape: report it; the offending character is handled by the
    // next iteration.
    AddError("Invalid escape sequence in string literal.");
  }
}

## StopEvilConstructor.cpp
/**
 * Like it! :)
 */

// Declares, without defining, the copy-assignment operator and the copy
// constructor of ClassName.  Used inside a private: section, this makes
// copying the class inaccessible to outside code.  The expansion has no
// trailing semicolon; the invocation supplies it.
#define GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ClassName)   \
  void operator=(const ClassName&);                    \
  ClassName(const ClassName&)


// Example of a class whose copy operations are made private by the macro.
class CommandLineInterface {
 private:
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CommandLineInterface);
};
	// These "character classes" are designed to be used in template methods.
	// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
	// whitespace.

	// Note: No class is allowed to contain '\0', since this is used to mark end-
	// of-input and is handled specially.

	// Defines a class NAME with a single static predicate, InClass(c), that
	// returns the value of EXPRESSION (an expression over the character `c`).
	#define CHARACTER_CLASS(NAME, EXPRESSION)      \
	  class NAME {                                 \
	   public:                                     \
	    static inline bool InClass(char c) {       \
	      return EXPRESSION;                       \
	    }                                          \
	  }

	// Blank characters, with and without the line feed.
	CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
	                            c == '\r' || c == '\v' || c == '\f');
	CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' ||
	                                     c == '\r' || c == '\v' || c == '\f');

	// Characters strictly between NUL and space; NUL itself is excluded.
	CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');

	// Digits of the three supported numeric bases.
	CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
	CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
	CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') ||
	                          ('a' <= c && c <= 'f') ||
	                          ('A' <= c && c <= 'F'));

	// ASCII letters and the underscore.
	CHARACTER_CLASS(Letter, ('a' <= c && c <= 'z') ||
	                        ('A' <= c && c <= 'Z') ||
	                        (c == '_'));

	// ASCII letters, decimal digits and the underscore.
	CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') ||
	                              ('A' <= c && c <= 'Z') ||
	                              ('0' <= c && c <= '9') ||
	                              (c == '_'));

	// Characters accepted directly after a backslash as a one-character escape.
	CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
	                        c == 'r' || c == 't' || c == 'v' || c == '\\' ||
	                        c == '?' || c == '\'' || c == '\"');

	#undef CHARACTER_CLASS
	/**
	* Lessons Learned:
	* 1. Encapsulate basic, frequently used operations so that
	* the code reads like English sentences
	* 2. Use templates and metaprogramming
	* 3. Write readable code!
	*/

	// Consumes the current character if it is a member of CharacterClass.
	// Returns true when a character was consumed (NextChar() was called) and
	// false when the current character is not in the class, in which case
	// NextChar() is not called.
	template<typename CharacterClass>
	inline bool Tokenizer::TryConsumeOne() {
	  if (!CharacterClass::InClass(current_char_)) {
	    return false;
	  }
	  NextChar();
	  return true;
	}

	// Consumes the current character if it is exactly `c`.
	// Returns true when it matched and NextChar() was called; returns false,
	// without calling NextChar(), otherwise.
	inline bool Tokenizer::TryConsume(char c) {
	  const bool matched = (current_char_ == c);
	  if (matched) {
	    NextChar();
	  }
	  return matched;
	}

	/**
	 * Consumes the rest of a numeric literal and reports its token type.
	 *
	 * Handles hexadecimal ("0x..."), octal (leading zero) and decimal literals;
	 * decimals may carry a fraction, an exponent and, when allow_f_after_float_
	 * is set, an 'f'/'F' suffix.
	 *
	 * Parameters:
	 *  started_with_zero - the literal began with '0' (presumably already
	 *                      consumed by the caller; verify against the call site)
	 *  started_with_dot  - the literal began with '.', so it is a float
	 *
	 * Returns TYPE_FLOAT if a decimal point, exponent or 'f' suffix was seen,
	 * TYPE_INTEGER otherwise.  Malformed input is reported through AddError().
	 *
	 * Basic operations:
	 *  TryConsume
	 *  LookingAt<typename>
	 *  ConsumeOneOrMore
	 *  ConsumeZeroOrMore
	 */
	Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
	                                              bool started_with_dot) {
	  bool is_float = false;

	  if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
	    // A hex number (started with "0x").
	    ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");

	  } else if (started_with_zero && LookingAt<Digit>()) {
	    // An octal number (had a leading zero).
	    ConsumeZeroOrMore<OctalDigit>();
	    if (LookingAt<Digit>()) {
	      // An 8 or 9 showed up; report it and swallow the remaining digits.
	      AddError("Numbers starting with leading zero must be in octal.");
	      ConsumeZeroOrMore<Digit>();
	    }

	  } else {
	    // A decimal number.
	    if (started_with_dot) {
	      is_float = true;
	      ConsumeZeroOrMore<Digit>();
	    } else {
	      ConsumeZeroOrMore<Digit>();

	      if (TryConsume('.')) {
	        is_float = true;
	        ConsumeZeroOrMore<Digit>();
	      }
	    }

	    // Optional exponent with an optional sign.
	    if (TryConsume('e') || TryConsume('E')) {
	      is_float = true;
	      TryConsume('-') || TryConsume('+');
	      ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
	    }

	    // Optional float suffix, only when the tokenizer is configured for it.
	    if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
	      is_float = true;
	    }
	  }

	  // Diagnose what immediately follows the literal.
	  if (LookingAt<Letter>()) {
	    AddError("Need space between number and identifier.");
	  } else if (current_char_ == '.') {
	    if (is_float) {
	      AddError(
	        "Already saw decimal point or exponent; can't have another one.");
	    } else {
	      AddError("Hex and octal numbers must be integers.");
	    }
	  }

	  return is_float ? TYPE_FLOAT : TYPE_INTEGER;
	}
	/**
	 * Consumes a string literal body up to and including the closing delimiter.
	 *
	 * delimiter - the quote character that ends the literal (the opening quote
	 *             is presumably already consumed by the caller; verify).
	 *
	 * Escape sequences are checked as they are passed over.  Reaching '\0' or
	 * '\n' before the delimiter reports an error and returns without consuming
	 * that character.  All problems are reported through AddError().
	 */

	void Tokenizer::ConsumeString(char delimiter) {
	  while (true) {
	    switch (current_char_) {
	      case '\0':
	      case '\n': {
	        // End of input or end of line: the literal was never closed.
	        AddError("String literals cannot cross line boundaries.");
	        return;
	      }

	      case '\\': {
	        // An escape sequence.
	        NextChar();
	        if (TryConsumeOne<Escape>()) {
	          // Valid escape sequence.
	        } else if (TryConsumeOne<OctalDigit>()) {
	          // Possibly followed by two more octal digits, but these will
	          // just be consumed by the main loop anyway so we don't need
	          // to do so explicitly here.
	        } else if (TryConsume('x') || TryConsume('X')) {
	          if (!TryConsumeOne<HexDigit>()) {
	            AddError("Expected hex digits for escape sequence.");
	          }
	          // Possibly followed by another hex digit, but again we don't care.
	        } else if (TryConsume('u')) {
	          if (!TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>()) {
	            AddError("Expected four hex digits for \\u escape sequence.");
	          }
	        } else if (TryConsume('U')) {
	          // We expect 8 hex digits; but only the range up to 0x10ffff is
	          // legal.
	          if (!TryConsume('0') ||
	              !TryConsume('0') ||
	              !(TryConsume('0') || TryConsume('1')) ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>() ||
	              !TryConsumeOne<HexDigit>()) {
	            AddError("Expected eight hex digits up to 10ffff for \\U escape "
	                     "sequence");
	          }
	        } else {
	          AddError("Invalid escape sequence in string literal.");
	        }
	        break;
	      }

	      default: {
	        // Ordinary character: consume it, and stop if it closed the literal.
	        if (current_char_ == delimiter) {
	          NextChar();
	          return;
	        }
	        NextChar();
	        break;
	      }
	    }
	  }
	}
	/**
	* Like it! :)
	*/

	// Declares, without defining, the copy-assignment operator and the copy
	// constructor of ClassName.  Used inside a private: section, this makes
	// copying the class inaccessible to outside code.  The expansion has no
	// trailing semicolon; the invocation supplies it.
	#define GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ClassName)   \
	  void operator=(const ClassName&);                    \
	  ClassName(const ClassName&)


	// Example of a class whose copy operations are made private by the macro.
	class CommandLineInterface {
	 private:
	  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CommandLineInterface);
	};