Last active
March 15, 2019 07:36
-
-
Save forestbelton/224c9e4206f9c0099dbead697f37b4e5 to your computer and use it in GitHub Desktop.
lexer header sketch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef CASE_LEXER_H_
#define CASE_LEXER_H_

#include <stddef.h> /* size_t */
/**
 * A data type which accepts a string of characters and produces a stream of
 * tokens based on a set of predefined rules.
 *
 * No members are declared in this header, so callers can only refer to a
 * lexer through a pointer.
 */
struct lexer;
typedef struct lexer lexer;
/**
 * A rule definition for constructing a lexer.
 *
 * Pairs a caller-chosen integer tag with the regular expression that
 * describes the rule.
 */
typedef struct {
    int tag;           /**< The tag value for this rule */
    const char *regex; /**< A regular expression describing the rule */
} lexer_rule;
/**
 * The lexer's possible states.
 */
typedef enum {
    LEXER_STATE_READY, /**< The lexer is ready to receive more input */
    LEXER_STATE_TOKEN, /**< The lexer has produced a token */
    LEXER_STATE_ERROR  /**< The lexer has encountered an error */
} lexer_state;
/**
 * A single token produced by the lexer.
 *
 * NOTE(review): this header does not say who owns the memory behind
 * `content` or how long it stays valid -- confirm against the implementation.
 */
typedef struct {
    int tag;             /**< The tag value for the rule that was matched */
    const char *content; /**< The string representing this token */
} lexer_token;
/** | |
* Create a lexer from a sequence of rule definitions. | |
* @param[in] rules The rule definitions | |
* @param rule_count The total number of rules | |
* @return The new lexer, or NULL if an error occurred | |
*/ | |
lexer *lexer_new(lexer_rule **rules, size_t rule_count); | |
/** | |
* Apply the next character from the input stream to the lexer state. | |
* @param[in] lex The lexer | |
* @param c The character to apply | |
* @return The new state of the lexer | |
*/ | |
lexer_state lexer_putchar(lexer *lex, char c); | |
/** | |
* Retrieve the last produced token from the lexer. | |
* @param[in] lex The lexer | |
* @return A pointer to the last produced token, or NULL if the last call to | |
* lexer_putchar(lexer *lex, char c) did not return LEXER_STATE_TOKEN | |
*/ | |
lexer_token *lexer_next(lexer *lex); | |
#endif /* CASE_LEXER_H_ */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment