Created
June 18, 2020 15:56
-
-
Save kennethchiu/2ad941ca8e89104653cd58d078afa25e to your computer and use it in GitHub Desktop.
DFA to recognize hex numbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <ctype.h> | |
#include <assert.h> | |
#include <string.h> | |
#include <stdlib.h> | |
// This function returns the numerical value of a hex digit. It returns -1 if | |
// it is not a valid hex digit. | |
int ishexdigit(int ch) { | |
// Should already be in lower case. This is defensive programming check, | |
// in case it was not converted. | |
assert(!isupper(ch)); | |
// Return of -1 means not a hex digit. | |
int rv = -1; | |
if (isdigit(ch)) { | |
rv = ch - '0'; | |
// strchr() will find the null byte at end of string, so need to disallow ch == '\0'. | |
} else if (ch != '\0' && strchr("abcdef", ch) != 0) { | |
rv = ch - 'a' + 10; | |
} | |
return rv; | |
} | |
int main(int argc, char **argv) { | |
assert(argc == 2); | |
// To simplify the code, we are actually going to process '\0'. That | |
// avoids the end of the string being a special case. So len here includes | |
// the null byte. | |
const int len = strlen(argv[1]) + 1; | |
// Use this to keep track of the state. I try to name the states after | |
// what the next character should be. | |
enum State { | |
ST_START = 1, // Start from 1 for defensive programming reasons. | |
ST_X, // Saw 0, looking for x. | |
ST_FIRST_DIGIT, // Saw x, looking for first digit. | |
ST_DIGIT // Saw first digit, looking for others. | |
} state = ST_START; | |
// This is the hex number that we are accumulating. | |
int hexnum; | |
// Iterate through the string, including the null byte. | |
for (int i = 0; i < len; i++) { | |
// Work only in lower case, to simplify. | |
int ch = tolower(argv[1][i]); | |
// The big switch is the heart of the state machine. | |
switch (state) { | |
case ST_START: | |
{ | |
if (ch == '0') { | |
state = ST_X; | |
} | |
} | |
break; | |
case ST_X: | |
{ | |
if (ch == 'x') { | |
state = ST_FIRST_DIGIT; | |
} | |
} | |
break; | |
case ST_FIRST_DIGIT: | |
{ | |
int d; | |
if ((d = ishexdigit(ch)) != -1) { | |
hexnum = d; | |
state = ST_DIGIT; | |
} else { | |
// If no digit after the 0x, don't count it as a number. | |
state = ST_START; | |
break; // Leave the do-while. | |
} | |
} | |
break; | |
case ST_DIGIT: | |
{ | |
int d; | |
if ((d = ishexdigit(ch)) != -1) { | |
hexnum = 16*hexnum + d; | |
} else { | |
printf("%d\n", hexnum); | |
state = ST_START; | |
} | |
} | |
break; | |
default: | |
assert(0); abort(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment