Skip to content

Instantly share code, notes, and snippets.

@EdThePro101
Created January 6, 2021 12:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EdThePro101/14b2ac5507fbe36be92adaafb556801c to your computer and use it in GitHub Desktop.
Save EdThePro101/14b2ac5507fbe36be92adaafb556801c to your computer and use it in GitHub Desktop.
Tokenise a string in C by splitting it on whitespace.
#include <stdio.h>
#include <string.h>
// Report whether `curr` is a decimal digit ('0' through '9').
// Returns 1 for a digit and 0 for anything else.
char is_digit(char curr) {
    // Reject everything outside the '0'..'9' range first
    if (curr < '0' || curr > '9') {
        return 0;
    }
    return 1;
}
// Report whether `curr` is an unaccented Latin letter ('A'..'Z' or 'a'..'z').
// Returns 1 for a letter and 0 for anything else.
char is_letter(char curr) {
    const char is_upper = (curr >= 'A' && curr <= 'Z');
    const char is_lower = (curr >= 'a' && curr <= 'z');
    return (is_upper || is_lower);
}
// Report whether `curr` is one of the four whitespace characters this file
// recognises: space, newline, carriage return or horizontal tab.
// Returns 1 for whitespace and 0 for anything else.
char is_whitespace(char curr) {
    switch (curr) {
    case ' ':
    case '\n':
    case '\r':
    case '\t':
        return 1;
    default:
        return 0;
    }
}
// Tokenise a string on whitespace and print one labelled token per line.
//
// A token is a run of letters and digits that is ended by a whitespace
// character or by the end of the string. The label is chosen from the token's
// first character: "IDENT: " when it is a letter, "NUMBER: " when it is a
// digit. Characters that are neither letters, digits nor whitespace are
// skipped; they do not end the current token.
//
// Characters are written to stdout as soon as they are classified instead of
// being collected in a fixed-size buffer, so a token of any length is handled
// without risk of overrunning storage.
//
// string: NUL-terminated text to tokenise; it is only read. NULL prints nothing.
void tokenise_string(const char *string) {
    if (string == NULL) {
        return;
    }
    // Non-zero once the label and at least one character of the current token
    // have been printed, i.e. a line is open and still needs its newline.
    int in_token = 0;
    // Walk to the terminator directly rather than calling strlen() on every pass
    for (const char *cursor = string; *cursor != '\0'; ++cursor) {
        const char curr = *cursor;
        const char letter = is_letter(curr);
        if (letter || is_digit(curr)) {
            // The first character of a token decides which label it gets
            if (!in_token) {
                fputs(letter ? "IDENT: " : "NUMBER: ", stdout);
                in_token = 1;
            }
            putchar(curr);
        } else if (is_whitespace(curr) && in_token) {
            // Whitespace closes the token that is currently open
            putchar('\n');
            in_token = 0;
        }
    }
    // The string may end in the middle of a token; close that last line too
    if (in_token) {
        putchar('\n');
    }
}
// Demo entry point: echoes a fixed sample sentence, then prints its tokens.
int main(void) {
    // Writable local copy of the sample text, so a plain `char *` parameter
    // is never pointed at a string literal.
    char sample[] = "5 plus 6 equals 11";

    printf("Splitting string:\n%s\n", sample);
    tokenise_string(sample);

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment