Skip to content

Instantly share code, notes, and snippets.

@mrdmnd
Created March 20, 2020 22:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdmnd/734b76f7c73ce8ee2abf824348fb412f to your computer and use it in GitHub Desktop.
Save mrdmnd/734b76f7c73ce8ee2abf824348fb412f to your computer and use it in GitHub Desktop.
Solutions to the second problem set
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#define MAX_DICTIONARY_SIZE 300000
#define MAX_WORD_LENGTH 15
// We should guarantee room for a newline and null byte in our buffer.
#define BUFFER_SIZE (MAX_WORD_LENGTH + 2)
int populate_dictionary(char *filepath, char dictionary[][BUFFER_SIZE]) {
FILE *fp = fopen(filepath, "r");
if (fp == NULL) {
puts("Error: could not open file.\n");
exit(1);
}
int num_words = 0;
while (fgets(dictionary[num_words], BUFFER_SIZE, fp)) {
// Remove trailing newline: fgets() leaves the newline on, and we don't want
// that.
dictionary[num_words][strcspn(dictionary[num_words], "\n")] = 0;
num_words++;
}
// Remember to close the file!
fclose(fp);
return num_words;
}
// This function takes a "needle" as `input` and a `dictionary` containing
// `num_words` elements, and prints all elements in the dictionary that
// containing the "needle" as a substring. It returns the number of matches
// found.
int count_matches(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
int num_matches = 0;
for (int i = 0; i < num_words; i++) {
if (strstr(dictionary[i], input)) {
num_matches++;
printf("%s\n", dictionary[i]);
}
}
return num_matches;
}
// This function returns 1 (true) if a and b are anagrams of each other (contain
// the exact same characters) and 0 (false) otherwise.
int is_anagram_pair(char *a, char *b) {
// Build letter frequency counters.
// a_freq[0] counts the number of A characters in a
// a_freq[1] counts the number of B characters in a
// ...
// b_freq[0] counts the number of A characters in b
// ...
int a_freq[26] = {0};
int b_freq[26] = {0};
// Walk through the strings, and increment the appropriate frequency counter.
// Note: doing "character subtraction" here is well defined! These get
// promoted to integers (between 0 and 255), and the ordering lets us define
// things like 'C' - 'A' = 2. This ONLY WORKS if all of the characters 'ch' in
// our input strings are defined such that 'ch' - 'A' is in [0, 25]
// (otherwise, you'll attempt to access an invalid index in the array and get
// a segfault) - as it turns out, this works out fine because all of the
// characters in the input are in the capital letter set. Just a neat trick
// here to build a map from character to count!
for (int i = 0; i < strlen(a); i++) {
a_freq[a[i] - 'A']++;
}
for (int i = 0; i < strlen(b); i++) {
b_freq[b[i] - 'A']++;
}
// Short circuit if frequencies ever mismatch:
// as soon as we have a character in one word that's not in another,
// we know these words are not anagrams.
for (int i = 0; i < 26; i++) {
if (a_freq[i] != b_freq[i]) {
return 0;
}
}
return 1;
}
// This function loops over the dictionary and prints all words that are
// anagrams of the input.
void anagrams(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
for (int i = 0; i < num_words; i++) {
// Compare the letter frequencies of each word to validate anagram-hood.
if (is_anagram_pair(dictionary[i], input)) {
printf("%s\n", dictionary[i]);
}
}
}
int main(int argc, char **argv) {
if (argc != 2) {
puts("Error: pass the dictionary as a command line argument.\n");
exit(1);
}
char dictionary[MAX_DICTIONARY_SIZE][BUFFER_SIZE];
int num_words = populate_dictionary(argv[1], dictionary);
printf("Counted %d words.\n", num_words);
printf("Tenth element: %s\n", dictionary[9]);
printf("Last element: %s\n", dictionary[num_words - 1]);
printf("Number of matches for 'STRUCT': %d\n",
count_matches("STRUCT", dictionary, num_words));
anagrams("RESPECT", dictionary, num_words);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment