mrdmnd/ex2.c

## ex2.c
#include "stdio.h"
#include "stdlib.h"
#include "string.h"

#define MAX_DICTIONARY_SIZE 300000
#define MAX_WORD_LENGTH 15
// We should guarantee room for a newline and null byte in our buffer.
#define BUFFER_SIZE (MAX_WORD_LENGTH + 2)

int populate_dictionary(char *filepath, char dictionary[][BUFFER_SIZE]) {
  FILE *fp = fopen(filepath, "r");
  if (fp == NULL) {
    puts("Error: could not open file.\n");
    exit(1);
  }

  int num_words = 0;
  while (fgets(dictionary[num_words], BUFFER_SIZE, fp)) {
    // Remove trailing newline: fgets() leaves the newline on, and we don't want
    // that.
    dictionary[num_words][strcspn(dictionary[num_words], "\n")] = 0;
    num_words++;
  }
  // Remember to close the file!
  fclose(fp);
  return num_words;
}

// This function takes a "needle" as `input` and a `dictionary` containing
// `num_words` elements, and prints all elements in the dictionary that
// containing the "needle" as a substring. It returns the number of matches
// found.
int count_matches(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
  int num_matches = 0;
  for (int i = 0; i < num_words; i++) {
    if (strstr(dictionary[i], input)) {
      num_matches++;
      printf("%s\n", dictionary[i]);
    }
  }
  return num_matches;
}

// This function returns 1 (true) if a and b are anagrams of each other (contain
// the exact same characters) and 0 (false) otherwise.
int is_anagram_pair(char *a, char *b) {
  // Build letter frequency counters.
  // a_freq[0] counts the number of A characters in a
  // a_freq[1] counts the number of B characters in a
  // ...
  // b_freq[0] counts the number of A characters in b
  // ...
  int a_freq[26] = {0};
  int b_freq[26] = {0};

  // Walk through the strings, and increment the appropriate frequency counter.
  // Note: doing "character subtraction" here is well defined! These get
  // promoted to integers (between 0 and 255), and the ordering lets us define
  // things like 'C' - 'A' = 2. This ONLY WORKS if all of the characters 'ch' in
  // our input strings are defined such that 'ch' - 'A' is in [0, 25]
  // (otherwise, you'll attempt to access an invalid index in the array and get
  // a segfault) - as it turns out, this works out fine because all of the
  // characters in the input are in the capital letter set. Just a neat trick
  // here to build a map from character to count!
  for (int i = 0; i < strlen(a); i++) {
    a_freq[a[i] - 'A']++;
  }

  for (int i = 0; i < strlen(b); i++) {
    b_freq[b[i] - 'A']++;
  }

  // Short circuit if frequencies ever mismatch:
  // as soon as we have a character in one word that's not in another,
  // we know these words are not anagrams.
  for (int i = 0; i < 26; i++) {
    if (a_freq[i] != b_freq[i]) {
      return 0;
    }
  }
  return 1;
}

// This function loops over the dictionary and prints all words that are
// anagrams of the input.
void anagrams(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
  for (int i = 0; i < num_words; i++) {
    // Compare the letter frequencies of each word to validate anagram-hood.
    if (is_anagram_pair(dictionary[i], input)) {
      printf("%s\n", dictionary[i]);
    }
  }
}

int main(int argc, char **argv) {
  if (argc != 2) {
    puts("Error: pass the dictionary as a command line argument.\n");
    exit(1);
  }
  char dictionary[MAX_DICTIONARY_SIZE][BUFFER_SIZE];
  int num_words = populate_dictionary(argv[1], dictionary);
  printf("Counted %d words.\n", num_words);
  printf("Tenth element: %s\n", dictionary[9]);
  printf("Last element: %s\n", dictionary[num_words - 1]);

  printf("Number of matches for 'STRUCT': %d\n",
         count_matches("STRUCT", dictionary, num_words));

  anagrams("RESPECT", dictionary, num_words);
}
	#include "stdio.h"
	#include "stdlib.h"
	#include "string.h"

	#define MAX_DICTIONARY_SIZE 300000
	#define MAX_WORD_LENGTH 15
	// We should guarantee room for a newline and null byte in our buffer.
	#define BUFFER_SIZE (MAX_WORD_LENGTH + 2)

	int populate_dictionary(char *filepath, char dictionary[][BUFFER_SIZE]) {
	FILE *fp = fopen(filepath, "r");
	if (fp == NULL) {
	puts("Error: could not open file.\n");
	exit(1);
	}

	int num_words = 0;
	while (fgets(dictionary[num_words], BUFFER_SIZE, fp)) {
	// Remove trailing newline: fgets() leaves the newline on, and we don't want
	// that.
	dictionary[num_words][strcspn(dictionary[num_words], "\n")] = 0;
	num_words++;
	}
	// Remember to close the file!
	fclose(fp);
	return num_words;
	}

	// This function takes a "needle" as `input` and a `dictionary` containing
	// `num_words` elements, and prints all elements in the dictionary that
	// containing the "needle" as a substring. It returns the number of matches
	// found.
	int count_matches(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
	int num_matches = 0;
	for (int i = 0; i < num_words; i++) {
	if (strstr(dictionary[i], input)) {
	num_matches++;
	printf("%s\n", dictionary[i]);
	}
	}
	return num_matches;
	}

	// This function returns 1 (true) if a and b are anagrams of each other (contain
	// the exact same characters) and 0 (false) otherwise.
	int is_anagram_pair(char a, char b) {
	// Build letter frequency counters.
	// a_freq[0] counts the number of A characters in a
	// a_freq[1] counts the number of B characters in a
	// ...
	// b_freq[0] counts the number of A characters in b
	// ...
	int a_freq[26] = {0};
	int b_freq[26] = {0};

	// Walk through the strings, and increment the appropriate frequency counter.
	// Note: doing "character subtraction" here is well defined! These get
	// promoted to integers (between 0 and 255), and the ordering lets us define
	// things like 'C' - 'A' = 2. This ONLY WORKS if all of the characters 'ch' in
	// our input strings are defined such that 'ch' - 'A' is in [0, 25]
	// (otherwise, you'll attempt to access an invalid index in the array and get
	// a segfault) - as it turns out, this works out fine because all of the
	// characters in the input are in the capital letter set. Just a neat trick
	// here to build a map from character to count!
	for (int i = 0; i < strlen(a); i++) {
	a_freq[a[i] - 'A']++;
	}

	for (int i = 0; i < strlen(b); i++) {
	b_freq[b[i] - 'A']++;
	}

	// Short circuit if frequencies ever mismatch:
	// as soon as we have a character in one word that's not in another,
	// we know these words are not anagrams.
	for (int i = 0; i < 26; i++) {
	if (a_freq[i] != b_freq[i]) {
	return 0;
	}
	}
	return 1;
	}

	// This function loops over the dictionary and prints all words that are
	// anagrams of the input.
	void anagrams(char *input, char dictionary[][BUFFER_SIZE], int num_words) {
	for (int i = 0; i < num_words; i++) {
	// Compare the letter frequencies of each word to validate anagram-hood.
	if (is_anagram_pair(dictionary[i], input)) {
	printf("%s\n", dictionary[i]);
	}
	}
	}

	int main(int argc, char **argv) {
	if (argc != 2) {
	puts("Error: pass the dictionary as a command line argument.\n");
	exit(1);
	}
	char dictionary[MAX_DICTIONARY_SIZE][BUFFER_SIZE];
	int num_words = populate_dictionary(argv[1], dictionary);
	printf("Counted %d words.\n", num_words);
	printf("Tenth element: %s\n", dictionary[9]);
	printf("Last element: %s\n", dictionary[num_words - 1]);

	printf("Number of matches for 'STRUCT': %d\n",
	count_matches("STRUCT", dictionary, num_words));

	anagrams("RESPECT", dictionary, num_words);
	}