scivision/wc_josh-katz.c

## wc_josh-katz.c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// A "chunk" is a group of 4 characters handled as one unit. The union
// overlays an int with a 4-element char array, so a byte buffer can be
// walked one chunk at a time while every individual character stays
// reachable through `letters`.
//
// The visible code only ever reads `letters`; `value` is never referenced.
// NOTE(review): the layout assumes sizeof(int) == 4 -- confirm for any
// target where that may not hold.
typedef union {
    int value;
    char letters[4];
} chunk_t;

// Running totals over every file processed so far. print_stats() adds each
// file's counts to these, and main() prints them when several files were given.
static int total_line_count = 0;
static int total_word_count = 0;
static int total_char_count = 0;


// Write a one-line usage summary for the program named `cmd` to stdout and
// terminate the process with exit status 1. This function never returns.
void help(const char *cmd)
{
    fprintf(stdout, "Usage: %s <--help|file...>\n", cmd);
    exit(1);
}

// Feed one character of the input stream into the word counter.
//
// A word is a maximal run of non-whitespace characters, as classified by
// isspace(). The counter is bumped when a whitespace character ends such a
// run, so a word is only counted once the whitespace after it has been seen.
//
// next:       the next character of the input.
// word_count: incremented in place when `next` terminates a word.
//
// The in-word flag is a function-local static: it persists between calls,
// so all callers share a single scanning state.
static inline void count_word(const char next, int * const word_count)
{
    static bool is_in_word = false;

    // <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
    // must be converted before the call.
    const bool is_next_space = isspace((unsigned char) next) != 0;

    if (is_next_space)
    {
        if (is_in_word)
        {
            // Whitespace right after a word: the word is complete.
            *word_count += 1;
            is_in_word = false;
        }
    }
    else
    {
        is_in_word = true;
    }
}

// Add one to *line_count when `next` is a newline character; any other
// character leaves the counter unchanged.
static inline void count_newl(const char next, int * const line_count)
{
    if (next == '\n')
    {
        ++*line_count;
    }
}


// Account for every character held in one chunk.
//
// All characters are first checked for newlines (updating *line_count) and
// are then fed, in order, to the word counter (updating *word_count).
static inline void count_chunk(const chunk_t * const chunk, int * const line_count, int * const word_count)
{
    const char * const letters = chunk->letters;
    const size_t letter_total = sizeof chunk->letters;

    // Newline pass.
    for (size_t k = 0; k < letter_total; k++)
        count_newl(letters[k], line_count);

    // Word pass; order matters because count_word() keeps state.
    for (size_t k = 0; k < letter_total; k++)
        count_word(letters[k], word_count);
}


// Count the lines, words and characters of one file, print them, and add
// them to the running totals.
//
// file_name: path of the file to read. If it cannot be opened, a message is
//            printed and the totals are left untouched.
//
// On success prints "<lines> <words> <chars> <file_name>\n" to stdout.
//
// NOTE: the counters are plain int (matching the file-level totals and the
// %d format), so inputs with more than INT_MAX characters would overflow.
static inline void print_stats(const char* file_name)
{

#define BUFFER_SIZE (1024 * 16)

    // Read buffer. It is declared as an array of chunks (instead of a char
    // array cast to chunk_t*) so it is guaranteed to be suitably aligned for
    // chunk_t access; viewing it through a char pointer is always allowed.
    static chunk_t gbuffer[BUFFER_SIZE / sizeof(chunk_t)];
    char * const cbuffer = (char *) gbuffer;

    // Per-file counters.
    int line_count = 0, word_count = 0, char_count = 0;

    // File handling. We only want to read.
    FILE *file = fopen(file_name, "r");

    if (!file)
    {
        printf("No such file %s\n", file_name);
        return;
    }

    // Read until fread() delivers no more data.
    size_t read_size;
    while ((read_size = fread(cbuffer, sizeof(char), sizeof gbuffer, file)) > 0)
    {
        char_count += (int) read_size;

        // Bulk part: whole chunks.
        const size_t chunk_count = read_size / sizeof(chunk_t);
        for (size_t i = 0; i < chunk_count; i++)
            count_chunk(&gbuffer[i], &line_count, &word_count);

        // Tail part: the characters left over when the amount read is not
        // a multiple of the chunk size.
        for (size_t i = chunk_count * sizeof(chunk_t); i < read_size; i++)
        {
            count_newl(cbuffer[i], &line_count);
            count_word(cbuffer[i], &word_count);
        }
    }

    // count_word() only counts a word once it sees the whitespace after it
    // and keeps its in-word flag between calls. Feeding one space at end of
    // file counts a final word that has no trailing whitespace and resets
    // the flag so it cannot leak into the next file.
    count_word(' ', &word_count);

    fclose(file);

    printf("%d %d %d %s\n", line_count, word_count, char_count, file_name);

    // Fold this file's counts into the totals.
    total_line_count += line_count;
    total_word_count += word_count;
    total_char_count += char_count;
}


// Program entry point.
//
// With no arguments, or with "--help" as the first argument, prints the
// usage text via help() (which exits). Otherwise every argument is treated
// as a file name and reported with print_stats(); a "total" line follows
// when more than one file name was given.
int main(const int argc, const char *argv[])
{
    const int file_count = argc - 1;

    if (file_count == 0 || strcmp(argv[1], "--help") == 0)
    {
        help(argv[0]);
    }

    for (int arg = 1; arg <= file_count; arg++)
    {
        print_stats(argv[arg]);
    }

    // Totals are only interesting for more than one file.
    if (file_count > 1)
    {
        printf("%d %d %d total\n", total_line_count, total_word_count, total_char_count);
    }

    return 0;
}
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <ctype.h>

	// A "chunk" is a group of 4 characters handled as one unit. The union
	// overlays an int with a 4-element char array, so a byte buffer can be
	// walked one chunk at a time while every individual character stays
	// reachable through `letters`.
	//
	// The visible code only ever reads `letters`; `value` is never referenced.
	// NOTE(review): the layout assumes sizeof(int) == 4 -- confirm for any
	// target where that may not hold.
	typedef union {
	int value;
	char letters[4];
	} chunk_t;

	// Running totals over every file processed so far. print_stats() adds each
	// file's counts to these, and main() prints them when several files were given.
	static int total_line_count = 0;
	static int total_word_count = 0;
	static int total_char_count = 0;


	// Write a one-line usage summary for the program named `cmd` to stdout and
	// terminate the process with exit status 1. This function never returns.
	void help(const char *cmd)
	{
	    // The '|' must be written plainly: "\|" is not a valid C escape sequence.
	    printf("Usage: %s <--help|file...>\n", cmd);
	    exit(1);
	}

	// Feed one character of the input stream into the word counter.
	//
	// A word is a maximal run of non-whitespace characters, as classified by
	// isspace(). The counter is bumped when a whitespace character ends such a
	// run, so a word is only counted once the whitespace after it has been seen.
	//
	// next:       the next character of the input.
	// word_count: incremented in place when `next` terminates a word.
	//
	// The in-word flag is a function-local static: it persists between calls,
	// so all callers share a single scanning state.
	static inline void count_word(const char next, int * const word_count)
	{
	    static bool is_in_word = false;

	    // <ctype.h> classifiers are only defined for values representable as
	    // unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
	    // must be converted before the call.
	    const bool is_next_space = isspace((unsigned char) next) != 0;

	    if (is_next_space)
	    {
	        if (is_in_word)
	        {
	            // Whitespace right after a word: the word is complete.
	            *word_count += 1;
	            is_in_word = false;
	        }
	    }
	    else
	    {
	        is_in_word = true;
	    }
	}

	// Add one to *line_count when `next` is a newline character; any other
	// character leaves the counter unchanged.
	static inline void count_newl(const char next, int * const line_count)
	{
	    if (next == '\n')
	    {
	        ++*line_count;
	    }
	}


	// Account for every character held in one chunk.
	//
	// All characters are first checked for newlines (updating *line_count) and
	// are then fed, in order, to the word counter (updating *word_count).
	static inline void count_chunk(const chunk_t * const chunk, int * const line_count, int * const word_count)
	{
	    const char * const letters = chunk->letters;
	    const size_t letter_total = sizeof chunk->letters;

	    // Newline pass.
	    for (size_t k = 0; k < letter_total; k++)
	        count_newl(letters[k], line_count);

	    // Word pass; order matters because count_word() keeps state.
	    for (size_t k = 0; k < letter_total; k++)
	        count_word(letters[k], word_count);
	}


	// Count the lines, words and characters of one file, print them, and add
	// them to the running totals.
	//
	// file_name: path of the file to read. If it cannot be opened, a message is
	//            printed and the totals are left untouched.
	//
	// On success prints "<lines> <words> <chars> <file_name>\n" to stdout.
	//
	// NOTE: the counters are plain int (matching the file-level totals and the
	// %d format), so inputs with more than INT_MAX characters would overflow.
	static inline void print_stats(const char* file_name)
	{

	#define BUFFER_SIZE (1024 * 16)

	    // Read buffer. It is declared as an array of chunks (instead of a char
	    // array cast to chunk_t*) so it is guaranteed to be suitably aligned for
	    // chunk_t access; viewing it through a char pointer is always allowed.
	    static chunk_t gbuffer[BUFFER_SIZE / sizeof(chunk_t)];
	    char * const cbuffer = (char *) gbuffer;

	    // Per-file counters.
	    int line_count = 0, word_count = 0, char_count = 0;

	    // File handling. We only want to read.
	    FILE *file = fopen(file_name, "r");

	    if (!file)
	    {
	        printf("No such file %s\n", file_name);
	        return;
	    }

	    // Read until fread() delivers no more data.
	    size_t read_size;
	    while ((read_size = fread(cbuffer, sizeof(char), sizeof gbuffer, file)) > 0)
	    {
	        char_count += (int) read_size;

	        // Bulk part: whole chunks.
	        const size_t chunk_count = read_size / sizeof(chunk_t);
	        for (size_t i = 0; i < chunk_count; i++)
	            count_chunk(&gbuffer[i], &line_count, &word_count);

	        // Tail part: the characters left over when the amount read is not
	        // a multiple of the chunk size.
	        for (size_t i = chunk_count * sizeof(chunk_t); i < read_size; i++)
	        {
	            count_newl(cbuffer[i], &line_count);
	            count_word(cbuffer[i], &word_count);
	        }
	    }

	    // count_word() only counts a word once it sees the whitespace after it
	    // and keeps its in-word flag between calls. Feeding one space at end of
	    // file counts a final word that has no trailing whitespace and resets
	    // the flag so it cannot leak into the next file.
	    count_word(' ', &word_count);

	    fclose(file);

	    printf("%d %d %d %s\n", line_count, word_count, char_count, file_name);

	    // Fold this file's counts into the totals.
	    total_line_count += line_count;
	    total_word_count += word_count;
	    total_char_count += char_count;
	}


	// Program entry point.
	//
	// With no arguments, or with "--help" as the first argument, prints the
	// usage text via help() (which exits). Otherwise every argument is treated
	// as a file name and reported with print_stats(); a "total" line follows
	// when more than one file name was given.
	int main(const int argc, const char *argv[])
	{
	    // The logical-or operator must be written as plain "||"; the
	    // backslash-escaped form is not valid C.
	    if (argc == 1 || strcmp(argv[1], "--help") == 0)
	        help(argv[0]);

	    for (int i = 1; i < argc; i++)
	        print_stats(argv[i]);

	    // Totals are only interesting for more than one file.
	    if (argc - 1 > 1)
	        printf("%d %d %d total\n", total_line_count, total_word_count, total_char_count);

	    return 0;
	}