Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Simple portable optimized ASCII-only version of wc util
/*
* zlib License, see https://en.wikipedia.org/wiki/Zlib_License
*
* Copyright (c) 2020 Leonid Yuriev <leo@yuriev.ru>,
* Simple portable optimized ASCII-only version of wc tool.
* Just for fun & https://en.wikipedia.org/wiki/The_Computer_Language_Benchmarks_Game
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
*/
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
static struct { size_t chars, words, lines; } result;
typedef struct {
uint16_t m[4];
} hyper_t;
static hyper_t sym2hyper[65536];
static uint8_t hyper2value[512];
static void build_hyper(void) {
for (unsigned i = 0; i < 65536; ++i) {
unsigned char c[2];
memcpy(c, &i, 2);
const unsigned ln = (c[0] == '\n') + (c[1] == '\n');
const _Bool w0 = (c[0] != ' ') && (c[0] - 9 > 4);
const _Bool w1 = (c[1] != ' ') && (c[1] - 9 > 4);
sym2hyper[i].m[0] = ln + (w0 << 5) + (w1 << 6);
sym2hyper[i].m[1] = ln + (w0 << 7) + (w1 << 8);
sym2hyper[i].m[2] = ln + (w0 << 9) + (w1 << 10);
sym2hyper[i].m[3] = ln + (w0 << 11) + (w1 << 12);
}
for (unsigned i = 0; i < 512; ++i) {
_Bool prev = i & 1;
for (unsigned mask = i >> 1; mask > 0; mask >>= 1) {
hyper2value[i] += (mask & 1) && !prev;
prev = mask & 1;
}
}
}
static unsigned map(const unsigned char *text, size_t i, size_t n) {
return sym2hyper[*(uint16_t *)(text + i + n * 2)].m[n];
}
static unsigned reduce(unsigned prev, unsigned hyper_a, unsigned hyper_b,
unsigned hyper_c, unsigned hyper_d) {
return prev + hyper_a + hyper_b + hyper_c + hyper_d;
}
static unsigned apply(unsigned hyper) {
result.lines += hyper & 15;
result.words += hyper2value[hyper >> 4];
return (hyper >> 8) & 16;
}
static unsigned process_chunk(const unsigned char *text, const size_t bytes,
unsigned prev) {
result.chars += bytes;
for (size_t i = 0; i < bytes; i += 8)
prev = apply(reduce(prev, map(text, i, 0), map(text, i, 1), map(text, i, 2),
map(text, i, 3)));
return prev;
}
/******************************************************************************/
static int process_fd(int fd) {
off_t length = lseek(fd, 0, SEEK_END);
if (length >= 0 && length <= INTPTR_MAX) {
void *ptr = mmap(NULL, (size_t)length, PROT_READ, MAP_PRIVATE, fd, 0);
if (ptr == MAP_FAILED) {
perror("mmap");
return EXIT_FAILURE;
}
process_chunk((const unsigned char *)ptr, (size_t)length, 0);
munmap(ptr, length);
return EXIT_SUCCESS;
}
if (length < 0 && errno != ESPIPE) {
perror("leeek");
return EXIT_FAILURE;
}
_Bool state = 0;
ssize_t chunk;
for (;;) {
unsigned char buf[65536];
chunk = read(fd, buf, sizeof(buf));
if (chunk < 1)
break;
if (chunk < (int)sizeof(buf))
memset(buf + chunk, ' ', (8 - chunk) & 7);
state = process_chunk(buf, chunk, state);
}
if (chunk < 0) {
perror("read");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int main(int argc, const char *argv[]) {
build_hyper();
int rc = 42;
if (argc > 1) {
for (int i = 1; i < argc; ++i) {
int fd =
(strcmp(argv[i], "-") == 0) ? STDIN_FILENO : open(argv[1], O_RDONLY);
if (fd < 0) {
perror("open");
return EXIT_FAILURE;
}
rc = process_fd(fd);
close(fd);
if (rc != EXIT_SUCCESS)
break;
}
} else
rc = process_fd(STDIN_FILENO);
struct timespec ts = {0, 0};
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts))
perror("clock_gettime(CLOCK_PROCESS_CPUTIME_ID)");
if (rc == EXIT_SUCCESS)
printf("lines %zu, words %zu, chars %zu\ntook %.6f seconds\n", result.lines,
result.words, result.chars, ts.tv_nsec * 1e-9 + ts.tv_sec);
return rc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.