Simple portable optimized ASCII-only version of wc util
/* | |
* zlib License, see https://en.wikipedia.org/wiki/Zlib_License | |
* | |
* Copyright (c) 2020 Leonid Yuriev <leo@yuriev.ru>, | |
* Simple portable optimized ASCII-only version of wc tool. | |
* Just for fun & https://en.wikipedia.org/wiki/The_Computer_Language_Benchmarks_Game | |
* | |
* This software is provided 'as-is', without any express or implied | |
* warranty. In no event will the authors be held liable for any damages | |
* arising from the use of this software. | |
* | |
* Permission is granted to anyone to use this software for any purpose, | |
* including commercial applications, and to alter it and redistribute it | |
* freely, subject to the following restrictions: | |
* | |
* 1. The origin of this software must not be misrepresented; you must not | |
* claim that you wrote the original software. If you use this software | |
* in a product, an acknowledgement in the product documentation would be | |
* appreciated but is not required. | |
* 2. Altered source versions must be plainly marked as such, and must not be | |
* misrepresented as being the original software. | |
* 3. This notice may not be removed or altered from any source distribution. | |
* | |
*/ | |
#include <errno.h> | |
#include <fcntl.h> | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <time.h> | |
#include <unistd.h> | |
static struct { size_t chars, words, lines; } result; | |
typedef struct { | |
uint16_t m[4]; | |
} hyper_t; | |
static hyper_t sym2hyper[65536]; | |
static uint8_t hyper2value[512]; | |
static void build_hyper(void) { | |
for (unsigned i = 0; i < 65536; ++i) { | |
unsigned char c[2]; | |
memcpy(c, &i, 2); | |
const unsigned ln = (c[0] == '\n') + (c[1] == '\n'); | |
const _Bool w0 = (c[0] != ' ') && (c[0] - 9 > 4); | |
const _Bool w1 = (c[1] != ' ') && (c[1] - 9 > 4); | |
sym2hyper[i].m[0] = ln + (w0 << 5) + (w1 << 6); | |
sym2hyper[i].m[1] = ln + (w0 << 7) + (w1 << 8); | |
sym2hyper[i].m[2] = ln + (w0 << 9) + (w1 << 10); | |
sym2hyper[i].m[3] = ln + (w0 << 11) + (w1 << 12); | |
} | |
for (unsigned i = 0; i < 512; ++i) { | |
_Bool prev = i & 1; | |
for (unsigned mask = i >> 1; mask > 0; mask >>= 1) { | |
hyper2value[i] += (mask & 1) && !prev; | |
prev = mask & 1; | |
} | |
} | |
} | |
static unsigned map(const unsigned char *text, size_t i, size_t n) { | |
return sym2hyper[*(uint16_t *)(text + i + n * 2)].m[n]; | |
} | |
static unsigned reduce(unsigned prev, unsigned hyper_a, unsigned hyper_b, | |
unsigned hyper_c, unsigned hyper_d) { | |
return prev + hyper_a + hyper_b + hyper_c + hyper_d; | |
} | |
static unsigned apply(unsigned hyper) { | |
result.lines += hyper & 15; | |
result.words += hyper2value[hyper >> 4]; | |
return (hyper >> 8) & 16; | |
} | |
static unsigned process_chunk(const unsigned char *text, const size_t bytes, | |
unsigned prev) { | |
result.chars += bytes; | |
for (size_t i = 0; i < bytes; i += 8) | |
prev = apply(reduce(prev, map(text, i, 0), map(text, i, 1), map(text, i, 2), | |
map(text, i, 3))); | |
return prev; | |
} | |
/******************************************************************************/ | |
static int process_fd(int fd) { | |
off_t length = lseek(fd, 0, SEEK_END); | |
if (length >= 0 && length <= INTPTR_MAX) { | |
void *ptr = mmap(NULL, (size_t)length, PROT_READ, MAP_PRIVATE, fd, 0); | |
if (ptr == MAP_FAILED) { | |
perror("mmap"); | |
return EXIT_FAILURE; | |
} | |
process_chunk((const unsigned char *)ptr, (size_t)length, 0); | |
munmap(ptr, length); | |
return EXIT_SUCCESS; | |
} | |
if (length < 0 && errno != ESPIPE) { | |
perror("leeek"); | |
return EXIT_FAILURE; | |
} | |
_Bool state = 0; | |
ssize_t chunk; | |
for (;;) { | |
unsigned char buf[65536]; | |
chunk = read(fd, buf, sizeof(buf)); | |
if (chunk < 1) | |
break; | |
if (chunk < (int)sizeof(buf)) | |
memset(buf + chunk, ' ', (8 - chunk) & 7); | |
state = process_chunk(buf, chunk, state); | |
} | |
if (chunk < 0) { | |
perror("read"); | |
return EXIT_FAILURE; | |
} | |
return EXIT_SUCCESS; | |
} | |
int main(int argc, const char *argv[]) { | |
build_hyper(); | |
int rc = 42; | |
if (argc > 1) { | |
for (int i = 1; i < argc; ++i) { | |
int fd = | |
(strcmp(argv[i], "-") == 0) ? STDIN_FILENO : open(argv[1], O_RDONLY); | |
if (fd < 0) { | |
perror("open"); | |
return EXIT_FAILURE; | |
} | |
rc = process_fd(fd); | |
close(fd); | |
if (rc != EXIT_SUCCESS) | |
break; | |
} | |
} else | |
rc = process_fd(STDIN_FILENO); | |
struct timespec ts = {0, 0}; | |
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts)) | |
perror("clock_gettime(CLOCK_PROCESS_CPUTIME_ID)"); | |
if (rc == EXIT_SUCCESS) | |
printf("lines %zu, words %zu, chars %zu\ntook %.6f seconds\n", result.lines, | |
result.words, result.chars, ts.tv_nsec * 1e-9 + ts.tv_sec); | |
return rc; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment