Skip to content

Instantly share code, notes, and snippets.

@erthink
Created February 26, 2020 02:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erthink/a7d35192c1c8596d48a12e6fff459db1 to your computer and use it in GitHub Desktop.
Save erthink/a7d35192c1c8596d48a12e6fff459db1 to your computer and use it in GitHub Desktop.
Inspired by ghc naive C-implementation of wc
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
/* Running totals, accumulated over every chunk passed to process_chunk(). */
static struct {
  size_t chars; /* total number of bytes seen */
  size_t words; /* number of maximal runs of non-whitespace bytes */
  size_t lines; /* number of '\n' bytes */
} result;

/*
 * Scan `bytes` bytes starting at `text` and add their byte, word and line
 * counts to `result`.
 *
 * Whitespace is ' ' plus the range '\t'..'\r' (9..13); every other byte,
 * including the remaining control codes, belongs to a word.
 *
 * `prev` tells whether the byte immediately before this chunk was part of a
 * word (pass 0 at the start of an input), so a word split across two chunks
 * is counted once. Returns the same flag for the last byte of this chunk
 * (or `prev` unchanged when `bytes` is 0), to be fed into the next call.
 */
static _Bool process_chunk(const unsigned char *text, const size_t bytes,
                           _Bool prev) {
  result.chars += bytes;
  for (size_t i = 0; i < bytes;) {
    if (text[i] > ' ') {
      /* Sub-loop over word bytes: anything above ' ' is never whitespace. */
      result.words += !prev;
      prev = 1;
      while (++i < bytes && text[i] > ' ')
        ;
    } else {
      /* Sub-loop over bytes <= ' ': whitespace mixed with control codes. */
      do {
        const unsigned char c = text[i];
        /* Explicit range test: `c - 9 > 4` would be evaluated in (signed)
         * int and misclassify bytes 0..8 as whitespace. */
        const _Bool non_space = c != ' ' && (c < '\t' || c > '\r');
        /* `prev` is deliberately not reset on entry, so a control byte that
         * directly follows a word continues that word. */
        result.words += !prev && non_space;
        result.lines += c == '\n';
        prev = non_space;
      } while (++i < bytes && text[i] <= ' ');
    }
  }
  return prev;
}
/*
 * Count the whole content of `fd` via process_chunk().
 *
 * A descriptor that reports a positive, mappable size is mapped read-only
 * from offset 0 and scanned in one call. Everything else (pipes and other
 * non-seekable descriptors, zero-sized files, files too large to map) is
 * consumed with a read() loop.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE after printing the failing call
 * with perror().
 */
static int process_fd(int fd) {
  const off_t length = lseek(fd, 0, SEEK_END);
  if (length < 0 && errno != ESPIPE) {
    perror("lseek");
    return EXIT_FAILURE;
  }

  /* mmap() rejects a zero length, so only strictly positive sizes go here. */
  if (length > 0 && length <= INTPTR_MAX) {
    void *ptr = mmap(NULL, (size_t)length, PROT_READ, MAP_PRIVATE, fd, 0);
    if (ptr == MAP_FAILED) {
      perror("mmap");
      return EXIT_FAILURE;
    }
    process_chunk((const unsigned char *)ptr, (size_t)length, 0);
    munmap(ptr, (size_t)length);
    return EXIT_SUCCESS;
  }

  /* The size probe above left a seekable descriptor positioned at its end;
   * rewind so the read loop sees the data from offset 0, like the mmap path. */
  if (length >= 0 && lseek(fd, 0, SEEK_SET) < 0) {
    perror("lseek");
    return EXIT_FAILURE;
  }

  _Bool state = 0; /* "inside a word" flag carried across chunk boundaries */
  for (;;) {
    unsigned char buf[65536];
    const ssize_t chunk = read(fd, buf, sizeof(buf));
    if (chunk == 0)
      return EXIT_SUCCESS; /* end of input */
    if (chunk < 0) {
      if (errno == EINTR)
        continue; /* interrupted before any data was read: retry */
      perror("read");
      return EXIT_FAILURE;
    }
    state = process_chunk(buf, (size_t)chunk, state);
  }
}
/*
 * Entry point: count lines, words and bytes of every file named on the
 * command line ("-" means standard input), or of standard input when no
 * arguments are given. Totals over all inputs are printed once at the end
 * together with the CPU time consumed by the process.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE as soon as any input cannot be
 * opened or processed (in which case no totals are printed).
 */
int main(int argc, const char *argv[]) {
  int rc = EXIT_SUCCESS;
  if (argc > 1) {
    for (int i = 1; i < argc; ++i) {
      const _Bool use_stdin = strcmp(argv[i], "-") == 0;
      /* Open the i-th argument (not always the first one). */
      const int fd = use_stdin ? STDIN_FILENO : open(argv[i], O_RDONLY);
      if (fd < 0) {
        perror("open");
        return EXIT_FAILURE;
      }
      rc = process_fd(fd);
      /* Only close descriptors opened here; stdin stays usable. */
      if (!use_stdin)
        close(fd);
      if (rc != EXIT_SUCCESS)
        break;
    }
  } else {
    rc = process_fd(STDIN_FILENO);
  }

  /* If the clock query fails, ts stays zeroed and 0 seconds is reported. */
  struct timespec ts = {0, 0};
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts))
    perror("clock_gettime(CLOCK_PROCESS_CPUTIME_ID)");
  if (rc == EXIT_SUCCESS)
    printf("lines %zu, words %zu, chars %zu\ntook %.6f seconds\n", result.lines,
           result.words, result.chars, ts.tv_nsec * 1e-9 + ts.tv_sec);
  return rc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment