Created
February 26, 2020 02:37
-
-
Save erthink/a7d35192c1c8596d48a12e6fff459db1 to your computer and use it in GitHub Desktop.
Inspired by GHC's naive C implementation of wc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h> | |
#include <fcntl.h> | |
#include <stddef.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/mman.h> | |
#include <time.h> | |
#include <unistd.h> | |
/* Running totals accumulated by process_chunk() across all processed input. */
static struct { size_t chars, words, lines; } result;

/*
 * Count bytes, words and newlines in one chunk of input and add them to
 * `result`.
 *
 * A word is a maximal run of non-whitespace bytes, where whitespace is
 * ' ', '\t', '\n', '\v', '\f' and '\r' (the "C" locale isspace() set).
 *
 * text  - chunk to scan (exactly `bytes` bytes are read; no terminator needed)
 * bytes - number of bytes in the chunk
 * prev  - non-zero if the byte immediately preceding this chunk was part of
 *         a word, so a word spanning two chunks is counted only once
 *
 * Returns non-zero if the last byte of the chunk is part of a word (or `prev`
 * unchanged for an empty chunk); pass it as `prev` for the next chunk.
 */
static _Bool process_chunk(const unsigned char *text, const size_t bytes,
                           _Bool prev) {
  result.chars += bytes;
  for (size_t i = 0; i < bytes;) {
    if (text[i] > ' ') {
      /* Sub-loop over word bytes: everything above ' ' is never whitespace. */
      result.words += !prev;
      prev = 1;
      while (++i < bytes && text[i] > ' ')
        ;
    } else {
      /* Sub-loop over spaces and control bytes. `prev` is deliberately NOT
       * reset here: a non-whitespace control byte continues the current
       * word instead of starting a new one. */
      do {
        /* Unsigned wraparound turns "not in 9..13" into a single compare;
         * with signed arithmetic bytes 0..8 would be misclassified. */
        const _Bool non_space =
            text[i] != ' ' && (unsigned)text[i] - 9u > 4u;
        result.words += !prev && non_space;
        result.lines += text[i] == '\n';
        prev = non_space;
      } while (++i < bytes && text[i] <= ' ');
    }
  }
  return prev;
}
/*
 * Count the whole content reachable through `fd` and add it to the global
 * totals via process_chunk().
 *
 * Seekable, non-empty inputs are mapped into memory and scanned in one pass
 * starting at offset 0. Everything else (pipes, terminals, empty or
 * zero-sized files, files too large to map) is consumed with a read() loop.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE after printing a diagnostic with
 * perror(). The descriptor is not closed.
 */
static int process_fd(int fd) {
  const off_t length = lseek(fd, 0, SEEK_END);
  if (length < 0 && errno != ESPIPE) {
    perror("lseek");
    return EXIT_FAILURE;
  }

  /* mmap() rejects a zero length with EINVAL, so only map non-empty files. */
  if (length > 0 && length <= INTPTR_MAX) {
    void *ptr = mmap(NULL, (size_t)length, PROT_READ, MAP_PRIVATE, fd, 0);
    if (ptr == MAP_FAILED) {
      perror("mmap");
      return EXIT_FAILURE;
    }
    process_chunk((const unsigned char *)ptr, (size_t)length, 0);
    munmap(ptr, (size_t)length);
    return EXIT_SUCCESS;
  }

  /* Seekable input that was not mapped: the size probe above left the
   * offset at EOF, so rewind before falling back to read(). */
  if (length >= 0 && lseek(fd, 0, SEEK_SET) < 0) {
    perror("lseek");
    return EXIT_FAILURE;
  }

  _Bool state = 0; /* whether the previous chunk ended inside a word */
  for (;;) {
    unsigned char buf[65536];
    const ssize_t chunk = read(fd, buf, sizeof(buf));
    if (chunk < 0) {
      if (errno == EINTR)
        continue; /* interrupted by a signal before any data: retry */
      perror("read");
      return EXIT_FAILURE;
    }
    if (chunk == 0)
      break; /* end of input */
    state = process_chunk(buf, (size_t)chunk, state);
  }
  return EXIT_SUCCESS;
}
int main(int argc, const char *argv[]) { | |
int rc = 42; | |
if (argc > 1) { | |
for (int i = 1; i < argc; ++i) { | |
int fd = | |
(strcmp(argv[i], "-") == 0) ? STDIN_FILENO : open(argv[1], O_RDONLY); | |
if (fd < 0) { | |
perror("open"); | |
return EXIT_FAILURE; | |
} | |
rc = process_fd(fd); | |
close(fd); | |
if (rc != EXIT_SUCCESS) | |
break; | |
} | |
} else | |
rc = process_fd(STDIN_FILENO); | |
struct timespec ts = {0, 0}; | |
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts)) | |
perror("clock_gettime(CLOCK_PROCESS_CPUTIME_ID)"); | |
if (rc == EXIT_SUCCESS) | |
printf("lines %zu, words %zu, chars %zu\ntook %.6f seconds\n", result.lines, | |
result.words, result.chars, ts.tv_nsec * 1e-9 + ts.tv_sec); | |
return rc; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment