Skip to content

Instantly share code, notes, and snippets.

@shepik
Forked from khayrov/wcl.c
Created November 11, 2011 19:37
Show Gist options
  • Save shepik/1358997 to your computer and use it in GitHub Desktop.
Save shepik/1358997 to your computer and use it in GitHub Desktop.
fast wc -l
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
/* Size in bytes of the read buffer (1 MiB); a multiple of 64. */
#define BUF_SIZE 1048576
/*
 * Returns the number of bytes equal to c in the size bytes starting at mem.
 * Implemented in x86-64 assembly, not in C. The routine uses aligned 16-byte
 * loads and consumes 64 bytes per iteration until it lands exactly on
 * mem + size, so mem must be 16-byte aligned and size a multiple of 64.
 */
extern unsigned long memchrcount(void *mem, int c, size_t size);
/*
 * Count the newline characters in the file named by argv[1] and print the
 * total, followed by a newline, to stdout.
 *
 * Returns 0 on success and 1 on a usage error, if the file cannot be opened,
 * if the buffer cannot be allocated, or if reading the file fails.
 */
int main(int argc, char **argv)
{
    /*
     * memchrcount() consumes 64 bytes per iteration and only stops when it
     * lands exactly on the end pointer, so every scan length is rounded up
     * to a multiple of this. BUF_SIZE is itself a multiple of 64, so the
     * rounded length never exceeds the buffer.
     */
    const size_t chunk = 64;
    unsigned long count = 0;
    ssize_t bytes_read;
    void *buf = NULL;
    int status = 0;
    int fd;

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s FILE\n", argc > 0 ? argv[0] : "wcl");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        perror("Cannot open file");
        return 1;
    }

    /* memchrcount() uses aligned 16-byte loads, hence the 16-byte alignment. */
    if (posix_memalign(&buf, 16, BUF_SIZE))
    {
        fputs("Cannot allocate memory\n", stderr);
        close(fd);
        return 1;
    }

    while ((bytes_read = read(fd, buf, BUF_SIZE)) > 0)
    {
        size_t filled = (size_t)bytes_read;
        size_t scan = (filled + chunk - 1) / chunk * chunk;

        /*
         * Scan only what this read() delivered. The padding up to the next
         * 64-byte boundary is zeroed so that stale bytes left over from a
         * previous, longer read are not counted as newlines again.
         */
        memset((char *)buf + filled, 0, scan - filled);
        count += memchrcount(buf, '\n', scan);
    }

    /* A negative result is an I/O error, not end of file. */
    if (bytes_read < 0)
    {
        perror("Cannot read file");
        status = 1;
    }

    close(fd);
    free(buf);

    if (status == 0)
    {
        printf("%lu\n", count);
    }
    return status;
}
# memchrcount -- count the bytes in a memory region that equal a given value.
#
# C prototype used by the caller:
#   unsigned long memchrcount(void *mem, int c, size_t size);
#
# Arguments are taken from %rdi (mem), %esi (c) and %rdx (size); the count is
# returned in %rax. Only %rax, %rdx, %rsi, %rdi, %r8-%r11 and %xmm0-%xmm7 are
# modified.
#
# Preconditions:
#   - mem is 16-byte aligned (the loads are movaps, which fault otherwise).
#   - size is a multiple of 64: the loop advances 64 bytes at a time and only
#     stops when the cursor equals mem + size exactly. size == 0 returns 0.
#   - c fits in 8 bits; higher bits would leak into the replicated pattern.
#   - The CPU supports SSE3 (movsldup/movddup) and POPCNT.
.text
# Export the symbol so the C translation unit's extern declaration can link.
.globl memchrcount
.type memchrcount, @function
memchrcount:
# Replicate the byte into all four bytes of %eax: c | c<<8 | c<<16 | c<<24.
mov %esi, %eax
sal $8, %esi
or %esi, %eax
sal $8, %esi
or %esi, %eax
sal $8, %esi
or %esi, %eax
# Broadcast that dword to all 16 bytes of %xmm5: movd fills the low dword,
# movsldup copies it into the second dword, movddup copies the low qword up.
movd %eax, %xmm5
movsldup %xmm5, %xmm5
movddup %xmm5, %xmm5
# One copy of the pattern per lane of the 4x unrolled loop.
movaps %xmm5,%xmm4
movaps %xmm5,%xmm6
movaps %xmm5,%xmm7
# Zero the running total (%rax) and the four per-lane scratch registers.
xor %eax, %eax
xor %r8, %r8
xor %r9, %r9
xor %r10, %r10
xor %r11, %r11
# Turn size into an end pointer: %rdx = mem + size.
add %rdi,%rdx
jmp .L2
.p2align 5
.L1:
# Load 64 bytes as four aligned 16-byte vectors.
movaps 0(%rdi), %xmm0
movaps 16(%rdi), %xmm1
movaps 32(%rdi), %xmm2
movaps 48(%rdi), %xmm3
# Bytes equal to the pattern become 0xFF, all others 0x00.
pcmpeqb %xmm4, %xmm0
pcmpeqb %xmm5, %xmm1
pcmpeqb %xmm6, %xmm2
pcmpeqb %xmm7, %xmm3
# Collapse each vector to a 16-bit mask, one bit per matching byte.
pmovmskb %xmm0, %r8
pmovmskb %xmm1, %r9
pmovmskb %xmm2, %r10
pmovmskb %xmm3, %r11
# The number of set bits is the number of matches in that vector.
popcnt %r8, %r8
popcnt %r9, %r9
popcnt %r10, %r10
popcnt %r11, %r11
# Sum the four lane counts into the running total.
add %r8, %r9
add %r10, %r11
add %r9, %rax
add %r11, %rax
# Advance to the next 64-byte chunk.
add $64, %rdi
.L2:
# Continue until the cursor reaches the end pointer exactly.
cmp %rdi, %rdx
jne .L1
ret
.size memchrcount, .-memchrcount
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment