Skip to content

Instantly share code, notes, and snippets.

@khayrov
Created November 11, 2011 15:34
Show Gist options
  • Save khayrov/1358278 to your computer and use it in GitHub Desktop.
Save khayrov/1358278 to your computer and use it in GitHub Desktop.
fast wc -l
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#define BUF_SIZE 1048576
extern unsigned long memchrcount(void *mem, int c, size_t size);
/*
 * Print the number of '\n' bytes in the file named by argv[1].
 *
 * The file is read in BUF_SIZE chunks into a 16-byte-aligned buffer and each
 * chunk is handed to memchrcount(). Only the bytes actually returned by
 * read() are counted: the chunk length is rounded up to the next multiple of
 * 16 and the padding is zero-filled, so that data left over from a previous,
 * longer read is never counted a second time.
 *
 * Returns 0 on success, 1 on a usage, open, allocation or read error.
 */
int main(int argc, char **argv)
{
    unsigned long count = 0;
    ssize_t bytes_read;
    void *buf;
    int fd;

    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s FILE\n", argc > 0 ? argv[0] : "wcl");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0)
    {
        perror("Cannot open file");
        return 1;
    }

    /* memchrcount() is called on this buffer, so keep it 16-byte aligned. */
    if (posix_memalign(&buf, 16, BUF_SIZE))
    {
        fputs("Cannot allocate memory\n", stderr);
        close(fd);
        return 1;
    }

    while ((bytes_read = read(fd, buf, BUF_SIZE)) > 0)
    {
        size_t used = (size_t)bytes_read;
        /* BUF_SIZE is a multiple of 16, so padded never exceeds it. */
        size_t padded = (used + 15) & ~(size_t)15;

        /* Zero padding can never match '\n'. */
        memset((char *)buf + used, 0, padded - used);
        count += memchrcount(buf, '\n', padded);
    }

    if (bytes_read < 0)
    {
        perror("Cannot read file");
        close(fd);
        free(buf);
        return 1;
    }

    close(fd);
    free(buf);
    printf("%lu\n", count);
    return 0;
}
/*
 * unsigned long memchrcount(void *mem, int c, size_t size)
 *
 * Counts the bytes equal to c in mem[0 .. size), 16 bytes per iteration.
 *
 * Register use (System V AMD64 calling convention):
 *   %rdi = mem, %esi = c, %rdx = size, result in %rax.
 *   %r8 is the running byte offset; %rcx, %xmm0 and %xmm1 are scratch.
 *
 * Preconditions visible in the code:
 *   - mem must be 16-byte aligned: the loop loads with movaps.
 *   - size must be a multiple of 16: the loop ends only when the offset
 *     is exactly equal to size (jne), so any other size walks past the end.
 *   - c is expected to fit in one byte; higher bits would corrupt the
 *     broadcast pattern built below.
 *   - The CPU must support SSE3 (movsldup, movddup) and POPCNT (popcnt);
 *     without them these instructions raise an illegal-instruction fault.
 */
.text
.global memchrcount
.type memchrcount, @function
memchrcount:
/* Replicate the byte c into all four bytes of %eax. */
mov %esi, %eax
sal $8, %esi
or %esi, %eax
sal $8, %esi
or %esi, %eax
sal $8, %esi
or %esi, %eax
/* Broadcast that dword to all 16 byte lanes of %xmm1. */
movd %eax, %xmm1
movsldup %xmm1, %xmm1 /* dword 0 -> dwords 0 and 1 */
movddup %xmm1, %xmm1 /* low qword -> both qwords */
xor %eax, %eax /* count = 0 */
xor %r8, %r8 /* offset = 0 */
jmp .L2 /* test the bound first so size == 0 returns 0 */
.p2align 5
.L1:
movaps (%rdi, %r8), %xmm0 /* aligned load of the next 16 bytes */
pcmpeqb %xmm1, %xmm0 /* each matching byte lane becomes 0xFF */
pmovmskb %xmm0, %ecx /* one mask bit per byte lane */
popcnt %ecx, %ecx /* number of matches in this 16-byte group */
add %rcx, %rax
add $16, %r8
.L2:
cmp %r8, %rdx
jne .L1 /* continue until offset == size */
ret
@lvv
Copy link

lvv commented Nov 14, 2011

make -B memchrcount.o
as -o memchrcount.o memchrcount.s
CXXFLAGS=memchrcount.o make -B wcl
cc memchrcount.o -O3 -march=native -I/home/lvv/p wcl.c -o wcl
./wcl /tmp/l
Illegal instruction
uname -a
Linux ahp 3.0.0 #9 SMP PREEMPT Tue Jul 26 10:23:31 EEST 2011 x86_64 Intel(R) Core(TM)2 Duo CPU T7500 @ 2.20GHz GenuineIntel GNU/Linux

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment