Skip to content

Instantly share code, notes, and snippets.

@jots
Last active August 29, 2015 14:00
Show Gist options
  • Save jots/11461543 to your computer and use it in GitHub Desktop.
Save jots/11461543 to your computer and use it in GitHub Desktop.
count lines with memchr
#include <stdio.h>
#define BUFSIZE 8*1024
/*
 * Count newline characters and total bytes on standard input.
 *
 * Reads stdin in BUFSIZE-byte chunks with fread() and inspects every byte.
 * Prints "<lines> <bytes>" on success. Returns 0 on success, 1 if the
 * stream reported a read error.
 */
int main()
{
    char buf[BUFSIZE];
    /* Both counters must start at zero; the byte counter was previously
       left uninitialised, which made the printed total indeterminate. */
    unsigned long c = 0, n = 0;
    size_t i, chars_read; /* fread() returns size_t, not int */

    while ((chars_read = fread(buf, 1, BUFSIZE, stdin)) > 0) {
        for (i = 0; i < chars_read; i++) {
            if (buf[i] == '\n') {
                n++;
            }
        }
        c += chars_read;
    }

    /* fread() returns 0 for both end-of-file and error; tell them apart. */
    if (ferror(stdin)) {
        perror("fread");
        return 1;
    }

    printf("%lu %lu\n", n, c);
    return 0;
}
#include <stdio.h>
#include <string.h> // memchr
#include <unistd.h>
#define BUFSIZE 8*1024
// Count newline characters and total bytes on standard input.
//
// Reads stdin in BUFSIZE-byte chunks with read(2) (bypassing stdio
// buffering) and lets memchr() locate each newline inside the chunk.
// Prints "lines: <n> bytes: <c>" on success. Returns 0 on success,
// 1 if read() failed.
int main()
{
    char buf[BUFSIZE];
    unsigned long c = 0, n = 0;
    ssize_t bytes_read; // read() returns ssize_t; -1 signals an error

    while ((bytes_read = read(STDIN_FILENO, buf, BUFSIZE)) > 0) {
        char *p = buf;
        char *end = buf + bytes_read; // one past the last valid byte

        // Each hit restarts the search just after the newline found; the
        // remaining length shrinks accordingly and reaches 0 at `end`.
        while ((p = memchr(p, '\n', (size_t)(end - p)))) {
            ++p;
            ++n;
        }
        c += (unsigned long)bytes_read;
    }

    // A negative return means the read failed rather than hit end of input.
    if (bytes_read < 0) {
        perror("read");
        return 1;
    }

    printf("lines: %lu bytes: %lu\n",n,c);
    return 0;
}
split7.nim works like lc.c, with the same basic performance:
it takes 3 seconds to count a 2,900,000,000-byte file (100 million lines),
read from the OS cache, of course.
I want split8.nim to work similarly to
lc2.c, which runs in 0.9 seconds on the same file.
I am having trouble duplicating lc2.c in Nimrod.
Am I calling memchr() incorrectly?
Also, for my curiosity: When I do:
rlen = f.readBuffer(bufp, SIZE)
if rlen == 0: break
lc = buf.countLines() # XXX fails: string does not have "len" property
Why no len property?
import strutils
# this is similar to lc.c method
proc mycountLines*(s: string, len: int): int =
  ## Counts the line-feed (`'\l'`) characters among the first `len`
  ## bytes of `s`.
  ##
  ## `len` is the number of valid bytes in `s` (for example the value
  ## returned by a buffer read); bytes at index `len` and beyond are
  ## not inspected.
  # Half-open range: the last valid index is `len - 1`. The inclusive
  # `0..len` also looked at `s[len]`, one byte past the data.
  for i in 0 ..< len:
    if s[i] == '\l': inc result
proc main =
  ## Reads standard input in `SIZE`-byte chunks and prints the total
  ## number of bytes read followed by the total number of line feeds.
  const SIZE = 8192
  var
    # `newString` yields a string whose length is SIZE, so `buf[0]` is a
    # valid element to take the address of. `newStringOfCap` only
    # reserves capacity and leaves the length at 0.
    buf = newString(SIZE)
    totlen = 0
    totlines = 0
  while true:
    # readBuffer fills the raw memory behind `buf` and returns how many
    # bytes it actually stored.
    let rlen = stdin.readBuffer(addr buf[0], SIZE)
    if rlen <= 0: break
    totlines.inc(buf.mycountLines(rlen))
    totlen.inc(rlen)
  echo totlen
  echo totlines

main()
import strutils
# similar to lc2.c
proc c_memchr(s: pointer, c: cint, n: csize_t): pointer {.
  importc: "memchr", header: "<string.h>".}
  ## Binding for C's `void *memchr(const void *s, int c, size_t n)`.
  ## The third argument is the number of bytes to scan, and the result
  ## is a pointer to the first match or nil when there is none.

proc mycountLines*(s: string, len: int): int =
  ## Counts the line-feed characters among the first `len` bytes of `s`,
  ## using `memchr` to jump from one newline to the next.
  ##
  ## `len` is clamped to the range `0 .. s.len` so the scan never leaves
  ## the string's storage.
  let total = max(0, min(len, s.len))
  # Addresses are handled as `uint` so they can be advanced and compared.
  let first = cast[uint](cstring(s))
  let stop = first + uint(total) # one past the last byte to scan
  var cur = first
  while cur < stop:
    # 10 is the character code of line feed ('\l').
    let hit = c_memchr(cast[pointer](cur), cint(10), csize_t(stop - cur))
    if hit.isNil: break
    inc result
    # Resume just after the newline that was found.
    cur = cast[uint](hit) + 1
proc main =
  ## Reads standard input in `SIZE`-byte chunks and prints the total
  ## number of bytes read followed by the total number of line feeds.
  const SIZE = 8192
  var
    # `newString` yields a string whose length is SIZE, so `buf[0]` is a
    # valid element to take the address of. `newStringOfCap` only
    # reserves capacity and leaves the length at 0.
    buf = newString(SIZE)
    totlen = 0
    totlines = 0
  while true:
    # readBuffer fills the raw memory behind `buf` and returns how many
    # bytes it actually stored.
    let rlen = stdin.readBuffer(addr buf[0], SIZE)
    if rlen <= 0: break
    totlines.inc(buf.mycountLines(rlen))
    totlen.inc(rlen)
  echo totlen
  echo totlines

main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment