-
-
Save jots/11461543 to your computer and use it in GitHub Desktop.
count lines with memchr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#define BUFSIZE 8*1024 | |
int main() | |
{ | |
FILE *fp = stdin; /* or use fopen to open a file */ | |
char buf[BUFSIZE+1]; | |
unsigned long c, n = 0; | |
int i, chars_read; | |
while((chars_read = fread(buf, 1, BUFSIZE, stdin)) > 0){ | |
for(i=0; i< chars_read; i++) { | |
c++; | |
if(buf[i] == '\n') { | |
n++; | |
} | |
} | |
} | |
printf("%lu %lu\n",n,c); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> // memchr | |
#include <unistd.h> | |
#define BUFSIZE 8*1024 | |
int main() | |
{ | |
FILE *fp = stdin; | |
char buf[BUFSIZE+1]; | |
unsigned long c = 0, n = 0; | |
int i, bytes_read; | |
//while((bytes_read = fread(buf, 1, BUFSIZE, stdin)) > 0){ | |
while((bytes_read = read(STDIN_FILENO,buf,BUFSIZE)) > 0){ | |
char *p = buf; | |
while ((p = memchr (p, '\n', (buf + bytes_read) - p))) { | |
++p; | |
++n; | |
} | |
c += bytes_read; | |
} | |
printf("lines: %lu bytes: %lu\n",n,c); | |
return 0; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
split7.nim works like lc.c and has the same basic performance:
it takes 3 seconds to count a 2,900,000,000-byte file (100 million lines),
read from the OS cache, of course.
I want split8.nim to work like lc2.c,
which runs in 0.9 seconds on the same file.
I am having trouble duplicating lc2.c in Nimrod.
Am I calling memchr() wrong?
Also, out of curiosity: when I do
rlen = f.readBuffer(bufp, SIZE)
if rlen == 0: break
lc = buf.countLines() # XXX fails: string does not have "len" property
why does the string have no len property?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import strutils | |
# this is similar to lc.c method | |
proc mycountLines*(s: string, len: int): int =
  ## Counts the newline (`'\l'`) characters among the first `len` bytes
  ## of `s` and returns that count.
  ##
  ## Only indices `0 ..< len` are inspected. The range is half-open so the
  ## byte at index `len` (stale data or past the end) is never read.
  for i in 0 ..< len:
    if s[i] == '\l': inc result
proc main =
  ## Reads stdin in `SIZE`-byte chunks and prints the total number of bytes
  ## read, followed by the total number of newlines counted by
  ## `mycountLines`.
  const SIZE = 8192
  # newString (not newStringOfCap) gives the buffer a real length of SIZE,
  # so `buf[0]` is a valid element and the whole buffer may be written to.
  var
    buf = newString(SIZE)
    totlen = 0
    totlines = 0
  let f = stdin
  while true:
    # readBuffer fills the buffer in place and returns the bytes read;
    # this loop treats a return of 0 as end of input.
    let rlen = f.readBuffer(addr buf[0], SIZE)
    if rlen == 0: break
    totlines.inc(buf.mycountLines(rlen))
    totlen.inc(rlen)
  echo totlen
  echo totlines

main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import strutils | |
# similar to lc2.c | |
proc c_memchr(s: pointer, c: cint, n: csize_t): pointer {.
  importc: "memchr", header: "<string.h>".}
  ## Binding for C `memchr(const void *s, int c, size_t n)`: the third
  ## argument is a byte count, and the result is a pointer to the first
  ## match or nil when there is none.

proc mycountLines*(s: string, len: int): int =
  ## Counts the newline bytes (value 10) among the first `len` bytes of `s`,
  ## using `memchr` to jump from one newline to the next.
  ##
  ## `len` is clamped to `s.len` so the scan never leaves the string.
  let total = min(len, s.len)
  if total > 0:
    let first = cast[uint](cstring(s))
    let stop = first + uint(total)      # one past the last byte to scan
    var cur = first
    while cur < stop:
      let hit = c_memchr(cast[pointer](cur), cint(10), csize_t(stop - cur))
      if hit == nil: break
      inc result
      # Resume right after the newline that was just found.
      cur = cast[uint](hit) + 1
proc main =
  ## Reads stdin in `SIZE`-byte chunks and prints the total number of bytes
  ## read, followed by the total number of newlines counted by
  ## `mycountLines`.
  const SIZE = 8192
  # newString (not newStringOfCap) gives the buffer a real length of SIZE,
  # so `buf[0]` is a valid element and `buf.len` covers the whole buffer.
  var
    buf = newString(SIZE)
    totlen = 0
    totlines = 0
  let f = stdin
  while true:
    # readBuffer fills the buffer in place and returns the bytes read;
    # this loop treats a return of 0 as end of input.
    let rlen = f.readBuffer(addr buf[0], SIZE)
    if rlen == 0: break
    totlines.inc(buf.mycountLines(rlen))
    totlen.inc(rlen)
  echo totlen
  echo totlines

main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment