jots/lc.c

## lc.c
#include <stdio.h>

#define BUFSIZE 8*1024
int main()
{
  FILE                *fp = stdin;    /* or use fopen to open a file */
  char buf[BUFSIZE+1];
  unsigned long   c,    n = 0;
  int i, chars_read;

  while((chars_read = fread(buf, 1, BUFSIZE, stdin)) > 0){
    for(i=0; i< chars_read; i++) {
      c++;
      if(buf[i] == '\n') {
        n++;
      }
    }
  }

  printf("%lu %lu\n",n,c);

  return 0;
}

## lc2.c
#include <stdio.h>
#include <string.h> // memchr
#include <unistd.h>

#define BUFSIZE 8*1024
int main()
{
  FILE                *fp = stdin;
  char buf[BUFSIZE+1];
  unsigned long   c = 0,    n = 0;
  int i, bytes_read;

  //while((bytes_read = fread(buf, 1, BUFSIZE, stdin)) > 0){
  while((bytes_read = read(STDIN_FILENO,buf,BUFSIZE)) > 0){
    char *p = buf;

    while ((p = memchr (p, '\n', (buf + bytes_read) - p))) {
      ++p;
      ++n;
    }
    c += bytes_read;
  }

  printf("lines: %lu bytes: %lu\n",n,c);

  return 0;
}


## split.txt
split7.nim works like lc.c.  same basic performance.
takes 3 seconds to count 2,900,000,000 byte file (100mm lines)
(from OS cache of course)

I want to have split8.nim work similar to
lc2.c.  lc2 runs in 0.9 seconds for same file.

I am having trouble duplicating lc2.c in nimrod.
Am I calling memchr() wrong?

Also, for my curiosity: When I do:

rlen =  f.readBuffer(bufp, SIZE)
    if rlen == 0: break
    lc = buf.countLines() # XXX fails: string does not have "len" property

Why no len property?

## split7.nim
import strutils

# this is similar to lc.c method

proc mycountLines*(s: string,len:int): int  =
  for i in 0..len:
    if s[i] == '\l': inc result


proc main =
  const SIZE = 8192
  var
    rlen = 0
    totlen = 0
    buf = newStringOfCap(SIZE)
    lc = 0
    totlines = 0
    bufp = addr buf[0]
    f = stdin
  while true:
    rlen =  f.readBuffer(bufp, SIZE)
    if rlen == 0: break
    lc = buf.mycountLines(rlen)
    totlines.inc(lc)
    totlen.inc(rlen)

  echo totlen
  echo totlines

main()

## split8.nim
import strutils

# similar to lc2.c

proc c_memchr(cstr: cstring, c:int, p:pointer): cstring {.
      importc: "memchr", header: "<string.h>".}


# XXX this doesn't seem to be working.
proc mycountLines*(s: string, len:int): int  =
  var p:pointer
  while true:
    # '10' is newline?
    p = c_memchr(s, 10, p)
    if p == cast[pointer](0): break
    inc result

proc main =
  const SIZE = 8192
  var
    rlen = 0
    totlen = 0
    buf = newStringOfCap(SIZE)
    lc = 0
    totlines = 0
    bufp = addr buf[0]
    f = stdin
  while true:
    rlen =  f.readBuffer(bufp, SIZE)
    if rlen == 0: break
    lc = buf.mycountLines(rlen)
    totlines.inc(lc)
    totlen.inc(rlen)

  echo totlen
  echo totlines

main()
	#include <stdio.h>

	#define BUFSIZE 8*1024
	int main()
	{
	FILE fp = stdin; / or use fopen to open a file */
	char buf[BUFSIZE+1];
	unsigned long c, n = 0;
	int i, chars_read;

	while((chars_read = fread(buf, 1, BUFSIZE, stdin)) > 0){
	for(i=0; i< chars_read; i++) {
	c++;
	if(buf[i] == '\n') {
	n++;
	}
	}
	}

	printf("%lu %lu\n",n,c);

	return 0;
	}
	#include <stdio.h>
	#include <string.h> // memchr
	#include <unistd.h>

	#define BUFSIZE 8*1024
	int main()
	{
	FILE *fp = stdin;
	char buf[BUFSIZE+1];
	unsigned long c = 0, n = 0;
	int i, bytes_read;

	//while((bytes_read = fread(buf, 1, BUFSIZE, stdin)) > 0){
	while((bytes_read = read(STDIN_FILENO,buf,BUFSIZE)) > 0){
	char *p = buf;

	while ((p = memchr (p, '\n', (buf + bytes_read) - p))) {
	++p;
	++n;
	}
	c += bytes_read;
	}

	printf("lines: %lu bytes: %lu\n",n,c);

	return 0;
	}
	split7.nim works like lc.c. same basic performance.
	takes 3 seconds to count 2,900,000,000 byte file (100mm lines)
	(from OS cache of course)

	I want to have split8.nim work similar to
	lc2.c. lc2 runs in 0.9 seconds for same file.

	I am having trouble duplicating lc2.c in nimrod.
	Am I calling memchr() wrong?

	Also, for my curiosity: When I do:

	rlen = f.readBuffer(bufp, SIZE)
	if rlen == 0: break
	lc = buf.countLines() # XXX fails: string does not have "len" property

	Why no len property?
	import strutils

	# this is similar to lc.c method

	proc mycountLines*(s: string,len:int): int =
	for i in 0..len:
	if s[i] == '\l': inc result


	proc main =
	const SIZE = 8192
	var
	rlen = 0
	totlen = 0
	buf = newStringOfCap(SIZE)
	lc = 0
	totlines = 0
	bufp = addr buf[0]
	f = stdin
	while true:
	rlen = f.readBuffer(bufp, SIZE)
	if rlen == 0: break
	lc = buf.mycountLines(rlen)
	totlines.inc(lc)
	totlen.inc(rlen)

	echo totlen
	echo totlines

	main()
	import strutils

	# similar to lc2.c

	proc c_memchr(cstr: cstring, c:int, p:pointer): cstring {.
	importc: "memchr", header: "<string.h>".}


	# XXX this doesn't seem to be working.
	proc mycountLines*(s: string, len:int): int =
	var p:pointer
	while true:
	# '10' is newline?
	p = c_memchr(s, 10, p)
	if p == cast[pointer](0): break
	inc result

	proc main =
	const SIZE = 8192
	var
	rlen = 0
	totlen = 0
	buf = newStringOfCap(SIZE)
	lc = 0
	totlines = 0
	bufp = addr buf[0]
	f = stdin
	while true:
	rlen = f.readBuffer(bufp, SIZE)
	if rlen == 0: break
	lc = buf.mycountLines(rlen)
	totlines.inc(lc)
	totlen.inc(rlen)

	echo totlen
	echo totlines

	main()