Skip to content

Instantly share code, notes, and snippets.

@marshall
Last active January 3, 2016 08:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marshall/8440053 to your computer and use it in GitHub Desktop.
Save marshall/8440053 to your computer and use it in GitHub Desktop.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "csv.h"
/* City name to search for. Points at a string literal by default, so it is
 * declared const; main() may repoint it at a command-line argument. */
const char *city = "test";
/* Zero-based index of the field currently being delivered within a row:
 * incremented by data_callback(), reset to 0 by row_callback(). */
int row_index = 0;
/*
 * Field callback passed to csv_parse()/csv_fini().
 *
 * string - pointer to the bytes of the current field
 * i      - length of the field in bytes
 * data   - user pointer forwarded by the parser (unused)
 *
 * Prints "found" when the second field of the current row (row_index == 1)
 * is exactly equal to `city`, then advances row_index to the next field.
 */
void data_callback(void *string, size_t i, void *data) {
    (void)data;

    if (row_index == 1) {
        size_t city_len = strlen(city);

        /* The field is described by (pointer, length), so compare lengths
         * first and never read more than i bytes. This also requires a
         * whole-field match instead of accepting any field that merely
         * starts with `city`. */
        if (i == city_len && (city_len == 0 || memcmp(string, city, city_len) == 0)) {
            printf("found\n");
        }
    }
    row_index++;
}
/*
 * End-of-row callback passed to csv_parse()/csv_fini().
 *
 * Resets the field counter so the first field of the next row is seen
 * with row_index == 0. Neither argument is used.
 */
void row_callback(int c, void *data) {
    (void)c;
    (void)data;
    row_index = 0;
}
/*
 * Streams CSV from stdin through the parser in 1024-byte chunks, with
 * data_callback/row_callback reporting rows whose second field matches
 * the city to search for.
 *
 * argv[1], when present, replaces the default value of `city`.
 *
 * Returns EXIT_SUCCESS when all input was parsed, EXIT_FAILURE when the
 * parser cannot be initialised, stdin cannot be read, or parsing fails.
 */
int main(int argc, char **argv) {
    struct csv_parser parser;
    char buf[1024];
    size_t bytes_read;

    /* argc counts the program name, so argv[1] is only a real argument when
     * argc > 1; with argc == 1 it is NULL and must not replace the default. */
    if (argc > 1) {
        city = argv[1];
    }

    if (csv_init(&parser, 0) != 0) {
        fprintf(stderr, "Error: failed to initialize CSV parser\n");
        return EXIT_FAILURE;
    }

    /* fread() returns 0 on both EOF and error; ferror() below tells them apart. */
    while ((bytes_read = fread(buf, 1, sizeof(buf), stdin)) > 0) {
        if (csv_parse(&parser, buf, bytes_read, data_callback, row_callback, NULL) != bytes_read) {
            fprintf(stderr, "Error: %s\n", csv_strerror(csv_error(&parser)));
            csv_free(&parser);
            return EXIT_FAILURE;
        }
    }

    if (ferror(stdin)) {
        fprintf(stderr, "Error: failed to read from stdin\n");
        csv_free(&parser);
        return EXIT_FAILURE;
    }

    /* Flush the final field/row, which may not be newline-terminated. */
    if (csv_fini(&parser, data_callback, row_callback, NULL) != 0) {
        fprintf(stderr, "Error: %s\n", csv_strerror(csv_error(&parser)));
        csv_free(&parser);
        return EXIT_FAILURE;
    }

    csv_free(&parser);
    return EXIT_SUCCESS;
}
// Reads CSV from stdin and prints, as JSON, every record whose second
// column equals the city given as the first command-line argument.
var csv = require('csv');

var city = process.argv[2];
if (city === undefined) {
  // Without a city every comparison below would be false and the whole
  // input would be consumed for nothing, so fail fast instead.
  console.error('usage: node csvfind.js <city>');
  process.exit(1);
}

process.stdin.resume();
process.stdin.setEncoding('utf8');

csv()
  .from.stream(process.stdin)
  .on('record', function (row) {
    // Strict equality: both sides are expected to be strings, no coercion wanted.
    if (row[1] === city) {
      console.log(JSON.stringify(row));
    }
  })
  .on('error', function (error) {
    // Report malformed input with a message and a non-zero exit status.
    console.error('Error: ' + error.message);
    process.exit(1);
  });
import csv
import sys
def matching_rows(rows, city):
    """Yield each row whose second column equals ``city``.

    ``rows`` is any iterable of sequences, such as a ``csv.reader``.
    Rows with fewer than two columns (for example the empty list that
    ``csv.reader`` produces for a blank line) are skipped rather than
    raising IndexError.
    """
    for row in rows:
        if len(row) > 1 and row[1] == city:
            yield row


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("usage: python csvfind.py <city>")
    for row in matching_rows(csv.reader(sys.stdin), sys.argv[1]):
        # The parenthesised form prints the same text under Python 2 and 3.
        print(row)
$ time gzcat ~/Downloads/worldcitiespop.txt.gz | node csvfind.js "lake dallas"

["us","lake dallas","Lake Dallas","TX","","33.1191667","-97.0252778"]
gzcat ~/Downloads/worldcitiespop.txt.gz  0.45s user 0.04s system 2% cpu 24.880 total
node csvfind.js "lake dallas"  24.89s user 0.16s system 100% cpu 24.901 total

$ time gzcat ~/Downloads/worldcitiespop.txt.gz | python csvfind.py "lake dallas"
['us', 'lake dallas', 'Lake Dallas', 'TX', '', '33.1191667', '-97.0252778']
gzcat ~/Downloads/worldcitiespop.txt.gz  0.43s user 0.03s system 16% cpu 2.742 total
python csvfind.py "lake dallas"  2.67s user 0.03s system 98% cpu 2.745 total

$ time gzcat ~/Downloads/worldcitiespop.txt.gz | ./csvfind "lake dallas"
found
gzcat ~/Downloads/worldcitiespop.txt.gz  0.43s user 0.04s system 55% cpu 0.835 total
./csvfind "lake dallas"  0.82s user 0.01s system 99% cpu 0.834 total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment