Skip to content

Instantly share code, notes, and snippets.

@koji-hirono
Last active August 29, 2015 13:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save koji-hirono/88e05e394b5754e19eb7 to your computer and use it in GitHub Desktop.
Save koji-hirono/88e05e394b5754e19eb7 to your computer and use it in GitHub Desktop.
read THOR(CSV file)
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "trace.h"
#include "line.h"
#include "csv.h"
static int istext(CSV *);
static int accept(CSV *, int);
static CSV *prepare(CSV *);
static void gettoken(CSV *);
/*
 * Open `fname' for reading and prime the one-character lookahead.
 *
 * The file is opened in binary mode and its first byte (or EOF for an
 * empty file) is stored in csv->token.
 *
 * Returns 0 on success.  If fopen() fails, the error is reported via
 * errt() and -1 is returned; csv->fp is NULL in that case.
 */
int
csv_read_open(CSV *csv, const char *fname)
{
    FILE *stream = fopen(fname, "rb");

    csv->fp = stream;
    if (stream == NULL) {
        errt(-1, "fopen(`%s') failed. %s\n",
            fname, strerror(errno));
    }

    csv->token = fgetc(stream);
    return 0;
}
/*
 * Close the stream held by `csv'.
 *
 * Safe to call on a reader whose open failed (csv->fp == NULL) and safe
 * to call more than once: the stream pointer is cleared after closing,
 * so a repeated call is a no-op instead of a double fclose().
 */
void
csv_close(CSV *csv)
{
    if (csv->fp == NULL)
        return;     /* fclose(NULL) is undefined behaviour */

    fclose(csv->fp);
    csv->fp = NULL;
}
/*
 * Read one CSV record from `csv' into `line'.
 *
 * `line' is reset with line_init() and then receives one NUL-terminated
 * field per comma-separated value.  A field that starts with a double
 * quote is read up to the closing quote; a doubled quote ("") inside it
 * is stored as a single quote character (see prepare()).  The record
 * ends at an optional '\r' followed by an optional '\n'.
 *
 * Returns 1 when a record was stored in `line'.
 * Returns 0 when the input was already at EOF, or when line_add() could
 * not store a character (allocation failure; line_add() has already
 * reported it).  In the latter case `line' holds a partial record.
 */
int
csv_getline(CSV *csv, Line *line)
{
    if (csv->token == EOF)
        return 0;

    line_init(line);
    do {
        if (accept(csv, '"')) {
            /*
             * Quoted field.  Stop at EOF as well as at the closing
             * quote: without the EOF test an unterminated quote
             * would loop forever, appending EOF bytes to the field.
             */
            while (csv->token != EOF && !accept(prepare(csv), '"')) {
                if (line_add(line, csv->token) != 0)
                    return 0;
                gettoken(csv);
            }
        } else {
            while (istext(csv)) {
                if (line_add(line, csv->token) != 0)
                    return 0;
                gettoken(csv);
            }
        }
        /* The NUL both terminates the string and completes the field. */
        if (line_add(line, '\0') != 0)
            return 0;
    } while (accept(csv, ','));

    /* Consume the line terminator: "\r\n", "\n" or a lone "\r". */
    accept(csv, '\r');
    accept(csv, '\n');
    return 1;
}
/*
 * Tell whether the lookahead character belongs to an unquoted field.
 *
 * Returns 0 for the field separator (','), the line terminators
 * ('\n', '\r') and EOF; returns 1 for anything else.
 */
static int
istext(CSV *csv)
{
    switch (csv->token) {
    case ',':
    case '\n':
    case '\r':
    case EOF:
        return 0;
    default:
        return 1;
    }
}
/*
 * Consume the lookahead character if it equals `expect'.
 *
 * Returns 1 and advances to the next token on a match; returns 0 and
 * leaves the lookahead untouched otherwise.
 */
static int
accept(CSV *csv, int expect)
{
    int matched = (csv->token == expect);

    if (matched)
        gettoken(csv);
    return matched;
}
/*
 * Resolve a double quote seen inside a quoted field.
 *
 * When the lookahead is '"', peek at the following byte.  If that is
 * also '"', the pair is an escaped quote: the second byte is consumed
 * and folded into bits 8..15 of csv->token, so the token no longer
 * compares equal to '"' while its low byte is still the quote
 * character.  Otherwise the peeked byte is pushed back with ungetc()
 * and the lookahead stays a plain '"'.
 *
 * Returns `csv' so the call can be nested inside accept().
 */
static CSV *
prepare(CSV *csv)
{
    if (csv->token == '"') {
        int next = fgetc(csv->fp);

        if (next == '"')
            csv->token |= (next << 8);
        else
            ungetc(next, csv->fp);
    }
    return csv;
}
/*
 * Advance the lookahead: read the next byte (or EOF) from the stream
 * into csv->token and emit a debug trace of it, as a character when
 * printable and as a hex value otherwise.
 */
static void
gettoken(CSV *csv)
{
    int c = fgetc(csv->fp);

    csv->token = c;
    if (!isprint(c)) {
        D("token: <0x%02x>", c);
        return;
    }
    D("token: `%c'", c);
}
#ifndef __CSV_H__
#define __CSV_H__

#include <stdio.h>  /* FILE */

/*
 * Initial number of fields and initial per-field buffer size that the
 * reader program passes to line_alloc().
 */
#define CSV_FIELD_NUMS 32
#define CSV_FIELD_SIZE 32

/*
 * Forward declaration so this header can be included on its own;
 * the full definition of struct Line lives in line.h.
 */
struct Line;

typedef struct CSV CSV;

/* State of a CSV reader. */
struct CSV {
    FILE *fp;   /* input stream opened by csv_read_open() */
    int token;  /* one-character lookahead from fgetc(), or EOF */
};

/* Open `fname' and read the first lookahead character; 0 on success, -1 on error. */
extern int csv_read_open(CSV *csv, const char *fname);

/* Close the stream held by `csv'. */
extern void csv_close(CSV *csv);

/* Read the next record into `line'; returns 0 when the input is at EOF. */
extern int csv_getline(CSV *csv, struct Line *line);

#endif /* !__CSV_H__ */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "trace.h"
#include "line.h"
/*
 * Initialise `line' with room for `n' fields of `size' bytes each.
 *
 * Values of `n' below 1 and a `size' of 0 are both replaced by 1 so
 * that the doubling logic in line_expand()/field_expand() always has a
 * non-zero starting point.
 *
 * Returns 0 on success.  On allocation failure the error is reported,
 * everything allocated so far is released, `line' is left empty
 * (field == NULL, nmax == 0) and -1 is returned.
 */
int
line_alloc(Line *line, int n, size_t size)
{
    Field *f;
    Field *e;
    size_t bytes;

    line->nmax = (n > 0) ? n : 1;
    line->def_size = size ? size : 1;
    line->n = 0;

    bytes = (size_t)line->nmax * sizeof(Field);
    if ((line->field = malloc(bytes)) == NULL) {
        line->nmax = 0;
        errt(-1, "malloc(%zu) failed. %s",
            bytes, strerror(errno));
    }

    e = line->field + line->nmax;
    for (f = line->field; f < e; f++) {
        /* Use the clamped default, never the raw (possibly 0) size. */
        f->size = line->def_size;
        f->len = 0;
        if ((f->s = malloc(f->size)) == NULL) {
            E("malloc(%zu) failed. %s", f->size, strerror(errno));
            /* Release the buffers of the fields before this one. */
            while (f > line->field)
                free((--f)->s);
            free(line->field);
            line->field = NULL;
            line->nmax = 0;
            return -1;
        }
    }
    return 0;
}
/*
 * Release every field buffer owned by `line' and then the field array
 * itself.  The Line structure itself is not freed.
 */
void
line_free(Line *line)
{
    int idx = line->nmax;

    while (idx-- > 0)
        free(line->field[idx].s);
    free(line->field);
}
/*
 * Reset `line' for reuse: the field count and the length of every
 * allocated field go back to zero.  Buffers keep their capacity.
 */
void
line_init(Line *line)
{
    int idx = 0;

    line->n = 0;
    while (idx < line->nmax) {
        line->field[idx].len = 0;
        idx++;
    }
}
/*
 * Append the low byte of `ch' to the field currently being built.
 *
 * The field array and the field buffer are grown first when they are
 * about to run out of room.  Passing '\0' stores the terminator and
 * completes the field, so the next call starts a new one.
 *
 * Returns 0 on success, -1 if growing the line or the field failed.
 */
int
line_add(Line *line, int ch)
{
    Field *cur;

    D("ch: `%c(%#x)'", ch, ch);
    D("n: %d, nmax: %d", line->n, line->nmax);
    if (line->n + 1 >= line->nmax && line_expand(line) != 0) {
        errt(-1, "line_expand failed.");
    }

    cur = &line->field[line->n];
    D("len: %zu, size: %zu", cur->len, cur->size);
    if (cur->len + 1 >= cur->size && field_expand(cur) != 0) {
        errt(-1, "field_expand failed.");
    }

    cur->s[cur->len] = ch & 0xff;
    cur->len++;

    if (ch == '\0')
        line->n++;
    return 0;
}
/*
 * Grow the field array of `line' (by doubling) until it has room for
 * at least line->n + 2 fields, and give every newly added field a
 * buffer of line->def_size bytes.
 *
 * Existing fields are left untouched.  line->nmax is only updated once
 * the whole expansion has succeeded, so after a failure `line' is still
 * consistent and can be passed to line_free().
 *
 * Returns 0 on success, -1 on allocation failure (already reported).
 */
int
line_expand(Line *line)
{
    Field *f;
    void *p;
    size_t bytes;
    int org;
    int nmax;
    int i;

    org = (line->nmax > 0) ? line->nmax : 0;
    nmax = org ? org : 1;      /* doubling needs a non-zero start */
    while (line->n + 1 >= nmax)
        nmax *= 2;

    bytes = (size_t)nmax * sizeof(Field);
    if ((p = realloc(line->field, bytes)) == NULL)
        errt(-1, "realloc(%zu) failed. %s",
            bytes, strerror(errno));
    line->field = p;

    /*
     * Only slots [org, nmax) are new.  Starting one slot earlier would
     * overwrite, and thereby leak, the buffer of an existing field.
     */
    for (i = org; i < nmax; i++) {
        f = line->field + i;
        f->size = line->def_size;
        f->len = 0;
        if ((f->s = malloc(f->size)) == NULL) {
            E("malloc(%zu) failed. %s", f->size, strerror(errno));
            /* Undo only what this call allocated. */
            while (i > org)
                free(line->field[--i].s);
            return -1;
        }
    }
    line->nmax = nmax;
    return 0;
}
/*
 * Grow the buffer of `f' (by doubling) until it can hold at least
 * f->len + 2 bytes.
 *
 * f->size and f->s are only updated after realloc() has succeeded, so
 * a failed call leaves the field exactly as it was.
 *
 * Returns 0 on success, -1 on failure (already reported).
 */
int
field_expand(Field *f)
{
    size_t size;
    void *p;

    size = f->size ? f->size : 1;   /* 0 * 2 would never grow */
    while (f->len + 1 >= size) {
        if (size > (size_t)-1 / 2)
            errt(-1, "field_expand: size overflow.");
        size *= 2;
    }

    if ((p = realloc(f->s, size)) == NULL)
        errt(-1, "realloc(%zu) failed. %s", size, strerror(errno));
    f->s = p;
    f->size = size;
    return 0;
}
#ifndef __LINE_H__
#define __LINE_H__

#include <stddef.h> /* size_t */

typedef struct Field Field;
typedef struct Line Line;

/* One growable, NUL-terminated field value. */
struct Field {
    size_t size;    /* allocated capacity of s, in bytes */
    size_t len;     /* bytes stored in s; counts the NUL once the field is complete */
    char *s;        /* heap buffer holding the field text */
};

/* One record: a growable array of fields. */
struct Line {
    size_t def_size;    /* buffer size given to newly created fields */
    int nmax;           /* number of allocated Field slots */
    int n;              /* number of completed fields */
    Field *field;       /* heap array of nmax fields */
};

/* Allocate `n' fields of `size' bytes each; 0 on success, -1 on error. */
extern int line_alloc(Line *line, int n, size_t size);

/* Release all memory owned by `line' (not the Line itself). */
extern void line_free(Line *line);

/* Reset `line' to zero fields, keeping its buffers. */
extern void line_init(Line *line);

/* Append a byte to the current field; '\0' completes the field. */
extern int line_add(Line *line, int ch);

/* Grow the field array; 0 on success, -1 on error. */
extern int line_expand(Line *line);

/* Grow one field buffer; 0 on success, -1 on error. */
extern int field_expand(Field *f);

#endif /* !__LINE_H__ */
# Build the read_csv program from its C sources (GNU make).
PROG=read_csv
SRCS=\
	read_csv.c \
	csv.c \
	line.c \
	trace.c
OBJS=$(SRCS:.c=.o)
DEPS=$(SRCS:.c=.dep)
CC=gcc
CFLAGS=-Wall -W

# `all' and `clean' name actions, not files; declaring them phony keeps
# a stray file called "all" or "clean" from disabling the target.
.PHONY: all clean

all: $(PROG)

clean:
	rm -f $(PROG) $(OBJS) $(DEPS) *~

$(PROG): $(OBJS)
	$(CC) $(CFLAGS) $^ -o $@ $(LDADD)

$(OBJS): %.o : %.c
	$(CC) -c $(CFLAGS) $< -o $@

# One generated dependency file per source, listing the headers it uses.
$(DEPS): %.dep : %.c
	@echo "===> Update" $@
	@$(CC) -MM $< -o $@

# Do not generate dependency files just to delete them again.
ifneq ($(MAKECMDGOALS),clean)
-include $(DEPS)
endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include "trace.h"
#include "line.h"
#include "csv.h"
void usage(const char *);
void show(Line *, Line *, int, int);
size_t maxstring(Line *);
const char *stripbom(const char *);
/*
 * Entry point: dump a CSV file record by record.
 *
 * Options:
 *   -d   turn on debug tracing (sets the global `debug' flag).
 *
 * The first non-option argument names the CSV file; a built-in default
 * path is used when none is given.  The first record is taken as the
 * header and every following record is printed through show().
 *
 * Returns EXIT_SUCCESS when the whole file was processed, EXIT_FAILURE
 * on a usage error or when opening, allocating or reading the header
 * fails.
 */
int
main(int argc, char **argv)
{
    const char *progname = argv[0];
    const char *fname;
    CSV csv;
    Line head;
    Line line;
    int status = EXIT_FAILURE;
    int width;
    int opt;
    int n;

    while ((opt = getopt(argc, argv, "d")) != -1) {
        if (opt != 'd') {
            usage(progname);
            return EXIT_FAILURE;
        }
        debug = 1;
    }
    argc -= optind;
    argv += optind;

    if (argc < 1)
        fname = "THOR_Vietnam/THOR_Vietnam_7_31_2013.csv";
    else
        fname = *argv;

    if (csv_read_open(&csv, fname) != 0) {
        fprintf(stderr, "Error: csv_open(`%s') failed.\n", fname);
        return EXIT_FAILURE;
    }

    if (line_alloc(&head, CSV_FIELD_NUMS, CSV_FIELD_SIZE) != 0) {
        fprintf(stderr, "Error: line_alloc failed.\n");
        goto close_csv;
    }

    if (csv_getline(&csv, &head) == 0) {
        fprintf(stderr, "Error: csv_getline failed.\n");
        goto free_head;
    }

    /* Column width for the header names printed by show(). */
    width = maxstring(&head);

    if (line_alloc(&line, CSV_FIELD_NUMS, CSV_FIELD_SIZE) != 0) {
        fprintf(stderr, "Error: line_alloc failed.\n");
        goto free_head;
    }

    n = 1;
    while (csv_getline(&csv, &line)) {
        show(&head, &line, n, width);
        n++;
    }
    status = EXIT_SUCCESS;

    line_free(&line);
free_head:
    line_free(&head);
close_csv:
    csv_close(&csv);
    return status;
}
/* Print the one-line command synopsis for `progname' on stderr. */
void
usage(const char *progname)
{
    FILE *out = stderr;

    fprintf(out, "usage: %s [-d] CSVFILE\n", progname);
}
/*
 * Print record number `n' as "header: value" lines on stdout.
 *
 * Header names are right-aligned and truncated to `width' characters,
 * with a leading UTF-8 byte order mark stripped.  When the record and
 * the header have different field counts a warning goes to stderr and
 * the missing side is shown as "(Undefined)".  A blank line ends the
 * record.
 */
void
show(Line *head, Line *line, int n, int width)
{
    int nfields;
    int idx;

    printf("Data#%d\n", n);
    if (head->n != line->n) {
        fprintf(stderr, "Warning: number of fields %d != %d\n",
            line->n, head->n);
    }

    /* Walk the longer of the two so that nothing is silently dropped. */
    nfields = head->n;
    if (line->n > nfields)
        nfields = line->n;

    for (idx = 0; idx < nfields; idx++) {
        const char *title = "(Undefined)";
        const char *data = "(Undefined)";

        if (idx < head->n)
            title = stripbom(head->field[idx].s);
        if (idx < line->n)
            data = line->field[idx].s;
        printf("%*.*s: %s\n", width, width, title, data);
    }
    putchar('\n');
}
/*
 * Return the largest `len' among the completed fields of `line',
 * or 0 when the line has no fields.
 */
size_t
maxstring(Line *line)
{
    size_t longest = 0;
    int idx;

    for (idx = 0; idx < line->n; idx++) {
        size_t len = line->field[idx].len;

        if (len > longest)
            longest = len;
    }
    return longest;
}
/*
 * Skip a leading UTF-8 byte order mark (EF BB BF).
 *
 * Returns a pointer just past the BOM when `s' starts with the complete
 * three-byte sequence, and `s' itself otherwise.  A string that merely
 * begins with part of the sequence is returned unchanged.
 */
const char *
stripbom(const char *s)
{
    const unsigned char *p = (const unsigned char *)s;

    /*
     * && short-circuits, so the comparison stops at the first mismatch
     * (including the terminating NUL) and never reads past the string.
     */
    if (p[0] == 0xef && p[1] == 0xbb && p[2] == 0xbf)
        return s + 3;
    return s;
}
#include "trace.h"
/*
 * Global debug switch tested by the D() trace macro: tracing is off
 * while this is 0.  main() sets it to 1 when the -d option is given.
 */
int debug = 0;
#ifndef __TRACE_H__
#define __TRACE_H__
#include <stdio.h>

/*
 * D(fmt, ...): debug trace.  When the global `debug' flag is non-zero,
 * prints "<function:line> ", the printf-style message and a newline on
 * stdout; does nothing otherwise.
 */
#define D(...) do { \
    if (debug) { \
        printf("<%s:%d> ", __func__, __LINE__); \
        printf(__VA_ARGS__); \
        printf("\n"); \
    } \
} while (0)

/*
 * E(fmt, ...): report an error.  Prints "Error: ", the printf-style
 * message and a newline on stderr, so diagnostics never get mixed into
 * the data the program writes on stdout.
 */
#define E(...) do { \
    fprintf(stderr, "Error: "); \
    fprintf(stderr, __VA_ARGS__); \
    fprintf(stderr, "\n"); \
} while (0)

/*
 * errt(r, fmt, ...): report an error with E() and return `r' from the
 * enclosing function.
 */
#define errt(r, ...) do { \
    E(__VA_ARGS__); \
    return r; \
} while (0)

extern int debug;
#endif /* !__TRACE_H__ */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment