-
-
Save koji-hirono/88e05e394b5754e19eb7 to your computer and use it in GitHub Desktop.
Read THOR (CSV file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <errno.h> | |
#include "trace.h" | |
#include "line.h" | |
#include "csv.h" | |
static int istext(CSV *); | |
static int accept(CSV *, int); | |
static CSV *prepare(CSV *); | |
static void gettoken(CSV *); | |
int | |
csv_read_open(CSV *csv, const char *fname) | |
{ | |
if ((csv->fp = fopen(fname, "rb")) == NULL) | |
errt(-1, "fopen(`%s') failed. %s\n", | |
fname, strerror(errno)); | |
csv->token = fgetc(csv->fp); | |
return 0; | |
} | |
void | |
csv_close(CSV *csv) | |
{ | |
fclose(csv->fp); | |
} | |
int | |
csv_getline(CSV *csv, Line *line) | |
{ | |
if (csv->token == EOF) | |
return 0; | |
line_init(line); | |
do { | |
if (accept(csv, '"')) { | |
while (!accept(prepare(csv), '"')) { | |
line_add(line, csv->token); | |
gettoken(csv); | |
} | |
} else { | |
while (istext(csv)) { | |
line_add(line, csv->token); | |
gettoken(csv); | |
} | |
} | |
line_add(line, '\0'); | |
} while (accept(csv, ',')); | |
accept(csv, '\r'); | |
accept(csv, '\n'); | |
return 1; | |
} | |
static int | |
istext(CSV *csv) | |
{ | |
if (csv->token == ',') | |
return 0; | |
if (csv->token == '\n') | |
return 0; | |
if (csv->token == '\r') | |
return 0; | |
if (csv->token == EOF) | |
return 0; | |
return 1; | |
} | |
static int | |
accept(CSV *csv, int expect) | |
{ | |
if (csv->token != expect) | |
return 0; | |
gettoken(csv); | |
return 1; | |
} | |
/*
 * Handle a doubled double quote while inside a quoted field.
 *
 * When the current token is a double quote, the next byte is read.  If it
 * is a double quote too, it is OR-ed into bits 8 and up of the token, so
 * the token no longer compares equal to '"' while its low byte still is
 * the quote character.  Otherwise the byte is pushed back onto the stream.
 * Tokens other than a double quote are left alone.
 *
 * Always returns `csv`, so the call can be nested inside accept().
 */
static CSV *
prepare(CSV *csv)
{
    if (csv->token == '"') {
        int next = fgetc(csv->fp);

        if (next == '"')
            csv->token |= (next << 8);
        else
            ungetc(next, csv->fp);
    }
    return csv;
}
static void | |
gettoken(CSV *csv) | |
{ | |
csv->token = fgetc(csv->fp); | |
if (isprint(csv->token)) | |
D("token: `%c'", csv->token); | |
else | |
D("token: <0x%02x>", csv->token); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __CSV_H__
#define __CSV_H__

/*
 * CSV reader interface.
 *
 * This header is self-contained: it pulls in <stdio.h> for FILE and
 * forward-declares struct Line (defined in line.h), so it can be included
 * before or after line.h.
 */
#include <stdio.h>

/* Initial field count and initial per-field buffer size used by callers. */
#define CSV_FIELD_NUMS 32
#define CSV_FIELD_SIZE 32

struct Line;

typedef struct CSV CSV;

/* Reader state: the input stream plus a one-token lookahead. */
struct CSV {
    FILE *fp;   /* stream opened by csv_read_open */
    int token;  /* current lookahead byte, or EOF */
};

/* Open a file for reading; returns 0 on success, -1 on failure. */
extern int csv_read_open(CSV *, const char *);
/* Close the stream opened by csv_read_open. */
extern void csv_close(CSV *);
/* Read one record into the line; returns 1 on success, 0 at EOF. */
extern int csv_getline(CSV *, struct Line *);

#endif /* !__CSV_H__ */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <string.h> | |
#include <errno.h> | |
#include "trace.h" | |
#include "line.h" | |
int | |
line_alloc(Line *line, int n, size_t size) | |
{ | |
Field *f; | |
Field *e; | |
line->nmax = n ? n : 1; | |
line->def_size = size ? size : 1; | |
if ((line->field = malloc(line->nmax * sizeof(Field))) == NULL) | |
errt(-1, "malloc(%zu) failed. %s", | |
line->nmax * sizeof(Field), strerror(errno)); | |
line->n = 0; | |
e = line->field + line->nmax; | |
for (f = line->field; f < e; f++) { | |
f->size = size; | |
if ((f->s = malloc(f->size)) == NULL) { | |
E("malloc(%zu) failed. %s", f->size, strerror(errno)); | |
while (--f >= line->field) | |
free(f->s); | |
free(line->field); | |
return -1; | |
} | |
f->len = 0; | |
} | |
return 0; | |
} | |
void | |
line_free(Line *line) | |
{ | |
int i; | |
for (i = 0; i < line->nmax; i++) | |
free(line->field[i].s); | |
free(line->field); | |
} | |
void | |
line_init(Line *line) | |
{ | |
int i; | |
for (i = 0; i < line->nmax; i++) | |
line->field[i].len = 0; | |
line->n = 0; | |
} | |
int | |
line_add(Line *line, int ch) | |
{ | |
Field *f; | |
D("ch: `%c(%#x)'", ch, ch); | |
D("n: %d, nmax: %d", line->n, line->nmax); | |
if (line->n + 1 >= line->nmax) | |
if (line_expand(line) != 0) | |
errt(-1, "line_expand failed."); | |
f = line->field + line->n; | |
D("len: %zu, size: %zu", f->len, f->size); | |
if (f->len + 1 >= f->size) | |
if (field_expand(f) != 0) | |
errt(-1, "field_expand failed."); | |
f->s[f->len++] = ch & 0xff; | |
if (ch == '\0') | |
line->n++; | |
return 0; | |
} | |
int | |
line_expand(Line *line) | |
{ | |
Field *f; | |
void *p; | |
int org; | |
int i; | |
org = line->nmax; | |
while (line->n + 1 >= line->nmax) | |
line->nmax *= 2; | |
if ((p = realloc(line->field, line->nmax * sizeof(Field))) == NULL) | |
errt(-1, "realloc(%zu) failed. %s", | |
line->nmax * sizeof(Field), strerror(errno)); | |
line->field = p; | |
for (i = org - 1; i < line->nmax; i++) { | |
f = line->field + i; | |
f->size = line->def_size; | |
if ((f->s = malloc(f->size)) == NULL) { | |
E("malloc(%zu) failed. %s", f->size, strerror(errno)); | |
while (--f >= line->field) | |
free(f->s); | |
return -1; | |
} | |
f->len = 0; | |
} | |
return 0; | |
} | |
int | |
field_expand(Field *f) | |
{ | |
void *p; | |
while (f->len + 1 >= f->size) | |
f->size *= 2; | |
if ((p = realloc(f->s, f->size)) == NULL) | |
errt(-1, "realloc(%zu) failed. %s", f->size, strerror(errno)); | |
f->s = p; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __LINE_H__
#define __LINE_H__

/*
 * Growable record storage: a Line is an array of Fields, and each Field is
 * a growable byte buffer.
 */
#include <stddef.h> /* size_t */

typedef struct Field Field;
typedef struct Line Line;

/* One field of a record. */
struct Field {
    size_t size;    /* bytes allocated for s */
    size_t len;     /* bytes stored in s (line_add counts the '\0' too) */
    char *s;        /* field contents */
};

/* One record. */
struct Line {
    size_t def_size;    /* buffer size given to newly allocated fields */
    int nmax;           /* number of allocated field slots */
    int n;              /* number of completed fields */
    Field *field;       /* array of nmax fields */
};

/* Allocate a line with the given field count and per-field size. */
extern int line_alloc(Line *, int, size_t);
/* Release everything owned by the line. */
extern void line_free(Line *);
/* Empty the line for reuse without releasing memory. */
extern void line_init(Line *);
/* Append one byte to the current field; '\0' completes the field. */
extern int line_add(Line *, int);
/* Grow the field array. */
extern int line_expand(Line *);
/* Grow one field's buffer. */
extern int field_expand(Field *);

#endif /* !__LINE_H__ */
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build read_csv from its C sources, with one generated dependency file
# (.dep) per source.
PROG=read_csv
SRCS=\
	read_csv.c \
	csv.c \
	line.c \
	trace.c
OBJS=$(SRCS:.c=.o)
DEPS=$(SRCS:.c=.dep)
CC=gcc
CFLAGS=-Wall -W

# `all` and `clean` name actions, not files: declare them phony so a file
# with either name in this directory cannot make the target look up to date.
.PHONY: all clean

all: $(PROG)

clean:
	rm -f $(PROG) $(OBJS) $(DEPS) *~

# Link the program from all objects.
$(PROG): $(OBJS)
	$(CC) $(CFLAGS) $^ -o $@ $(LDADD)

# Compile each source into its object.
$(OBJS): %.o : %.c
	$(CC) -c $(CFLAGS) $< -o $@

# Generate the header dependencies of each source.
$(DEPS): %.dep : %.c
	@echo "===> Update" $@
	@$(CC) -MM $< -o $@

# Skip the dependency files when only cleaning, so `make clean` does not
# regenerate them first.
ifneq ($(MAKECMDGOALS),clean)
-include $(DEPS)
endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <errno.h> | |
#include <unistd.h> | |
#include "trace.h" | |
#include "line.h" | |
#include "csv.h" | |
void usage(const char *); | |
void show(Line *, Line *, int, int); | |
size_t maxstring(Line *); | |
const char *stripbom(const char *); | |
int | |
main(int argc, char **argv) | |
{ | |
const char *progname; | |
const char *fname; | |
CSV csv; | |
Line head; | |
Line line; | |
int width; | |
int opt; | |
int n; | |
progname = argv[0]; | |
while ((opt = getopt(argc, argv, "d")) != -1) { | |
switch (opt) { | |
case 'd': | |
debug = 1; | |
break; | |
default: | |
usage(progname); | |
return EXIT_FAILURE; | |
} | |
} | |
argc -= optind; | |
argv += optind; | |
fname = (argc < 1) ? "THOR_Vietnam/THOR_Vietnam_7_31_2013.csv" : *argv; | |
if (csv_read_open(&csv, fname) != 0) { | |
fprintf(stderr, "Error: csv_open(`%s') failed.\n", fname); | |
return EXIT_FAILURE; | |
} | |
if (line_alloc(&head, CSV_FIELD_NUMS, CSV_FIELD_SIZE) != 0) { | |
fprintf(stderr, "Error: line_alloc failed.\n"); | |
csv_close(&csv); | |
return EXIT_FAILURE; | |
} | |
if (csv_getline(&csv, &head) == 0) { | |
fprintf(stderr, "Error: csv_getline failed.\n"); | |
line_free(&head); | |
csv_close(&csv); | |
return EXIT_FAILURE; | |
} | |
width = maxstring(&head); | |
if (line_alloc(&line, CSV_FIELD_NUMS, CSV_FIELD_SIZE) != 0) { | |
fprintf(stderr, "Error: line_alloc failed.\n"); | |
line_free(&head); | |
csv_close(&csv); | |
return EXIT_FAILURE; | |
} | |
for (n = 1; csv_getline(&csv, &line); n++) | |
show(&head, &line, n, width); | |
line_free(&head); | |
line_free(&line); | |
csv_close(&csv); | |
return EXIT_SUCCESS; | |
} | |
/*
 * Print the one-line usage message for `progname` on stderr.
 */
void
usage(const char *progname)
{
    FILE *out = stderr;

    fprintf(out, "usage: %s [-d] CSVFILE\n", progname);
}
void | |
show(Line *head, Line *line, int n, int width) | |
{ | |
const char *t; | |
const char *d; | |
int max; | |
int i; | |
printf("Data#%d\n", n); | |
if (line->n != head->n) | |
fprintf(stderr, "Warning: number of fields %d != %d\n", | |
line->n, head->n); | |
max = (line->n > head->n) ? line->n : head->n; | |
for (i = 0; i < max; i++) { | |
t = (i < head->n) ? stripbom(head->field[i].s) : "(Undefined)"; | |
d = (i < line->n) ? line->field[i].s : "(Undefined)"; | |
printf("%*.*s: %s\n", width, width, t, d); | |
} | |
printf("\n"); | |
} | |
size_t | |
maxstring(Line *line) | |
{ | |
Field *e; | |
Field *f; | |
size_t max; | |
max = 0; | |
e = line->field + line->n; | |
for (f = line->field; f < e; f++) | |
if (f->len > max) | |
max = f->len; | |
return max; | |
} | |
/*
 * Skip a leading UTF-8 byte order mark (EF BB BF).
 *
 * Returns a pointer just past the BOM when `s` starts with all three BOM
 * bytes, and `s` itself otherwise.  A partial match is not a BOM: text
 * that merely starts with 0xEF (for example a three-byte UTF-8 character)
 * is returned unchanged, with no bytes dropped.
 */
const char *
stripbom(const char *s)
{
    static const char bom[] = "\xef\xbb\xbf";

    /* strncmp stops at the terminator, so short strings are safe. */
    if (strncmp(s, bom, sizeof bom - 1) == 0)
        return s + (sizeof bom - 1);
    return s;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "trace.h" | |
/* Debug switch tested by the D() trace macro; non-zero enables tracing. */
int debug = 0;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef __TRACE_H__
#define __TRACE_H__

#include <stdio.h>

/*
 * D(fmt, ...): debug trace.  When `debug` is non-zero, prints
 * "<function:line> ", the formatted message and a newline to stdout.
 */
#define D(...) do { \
    if (debug) { \
        printf("<%s:%d> ", __func__, __LINE__); \
        printf(__VA_ARGS__); \
        printf("\n"); \
    } \
} while (0)

/*
 * E(fmt, ...): report an error.  Prints "Error: ", the formatted message
 * and a newline to stderr, so errors do not mix with data written to
 * stdout.
 */
#define E(...) do { \
    fprintf(stderr, "Error: "); \
    fprintf(stderr, __VA_ARGS__); \
    fprintf(stderr, "\n"); \
} while (0)

/*
 * errt(r, fmt, ...): report an error with E and return `r` from the
 * enclosing function.  Note the hidden `return`.
 */
#define errt(r, ...) do { \
    E(__VA_ARGS__); \
    return r; \
} while (0)

/* Debug switch; defined in trace.c. */
extern int debug;

#endif /* !__TRACE_H__ */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment