Skip to content

Instantly share code, notes, and snippets.

@blakesmith
Last active January 3, 2016 03:49
Show Gist options
  • Save blakesmith/8404563 to your computer and use it in GitHub Desktop.
Save blakesmith/8404563 to your computer and use it in GitHub Desktop.
nginx log merger. Useful if you have logs distributed on many app nodes that you'd like unified into one big log file.
# Build configuration for the nginx log merger.
CC=gcc
# The original flags were misspelled ("-pendatic", "-03"); gcc rejects both
# as unrecognized options, so the build could not succeed.
CFLAGS=-Wall -pedantic -O3 -std=c99
PROG=nginx_log_merger

.PHONY: all clean

all: $(PROG)

# Compile and link the single source file straight into the executable.
$(PROG): nginx_log_merger.c
	$(CC) $(CFLAGS) nginx_log_merger.c -o $(PROG)

clean:
	rm -rf $(PROG)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <getopt.h>
#define MAX_LINE 512
#define DATE_SIZE 64
#define MAX_FILE_COUNT 64
#define MAX_FILE_LEN 512
#define SECONDSPERDAY 86400
/* Command-line settings, filled in by parse_opts(). */
struct opt {
/* Input log paths; each -i argument is copied into the next slot. */
char infile_names[MAX_FILE_COUNT][MAX_FILE_LEN];
/* Path of the merged output file, taken from -o. */
char outfile_name[MAX_FILE_LEN];
/* Number of slots of infile_names filled so far. */
int num_infiles;
/* 0-based index of the date field within a log line (-d, default 3). */
int date_pos;
};
/* Read state for one input log. */
typedef struct logfile {
/* File name as handed to logcollection_init(); not owned by this struct. */
const char *name;
/* Stream opened for reading on that file. */
FILE *infile;
/* Most recent chunk read with fgets() (at most MAX_LINE-1 characters). */
char cur_position[MAX_LINE];
/* Timestamp parsed from the current line; used to pick the next line to emit. */
time_t cur_date;
/* Set to 1 once fgets() has returned NULL for this file. */
int done;
} logfile;
/* The set of input logs being merged. */
typedef struct logcollection {
/* Heap-allocated array of num_logs entries (see logcollection_init/free). */
logfile *logs;
/* Number of entries in logs. */
int num_logs;
/* Incremented by set_date() whenever a read returns NULL; the merge loop
   runs while this is below num_logs. */
int num_done;
} logcollection;
/*
 * Copy the date portion of one delimiter-separated field of `in` into `buf`.
 *
 * Fields are numbered from 0 and are separated by single `delim` characters
 * (two delimiters in a row produce an empty field).  The last six characters
 * of the selected field are dropped before copying -- presumably a "+HH:MM"
 * UTC offset at the end of the timestamp -- and the result is truncated if
 * necessary so that it always fits in `buf` together with its terminator.
 *
 * in:    NUL-terminated log line.
 * buf:   destination buffer.
 * len:   size of `buf` in bytes, terminator included.
 * delim: field separator.
 * pos:   0-based index of the field that holds the date.
 *
 * Returns 0 on success.  Returns -1, with `buf` set to "" whenever `buf` is
 * usable, if the arguments are invalid, if the field is shorter than the
 * six-character suffix, or if the field is not followed by another
 * delimiter.
 */
static int find_date(const char *in, char *buf, int len, char delim, int pos) {
    /* Number of trailing characters of the field that are not copied. */
    enum { SUFFIX_LEN = 6 };
    const char *start;
    const char *ch;
    int cur_delim = 0;
    long date_length;
    long copy_length;

    if (buf == NULL || len <= 0) {
        return -1;
    }
    /* Never leave the caller with an indeterminate buffer on failure. */
    buf[0] = '\0';
    if (in == NULL || pos < 0) {
        return -1;
    }

    /* Field 0 begins at the start of the line, not after a delimiter. */
    start = (pos == 0) ? in : NULL;

    for (ch = in; *ch != '\0'; ch++) {
        if (*ch != delim) {
            continue;
        }
        cur_delim++;
        if (cur_delim == pos) {
            /* The wanted field starts right after its leading delimiter. */
            start = ch + 1;
            continue;
        }
        if (cur_delim > pos) {
            /* `ch` is the delimiter that closes the wanted field. */
            date_length = (long)(ch - start) - SUFFIX_LEN;
            if (date_length < 0) {
                return -1;
            }
            /* Leave room for the terminator when truncating. */
            copy_length = (date_length + 1 > len) ? (long)len - 1 : date_length;
            memcpy(buf, start, (size_t)copy_length);
            buf[copy_length] = '\0';
            return 0;
        }
    }
    return -1;
}
/*
 * Pick the log whose current line carries the earliest timestamp.
 *
 * Logs already marked `done` are skipped: their cur_date still holds the
 * timestamp of their final line, so considering them would select an
 * exhausted file again and starve the remaining ones.  Ties go to the
 * lowest index.
 *
 * Returns the index into coll->logs of the chosen log, or 0 when no
 * unfinished log exists (callers are expected to check num_done against
 * num_logs before calling).
 */
static int find_next_log(logcollection *coll) {
    const time_t *min = NULL;
    int pos = 0;
    int i;

    for (i = 0; i < coll->num_logs; ++i) {
        const logfile *candidate = &coll->logs[i];

        if (candidate->done) {
            continue;
        }
        /* Strictly earlier only, so the first of several equal dates wins. */
        if (min == NULL || difftime(*min, candidate->cur_date) > 0) {
            min = &candidate->cur_date;
            pos = i;
        }
    }
    return pos;
}
/*
 * Report a malformed date/time on stderr and terminate the process with
 * exit status 1.  Declared with a (void) prototype so that calls passing
 * arguments are diagnosed by the compiler.
 */
void invaliddate(void) {
    fprintf(stderr, "invalid date/time specification\n");
    exit(1);
}
/*
 * Parse an ISO 8601 point in time and convert it with mktime().
 *
 * Accepted forms:
 *     CCYYMMDD             CCYY-MM-DD
 *     CCYYMMDDTHHMM        CCYY-MM-DDTHH:MM
 *     CCYYMMDDTHHMMSS      CCYY-MM-DDTHH:MM:SS
 * Missing time-of-day components default to zero.
 *
 * text:    NUL-terminated string to parse.
 * isotime: receives the mktime() result for the parsed fields, which
 *          mktime() interprets as local time.
 *
 * Any other input is reported through invaliddate(), which terminates the
 * process.
 */
static void parseISO8601(char *text, time_t *isotime) {
    const char *c = text;
    /* Zero every field; tm_isdst = -1 lets mktime() decide whether DST
       applies instead of reading an indeterminate value. */
    struct tm tmstruct = {0};
    int year = 0;
    int month = 0;
    int days = 0;
    int hours = 0;
    int minutes = 0;
    int seconds = 0;
    int consumed = 0;

    *isotime = 0;
    tmstruct.tm_isdst = -1;

    /* NOTE: the extended format has to be tried first, because otherwise
       the separating '-' would be read by sscanf as the sign of the next
       number.  %n records how many characters were really consumed, so the
       cursor never moves past the end of a short input. */
    if (sscanf(c, "%4d-%2d-%2d%n", &year, &month, &days, &consumed) == 3) {
        c += consumed;
    }
    else if (sscanf(c, "%4d%2d%2d%n", &year, &month, &days, &consumed) == 3) {
        c += consumed;
    }
    else {
        invaliddate();
    }

    if (*c == 'T') {
        /* time of day part */
        c++;
        if (sscanf(c, "%2d%2d%n", &hours, &minutes, &consumed) == 2) {
            c += consumed;
        }
        else if (sscanf(c, "%2d:%2d%n", &hours, &minutes, &consumed) == 2) {
            c += consumed;
        }
        else {
            invaliddate();
        }
        if (*c == ':') {
            c++;
        }
        if (*c != '\0') {
            if (sscanf(c, "%2d%n", &seconds, &consumed) == 1) {
                c += consumed;
            }
            else {
                invaliddate();
            }
            if (*c != '\0') { /* something left? */
                invaliddate();
            }
        }
    }
    else if (*c != '\0') {
        /* Anything other than end-of-string or 'T' after the date. */
        invaliddate();
    }

    tmstruct.tm_year = year - 1900;
    tmstruct.tm_mon = month - 1;
    tmstruct.tm_mday = days;
    tmstruct.tm_hour = hours;
    tmstruct.tm_min = minutes;
    tmstruct.tm_sec = seconds;
    *isotime = mktime(&tmstruct);
}
/*
 * Read the next line chunk of one log and refresh its timestamp.
 *
 * coll:     collection that owns the log.
 * log_pos:  index of the log within coll->logs.
 * date_pos: 0-based index of the space-separated field holding the date.
 *
 * When the read returns NULL the log is marked done and coll->num_done is
 * incremented.  When no date field can be located a warning is printed and
 * the log keeps its previous cur_date, so the line is still emitted next to
 * its predecessor instead of being dated from an uninitialised buffer.
 */
static void set_date(logcollection *coll, int log_pos, int date_pos) {
    logfile *log = &coll->logs[log_pos];
    char buf[DATE_SIZE];
    time_t date;

    if (fgets(log->cur_position, MAX_LINE, log->infile) == NULL) {
        coll->num_done++;
        log->done = 1;
        return;
    }

    if (find_date(log->cur_position, buf, DATE_SIZE, ' ', date_pos) < 0) {
        fprintf(stderr, "Unable to find the date field\n");
        /* buf holds nothing parseable; leave cur_date as it was. */
        return;
    }

    parseISO8601(buf, &date);
    log->cur_date = date;
}
/*
 * Open every input file and load the first line of each.
 *
 * coll:      collection to initialise.
 * filenames: start of a contiguous table of NUL-terminated names, one every
 *            MAX_FILE_LEN bytes.  The names are referenced, not copied, so
 *            the table must outlive the collection.
 * num_files: number of names in the table.
 * date_pos:  0-based index of the date field, forwarded to set_date().
 *
 * Returns 0 on success.  On failure prints a message, releases everything
 * acquired so far (streams and the array), leaves the collection empty and
 * returns -1.
 */
static int logcollection_init(logcollection *coll, char *filenames, int num_files, int date_pos) {
    int i;

    coll->logs = NULL;
    coll->num_logs = 0;
    coll->num_done = 0;

    if (num_files < 0) {
        return -1;
    }

    /* Request at least one element so that an empty collection does not
       depend on what calloc(0, ...) returns. */
    coll->logs = calloc(num_files > 0 ? (size_t)num_files : 1, sizeof *coll->logs);
    if (coll->logs == NULL) {
        fprintf(stderr, "Unable to allocate memory for %d log files\n", num_files);
        return -1;
    }
    coll->num_logs = num_files;

    for (i = 0; i < num_files; ++i) {
        char *filename = filenames + ((size_t)i * MAX_FILE_LEN);
        logfile *log = &coll->logs[i];

        log->infile = fopen(filename, "r");
        if (log->infile == NULL) {
            fprintf(stderr, "Unable to open infile: %s, %s\n", filename, strerror(errno));
            /* Undo the files opened before this one. */
            while (i-- > 0) {
                fclose(coll->logs[i].infile);
            }
            free(coll->logs);
            coll->logs = NULL;
            coll->num_logs = 0;
            coll->num_done = 0;
            return -1;
        }
        log->name = filename;
        log->done = 0;
        log->cur_date = 0;
        /* An empty file never fills the buffer; keep it a valid string. */
        log->cur_position[0] = '\0';
        set_date(coll, i, date_pos);
    }
    return 0;
}
/*
 * Close every input stream and release the log array.
 *
 * All streams are closed and the array is freed even if one fclose() fails,
 * so a single failure no longer leaks the remaining resources.  The
 * collection is left empty afterwards.
 *
 * Returns 0 if every stream closed cleanly, -1 if any fclose() failed.
 */
static int logcollection_free(logcollection *coll) {
    int status = 0;
    int i;

    for (i = 0; i < coll->num_logs; ++i) {
        if (fclose(coll->logs[i].infile) != 0) {
            status = -1;
        }
    }
    free(coll->logs);
    coll->logs = NULL;
    coll->num_logs = 0;
    return status;
}
/*
 * Merge all logs of the collection into `outfile`, repeatedly emitting the
 * line selected by find_next_log() until every log has been read to its end.
 *
 * coll:     initialised collection; each log already holds its first line.
 * date_pos: 0-based index of the date field, forwarded to set_date().
 * outfile:  stream opened for writing.
 *
 * Lines longer than the MAX_LINE-1 characters that fit in the buffer are
 * copied through in several chunks.  A chunk is treated as incomplete only
 * when it fills the buffer AND does not end in a newline, so a line of
 * exactly MAX_LINE-1 bytes is not glued to the following one.
 *
 * Returns 0 on success, -1 after printing a message on a read or write
 * error.
 */
int merge_logs(logcollection *coll, int date_pos, FILE *outfile) {
    while (coll->num_done < coll->num_logs) {
        int next_log = find_next_log(coll);
        logfile *cur_log = &coll->logs[next_log];
        size_t len = strlen(cur_log->cur_position);

        if (fwrite(cur_log->cur_position, 1, len, outfile) != len) {
            fprintf(stderr, "Error writing to output file: %s\n", strerror(errno));
            return -1;
        }

        /* Copy the remainder of an over-long line. */
        while (len == MAX_LINE - 1 && cur_log->cur_position[len - 1] != '\n') {
            if (fgets(cur_log->cur_position, MAX_LINE, cur_log->infile) == NULL) {
                if (ferror(cur_log->infile)) {
                    fprintf(stderr, "Error reading from log file: %s\n", strerror(errno));
                    return -1;
                }
                /* End of file: the last line simply had no newline. */
                break;
            }
            len = strlen(cur_log->cur_position);
            if (fwrite(cur_log->cur_position, 1, len, outfile) != len) {
                fprintf(stderr, "Error writing to output file: %s\n", strerror(errno));
                return -1;
            }
        }

        set_date(coll, next_log, date_pos);
    }
    return 0;
}
/*
 * Print the usage summary to stdout.  Declared with a (void) prototype so
 * that calls passing arguments are diagnosed by the compiler.
 */
static void display_help(void) {
    printf("USAGE: nginx_log_merger [OPTIONS]\r\n\r\n"
           "OPTIONS:\r\n"
           " -i Logfile to merge (may specify multiple -i args)\r\n"
           " -o Merged output file\r\n"
           " -d (optional) 0 based index position of the ISO8601 date. Used as the comparator. Default: 3\r\n");
}
static int parse_opts(struct opt *op, int argc, char **argv) {
int ch;
int in_spec, out_spec = 0;
static struct option longopts[] = {
{ "infile", required_argument, 0, 'i' },
{ "outfile", required_argument, 0, 'o' },
{ "date-pos", required_argument, 0, 'd' },
{ "help", no_argument, 0, 'h' },
{ 0, 0, 0, 0 }
};
/* Default date position to the 3rd index, 0 based */
op->date_pos = 3;
while ((ch = getopt_long(argc, argv, "i:o:d:", longopts, NULL)) != -1) {
switch (ch) {
case 'i':
if (op->num_infiles > MAX_FILE_COUNT+1) {
fprintf(stderr, "You have exceeded the maximum allowed files of: %d\n", MAX_FILE_COUNT);
return -1;
}
strncpy(op->infile_names[op->num_infiles], optarg, strlen(optarg));
op->num_infiles++;
in_spec = 1;
break;
case 'o':
strncpy(op->outfile_name, optarg, strlen(optarg));
out_spec = 1;
break;
case 'd':
op->date_pos = atoi(optarg);
break;
case 'h':
default:
return -1;
}
}
if (!in_spec && !out_spec) {
return -1;
}
return 0;
}
/*
 * Entry point: parse the options, open the output file, merge every input
 * log into it in timestamp order, then release all resources.
 *
 * Exits with a non-zero status after printing a message if any step fails;
 * returns 0 on success.
 */
int main(int argc, char **argv) {
    struct opt op;
    logcollection coll;
    int res;
    FILE *outfile;

    /* Start from a known state regardless of what the parser touches. */
    memset(&op, 0, sizeof op);

    res = parse_opts(&op, argc, argv);
    if (res < 0) {
        display_help();
        exit(-1);
    }

    outfile = fopen(op.outfile_name, "w+");
    if (outfile == NULL) {
        fprintf(stderr, "Unable to open outfile\n");
        /* Nothing can be written without an output stream. */
        exit(-1);
    }

    /* Use the parsed date position so that -d actually takes effect. */
    res = logcollection_init(&coll, *op.infile_names, op.num_infiles, op.date_pos);
    if (res) {
        fprintf(stderr, "Unable to initialize log collection\n");
        exit(-1);
    }

    printf("Merging logs...\n");
    res = merge_logs(&coll, op.date_pos, outfile);
    if (res < 0) {
        fprintf(stderr, "Log merge failed: %s\n", strerror(errno));
        exit(-1);
    }
    printf("Done!\n");

    res = logcollection_free(&coll);
    if (res) {
        fprintf(stderr, "Unable to free log collection\n");
        exit(-1);
    }

    /* fclose() flushes buffered output, so a failure here means data loss. */
    if (fclose(outfile) != 0) {
        fprintf(stderr, "Unable to close outfile: %s\n", strerror(errno));
        exit(-1);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment