Last active
March 24, 2016 23:35
-
-
Save raydog/3a7e63e5e5c63f3c9366 to your computer and use it in GitHub Desktop.
A simple utility to read a plain text file into a buffer, and safely iterate over it line-by-line.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <errno.h> | |
#include <string.h> | |
#include "linebuffer.h" | |
#define LINE_BUFFER_LENGTH 1024 | |
/*
 * Internal state of a LineBuffer. The type is opaque to callers; only this
 * translation unit looks inside it.
 */
struct LineBuffer {
    char   *buf;     /* Heap buffer holding the bytes read from the file so far. */
    size_t  bufidx;  /* Offset in buf of the first byte not yet handed out.      */
    size_t  buflen;  /* Number of valid bytes currently stored in buf.           */
    int     is_eof;  /* Non-zero once the underlying stream has reported EOF.    */
    FILE   *handle;  /* Stream the data is being read from.                      */
};
/** | |
* This function will move the current line to the front of the buffer, | |
* and then bring in one more buffer's worth of text to place at the end. | |
* | |
* @param lb The LineBuffer to operate on. | |
* @return 1 if failure. 0 if ok. | |
*/ | |
static int _pull_in_data(LineBuffer *lb) { | |
if (lb == NULL) { return 1; } | |
/* Don't do anything if we have reached the EOF: */ | |
if (lb->is_eof) { return 0; } | |
/* Calculate the lengths: */ | |
size_t old_data_len = lb->buflen - lb->bufidx; | |
size_t num_chars = old_data_len + LINE_BUFFER_LENGTH; | |
/* Allocate our initial buffer: */ | |
char *buf = (char *) malloc ( num_chars * sizeof(char) ); | |
if (buf == NULL) { | |
fprintf(stderr, "Failed to allocate a new buffer for a LineBuffer\n"); | |
return 1; | |
} | |
/* Copy the old data into the struct, if we have any: */ | |
if (old_data_len) { | |
strncpy(buf, &lb->buf[lb->bufidx], old_data_len); | |
} | |
/* Copy data from the file into the buffer: */ | |
size_t result = fread( &buf[old_data_len], sizeof(char), LINE_BUFFER_LENGTH, lb->handle); | |
/* The old buffer, if it exists, isn't needed anymore: */ | |
if (lb->buf) { free(lb->buf); } | |
/* Copy in new values: */ | |
lb->buf = buf; | |
lb->bufidx = 0; | |
lb->buflen = old_data_len + result; | |
lb->is_eof = feof(lb->handle); | |
return 0; | |
} | |
LineBuffer *line_buffer_new_from_filename(char *filename) { | |
if (filename == NULL) { | |
fprintf(stderr, "ERROR: Null filename handed to LineBuffer\n"); | |
return NULL; | |
} | |
/* Try to open the file for reading: */ | |
FILE *handle = fopen(filename, "r"); | |
if (handle == NULL) { | |
char *failstr = strerror(errno); | |
fprintf(stderr, "ERROR: Failed to open `%s`: %s\n", filename, failstr); | |
return NULL; | |
} | |
/* Simple allocation: */ | |
LineBuffer *out = (LineBuffer*) malloc(sizeof(LineBuffer)); | |
if (out == NULL) { | |
fprintf(stderr, "Failed to allocate LineBuffer\n"); | |
return NULL; | |
} | |
/* Init the structure : */ | |
out->buf = NULL; | |
out->bufidx = 0; | |
out->buflen = 0; | |
out->is_eof = 0; | |
out->handle = handle; | |
/* Bring in our first batch of data: */ | |
if ( _pull_in_data(out) ) { | |
free(out); | |
return NULL; | |
} | |
return out; | |
} | |
/**
 * Tears down a LineBuffer: closes its file handle (if any), then frees the
 * data buffer and the structure itself. Passing NULL is a no-op.
 *
 * @param lb The LineBuffer to destroy.
 */
void line_buffer_destroy(LineBuffer *lb) {
    if (lb != NULL) {
        if (lb->handle != NULL) {
            fclose(lb->handle);
        }
        free(lb->buf);  /* free(NULL) is harmless, so no guard is needed */
        free(lb);
    }
}
/** | |
* Will return the next line in the file. A line is terminated by either \r, | |
* \n, or \r\n. | |
* | |
* The line | |
* | |
* @param lb The LineBuffer object to use. | |
* @param out_len Output for the line length, which includes the newline characters, if any. | |
* @return The string. Returns NULL on failure. | |
*/ | |
char *line_buffer_read_line(LineBuffer *lb, size_t *out_len) { | |
size_t len = 0; | |
int is_cr = 0; | |
while ( 1 ) { | |
/* Pull in more characters if we need them: */ | |
if ( (lb->bufidx + len) >= lb->buflen ) { | |
/* Check for EOF: */ | |
if ( lb->is_eof ) { | |
char *ret = &lb->buf[lb->bufidx]; | |
lb->bufidx += len; | |
*out_len = len; | |
return ret; | |
} | |
/* If not EOF, then just bring in more data: */ | |
if ( _pull_in_data(lb) ) { | |
return NULL; | |
} | |
} | |
/* Check a character: */ | |
char ch = lb->buf[lb->bufidx + len]; | |
len ++; | |
if (ch == '\n') { | |
/* No matter what, a \n finishes this string. */ | |
char *ret = &lb->buf[lb->bufidx]; | |
is_cr = 0; | |
lb->bufidx += len; | |
*out_len = len; | |
return ret; | |
} | |
if (is_cr) { | |
/* We have to stop here. */ | |
char *ret = &lb->buf[lb->bufidx]; | |
is_cr = 0; | |
lb->bufidx += len - 1; | |
*out_len = len - 1; | |
return ret; | |
} else { | |
if (ch == '\r') { | |
is_cr = 1; | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef LINE_BUFFER_H
#define LINE_BUFFER_H

#include <stddef.h> /* size_t, so this header compiles on its own */

/**
 * LineBuffer: Used to read a file, line-by-line.
 * The type is opaque; create it with line_buffer_new_from_filename() and
 * release it with line_buffer_destroy().
 */
typedef struct LineBuffer LineBuffer;

/**
 * Will create a new LineBuffer object.
 * @param filename The file to read from.
 * @return The allocated linebuffer object, or NULL on failure.
 */
LineBuffer *line_buffer_new_from_filename(char *filename);

/**
 * Will destroy a LineBuffer object. Do this to close the readable file,
 * and release all memory allocated by the buffer. Passing NULL is a no-op.
 *
 * @param lb The LineBuffer object to destroy.
 */
void line_buffer_destroy(LineBuffer *lb);

/**
 * Will read a single line from the LineBuffer and return it. A line ends at
 * \r, \n, or \r\n, and the terminator is part of the returned line.
 *
 * @param lb      The LineBuffer to read from.
 * @param out_len The length of the line is returned at this address. A
 *                length of 0 means the end of the file has been reached.
 * @return        The starting character in the line, or NULL on failure.
 *                The line is not NUL-terminated. Note that the
 *                address is a raw pointer into the internal buffer.
 *                This is safe so long as you don't call this function
 *                again while you need values in the current line.
 */
char *line_buffer_read_line(LineBuffer *lb, size_t *out_len);

#endif /* LINE_BUFFER_H */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment