Skip to content

Instantly share code, notes, and snippets.

@HappyCerberus
Created March 31, 2015 11:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HappyCerberus/3d6bd6fe1d8dbeeb2929 to your computer and use it in GitHub Desktop.
Save HappyCerberus/3d6bd6fe1d8dbeeb2929 to your computer and use it in GitHub Desktop.
Fast line-by-line file parser
#include "FastLineReader.h"
// POSIX
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
// C++ STD
#include <iostream>
#include <cstring>
using namespace std;
/** Memory-map a file and report each newline-terminated line to a callback.
 *
 * The file is mapped read-only and scanned with memchr(). For every '\n'
 * found, callback(begin, end) is invoked, where begin points to the first
 * character of the line and end points to the terminating '\n'. The lines are
 * not NUL-terminated and the pointers refer to the mapping, which is released
 * before this function returns.
 *
 * An empty file is treated as a file with zero lines (returns 0, no callbacks).
 * Text after the last newline is reported only when MALFORMED_TEXFILE is
 * defined at compile time.
 *
 * @param filename file to be parsed
 * @param callback function that will be called for each line
 * @returns 0 on success, -1 if the file could not be opened, stat'ed or
 *          memory-mapped (a message is written to std::cerr)
 **/
int fastLineParser(const char * const filename, void (*callback)(const char * const, const char * const))
{
    int fd = open(filename, O_RDONLY); // open file
    if (fd == -1)
    {
        std::cerr << "Could not open \"" << filename << "\" for reading (" << std::strerror(errno) << ")." << std::endl;
        return -1;
    }

    struct stat fs;
    if (fstat(fd, &fs) == -1)
    {
        std::cerr << "Could not stat \"" << filename << "\" for reading (" << std::strerror(errno) << ")." << std::endl;
        close(fd);
        return -1;
    }

    // mmap() rejects a zero-length mapping with EINVAL; an empty file simply
    // contains no lines, so report success without mapping anything.
    if (fs.st_size == 0)
    {
        close(fd);
        return 0;
    }

#ifdef POSIX_FADV_SEQUENTIAL
    // Announce the desire to sequentially read this file. Use the named
    // constant: the numeric values of the advice flags are platform-specific.
    // The call is purely advisory, so a failure is deliberately ignored.
    (void) posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

    const size_t length = static_cast<size_t>(fs.st_size);
    char *buf = static_cast<char*>(mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0));
    if (buf == MAP_FAILED)
    {
        std::cerr << "Could not memory map file \"" << filename << "\" (" << std::strerror(errno) << ")." << std::endl;
        close(fd);
        return -1;
    }

    char *buff_end = buf + length;
    char *begin = buf, *end = NULL;

    // Search for a newline in the remainder of the file. memchr() only ever
    // returns a pointer inside [begin, buff_end), so end + 1 is at most
    // buff_end, and a zero-length search then terminates the loop.
    while ((end = static_cast<char*>(std::memchr(begin, '\n', static_cast<size_t>(buff_end - begin)))) != NULL)
    {
        callback(begin, end);
        begin = end + 1;
    }

    // enable if you are working with malformed text files, proper text file needs to end with a newline
#ifdef MALFORMED_TEXFILE
    // Only report a trailing fragment when one exists; otherwise a well-formed
    // file would produce a spurious empty line.
    if (begin != buff_end)
        callback(begin, buff_end);
#endif

    // Cleanup failures are not actionable here and are deliberately ignored.
    munmap(buf, length);
    close(fd);
    return 0;
}
/* Copyright (c) 2015 Simon Toth kontakt@simontoth.cz
* Licensed under the MIT license: http://opensource.org/licenses/MIT
*/
#ifndef FASTLINEREADER_H
#define FASTLINEREADER_H
// STD C++
#include <iosfwd>
/** Quick line-by-line parser of text files for POSIX/Linux
*
* This function provides a fast line parser with a callback model. The file
* is memory-mapped read-only and the callback is invoked once for every
* newline-terminated line, in file order.
*
* Text following the last newline is passed to the callback only when the
* implementation is compiled with MALFORMED_TEXFILE defined.
*
* @param filename file to be parsed
* @param callback function that will be called for each line; the first
*        argument points to the first character of the line, the second
*        argument points to the terminating newline (one past the last
*        character of the line). The line is not NUL-terminated and both
*        pointers refer to the mapping, which is released before
*        fastLineParser returns.
* @returns 0 on success, -1 if the file could not be opened, stat'ed or
*          memory-mapped (an error message is printed to the standard error
*          stream)
**/
int fastLineParser(const char * const filename, void (*callback)(const char * const, const char * const));
#endif // FASTLINEREADER_H
@westfly
Copy link

westfly commented Apr 17, 2019

The callback could be replaced by std::function to achieve better scalability in C++11.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment