Skip to content

Instantly share code, notes, and snippets.

@integeruser
Last active July 6, 2019 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save integeruser/a7a4b8dfceef1ad9ded0c9cf7c88c1d9 to your computer and use it in GitHub Desktop.
Save integeruser/a7a4b8dfceef1ad9ded0c9cf7c88c1d9 to your computer and use it in GitHub Desktop.
Benchmarking different ways of reading lines from a file
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
/* $(CXX) -std=c++11 -march=native -O3 -o readlines readlines.cpp */
/* Size in bytes of the read buffers used by the benchmarks below.
 * The value is parenthesized so the macro keeps its meaning when it is used
 * inside a larger expression (e.g. `x % BUF_SIZE` or `x / BUF_SIZE`). */
#define BUF_SIZE (1024*16)
/* Clock used to time every benchmark. */
using hrc = std::chrono::high_resolution_clock;
/**
 * Benchmark: count the lines of `filename` using C stdio `fgets`.
 *
 * Prints "<function>(fgets): <N> lines / <seconds> s" to stdout. A final
 * line without a trailing newline is counted. A line longer than the buffer
 * is counted once, even though fgets returns it in several chunks.
 *
 * If the file cannot be opened the error is reported on stderr and nothing
 * is timed or printed to stdout.
 */
void test00(const char* filename)
{
    size_t num_lines = 0;
    char buf[BUF_SIZE];
    // Index of the last byte of buf: fgets stores its terminating NUL here
    // only when it has filled the entire buffer.
    const size_t last = sizeof(buf) - 1;

    FILE* fp = fopen(filename, "r");
    if (!fp) {
        perror(filename);
        return;
    }

    // True while the chunks read so far belong to a line whose terminating
    // newline has not been seen yet (line longer than the buffer).
    bool partial = false;

    hrc::time_point start_time = hrc::now();
    for (;;) {
        // Sentinel: a non-zero byte that fgets overwrites with '\0' only if
        // the chunk fills the buffer. This detects truncated lines in O(1),
        // without a strlen() per line. It relies on fgets not writing past
        // the terminating NUL it stores.
        buf[last] = 1;
        if (!fgets(buf, BUF_SIZE, fp)) {
            break;
        }
        if (buf[last] == '\0' && buf[last - 1] != '\n') {
            partial = true;   // buffer full and no newline: line continues
        } else {
            partial = false;  // newline reached, or short final chunk at EOF
            ++num_lines;
        }
    }
    if (partial) {
        ++num_lines;          // over-long last line that hit EOF without '\n'
    }
    hrc::time_point end_time = hrc::now();

    if (ferror(fp)) {
        perror(filename);
    }
    fclose(fp);

    std::chrono::duration<float> duration = end_time-start_time;
    std::cout << __func__ << "(fgets): " << num_lines << " lines / " <<
        std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl;
}
/**
 * Benchmark: count the lines of `filename` using `std::getline` on a
 * `std::ifstream`.
 *
 * Prints "<function>(getline): <N> lines / <seconds> s" to stdout. If the
 * file cannot be opened the failure is reported on stderr and nothing is
 * timed, instead of silently reporting "0 lines".
 */
void test01(const char* filename)
{
    size_t num_lines = 0;

    std::ifstream file(filename);
    if (!file) {
        std::cerr << __func__ << ": cannot open " << filename << std::endl;
        return;
    }

    std::string line;  // reused across iterations so its capacity is kept

    hrc::time_point start_time = hrc::now();
    while (std::getline(file, line)) {
        ++num_lines;
    }
    hrc::time_point end_time = hrc::now();

    // badbit means the loop stopped on an I/O error rather than end of file.
    if (file.bad()) {
        std::cerr << __func__ << ": error while reading " << filename << std::endl;
    }
    file.close();

    std::chrono::duration<float> duration = end_time-start_time;
    std::cout << __func__ << "(getline): " << num_lines << " lines / " <<
        std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl;
}
/**
 * Benchmark: count the lines of `filename` by reading fixed-size chunks with
 * `std::ifstream::read`, scanning them with `memchr`, and seeking back so the
 * next chunk starts at the beginning of a line.
 *
 * Prints "<function>(ifstream): <N> lines / <seconds> s" to stdout. A final
 * line without a trailing newline is counted. If the file cannot be opened
 * the failure is reported on stderr and nothing is timed.
 */
void test02(const char* filename)
{
    size_t num_lines = 0;
    char buf[BUF_SIZE];

    std::ifstream file(filename);
    if (!file) {
        std::cerr << __func__ << ": cannot open " << filename << std::endl;
        return;
    }

    // True when bytes of a not-yet-terminated line have been consumed and
    // will not be read again.
    bool partial = false;

    hrc::time_point start_time = hrc::now();
    while (file) {
        file.read(buf, sizeof(buf));
        const std::streamsize bytes_read = file.gcount();
        if (bytes_read <= 0) {
            break;
        }

        char* const end = buf + bytes_read;
        char* line = buf;
        char* newline;
        while ((newline = static_cast<char*>(
                    memchr(line, '\n', static_cast<size_t>(end - line))))) {
            line = newline+1;
            ++num_lines;
        }

        const std::streamoff bytes_unused = end - line;
        partial = bytes_unused > 0;
        // Rewind over the unfinished tail only when the chunk held at least
        // one newline: rewinding a chunk with no newline would re-read the
        // same bytes forever. No rewind is attempted once a short read has
        // left the stream in a failed state; that tail is the last line.
        if (partial && line != buf && file) {
            file.seekg(-bytes_unused, std::ios_base::cur);
            partial = false;
        }
    }
    if (partial) {
        ++num_lines;  // final line without a trailing newline
    }
    hrc::time_point end_time = hrc::now();

    file.close();

    std::chrono::duration<float> duration = end_time-start_time;
    std::cout << __func__ << "(ifstream): " << num_lines << " lines / " <<
        std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl;
}
/**
 * Benchmark: count the lines of `filename` with POSIX `read` into a fixed
 * buffer, scanning with `memchr`, and `lseek`ing back so the next chunk
 * starts at the beginning of a line.
 *
 * Prints "<function>(read+lseek): <N> lines / <seconds> s" to stdout. A final
 * line without a trailing newline is counted. Failures of open/read/lseek are
 * reported on stderr; if the file cannot be opened nothing is timed.
 */
void test03(const char* filename)
{
    size_t num_lines = 0;
    char buf[BUF_SIZE];

    const int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        perror(filename);
        return;
    }

    // True when bytes of a not-yet-terminated line have been consumed and
    // will not be read again.
    bool partial = false;
    ssize_t bytes_read;

    hrc::time_point start_time = hrc::now();
    // "> 0" stops on end of file (0) and on error (-1); a negative count
    // must never reach the pointer arithmetic below.
    while ((bytes_read = read(fd, buf, sizeof(buf))) > 0) {
        char* const end = buf + bytes_read;
        char* line = buf;
        char* newline;
        while ((newline = static_cast<char*>(
                    memchr(line, '\n', static_cast<size_t>(end - line))))) {
            line = newline+1;
            ++num_lines;
        }

        if (line == buf) {
            // No newline in this chunk (over-long line or unterminated end
            // of file). Seeking back here would re-read the same bytes
            // forever, so the chunk is consumed as part of the current line.
            partial = true;
            continue;
        }

        partial = false;
        const ptrdiff_t bytes_unused = end - line;
        if (bytes_unused > 0 && lseek(fd, -bytes_unused, SEEK_CUR) == -1) {
            perror(filename);
            break;
        }
    }
    if (bytes_read < 0) {
        perror(filename);
    }
    if (partial) {
        ++num_lines;  // final line without a trailing newline
    }
    hrc::time_point end_time = hrc::now();

    close(fd);

    std::chrono::duration<float> duration = end_time-start_time;
    std::cout << __func__ << "(read+lseek): " << num_lines << " lines / " <<
        std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl;
}
/**
 * Benchmark: count the lines of `filename` with POSIX `read` into a fixed
 * buffer, scanning with `memchr`, and moving the unfinished tail of each
 * chunk to the front of the buffer before the next read (no seeking).
 *
 * Prints "<function>(read+memcpy): <N> lines / <seconds> s" to stdout; the
 * label is kept for output compatibility although the tail is now moved with
 * memmove. A final line without a trailing newline is counted. Failures of
 * open/read are reported on stderr; if the file cannot be opened nothing is
 * timed.
 */
void test04(const char* filename)
{
    size_t num_lines = 0;
    char buf[BUF_SIZE];

    const int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        perror(filename);
        return;
    }

    size_t bytes_kept = 0;  // bytes of an unfinished line held at the start of buf
    bool overlong = false;  // an unfinished line outgrew buf and was dropped from it
    ssize_t bytes_read;

    hrc::time_point start_time = hrc::now();
    // "> 0" stops on end of file (0) and on error (-1); a negative count
    // must never reach the pointer arithmetic below.
    while ((bytes_read = read(fd, buf+bytes_kept, sizeof(buf)-bytes_kept)) > 0) {
        char* const end = buf + bytes_kept + bytes_read;
        char* line = buf;
        // The kept bytes were scanned in a previous iteration and contain no
        // newline, so only the newly read bytes need to be searched.
        char* scan = buf + bytes_kept;
        char* newline;
        while ((newline = static_cast<char*>(
                    memchr(scan, '\n', static_cast<size_t>(end - scan))))) {
            line = scan = newline+1;
            ++num_lines;
        }

        bytes_kept = static_cast<size_t>(end - line);
        if (bytes_kept == sizeof(buf)) {
            // The buffer is full and holds no newline. Keeping it would make
            // the next read request 0 bytes and end the loop early, so the
            // bytes are dropped and the line is remembered as in progress.
            overlong = true;
            bytes_kept = 0;
        } else if (line != buf) {
            overlong = false;
            // Source and destination may overlap when the tail is longer
            // than the consumed prefix, so memmove is required here.
            memmove(buf, line, bytes_kept);
        }
    }
    if (bytes_read < 0) {
        perror(filename);
    }
    if (bytes_kept > 0 || overlong) {
        ++num_lines;  // final line without a trailing newline
    }
    hrc::time_point end_time = hrc::now();

    close(fd);

    std::chrono::duration<float> duration = end_time-start_time;
    std::cout << __func__ << "(read+memcpy): " << num_lines << " lines / " <<
        std::fixed << std::setprecision(3) << duration.count() << " s" << std::endl;
}
/**
 * Entry point. Expects exactly one command-line argument, the path of the
 * file to read, and runs every benchmark on it in order (test00 .. test04).
 *
 * Returns EXIT_SUCCESS after the benchmarks have run, or EXIT_FAILURE (after
 * printing a usage line to stderr) when the argument count is wrong.
 */
int main(int argc, char const *argv[])
{
    if (argc == 2) {
        using benchmark_fn = void (*)(const char*);
        const benchmark_fn benchmarks[] = {test00, test01, test02, test03, test04};

        const char* path = argv[1];
        for (benchmark_fn run : benchmarks) {
            run(path);
        }
        return EXIT_SUCCESS;
    }

    std::cerr << "Usage: fast_readline filename" << std::endl;
    return EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment