Skip to content

Instantly share code, notes, and snippets.

@apainintheneck
Created July 3, 2022 16:36
Show Gist options
  • Save apainintheneck/5f68bd8af9fd0a984205aa083e3b084a to your computer and use it in GitHub Desktop.
Save apainintheneck/5f68bd8af9fd0a984205aa083e3b084a to your computer and use it in GitHub Desktop.
A simple CSV reader/writer library
#pragma once
#include <cctype>
#include <cstddef>
#include <fstream>
#include <istream>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
/*
csv_cpp is compliant with RFC 4180 (https://datatracker.ietf.org/doc/html/rfc4180)
with a few exceptions.
1. It only uses the newline [\n] to separate lines instead of CRLF.
2. Custom separators are allowed as long as they aren't the
double quote ["] or newline [\n] character. The default separator
is the comma [,].
Note: csv_cpp does NOT trim any leading or trailing whitespace characters
or check to make sure the CSV file is valid.
---API---
Read:
----
A CSV file can be read from an istream or a file and use custom separators. It will ignore blank
lines and fail silently if the custom separator is either a double quote ["] or a newline [\n].
std::vector<std::vector<std::string>> read(std::istream& input, const char SEPARATOR = COMMA);
std::vector<std::vector<std::string>> read(const std::string& filepath, const char SEPARATOR = COMMA);
Write Default:
-------------
A CSV file is written from a vector<vector<string>> data structure using a custom separator
if provided and the return value indicates success. Data can be written to an ostream or a file.
bool write(const std::vector<std::vector<std::string>>& records, std::ostream& output, const char SEPARATOR = COMMA);
bool write(const std::vector<std::vector<std::string>>& records, const std::string& filepath, const char SEPARATOR = COMMA);
Write Custom:
------------
A CSV file is written from a vector<Record> data structure where Record can be any custom type
and the return value indicates success. Data can be written to an ostream or a file.
Required Function: vector<string> record_to_fields(const Record& record);
This record_to_fields function is called internally to turn each Record into a series of strings
that can be stored in a CSV file.
template <typename Record>
bool write(const std::vector<Record>& records, std::ostream& output, const char SEPARATOR = COMMA);
template <typename Record>
bool write(const std::vector<Record>& records, const std::string& filepath, const char SEPARATOR = COMMA);
*/
namespace csv_cpp {

// Characters with special meaning in the CSV grammar. They are typed constants
// rather than macros so that including this header never defines or #undefs
// identifiers in the including translation unit.
constexpr char DQUOTE = '"';
constexpr char COMMA = ',';
constexpr char NEWLINE = '\n';

namespace detail {

    /*
     CSV Syntax

     DQUOTE = ["]
     SEPARATOR = [^"\n]
     NEWLINE = [\n]

     line = field (SEPARATOR field)* NEWLINE

     field = DQUOTE CHAR* DQUOTE
           | CHAR*

     CHAR = DQUOTE
          | [^DQUOTE] (?=SEPARATOR|NEWLINE)
     */

    // Widens `ch` to the stream's int_type so it can be compared with the
    // result of peek()/get(). Comparing a plain `char` directly would
    // sign-extend bytes >= 0x80 on signed-char platforms and never match.
    inline std::istream::int_type to_int(const char ch) {
        return std::istream::traits_type::to_int_type(ch);
    }

    // True when `value` (as returned by peek()/get()) is the end-of-input marker.
    inline bool is_eof(const std::istream::int_type value) {
        using traits = std::istream::traits_type;
        return traits::eq_int_type(value, traits::eof());
    }

    // Reads one field from `input` and returns its unescaped contents.
    //
    // The terminating SEPARATOR or NEWLINE is left in the stream for the
    // caller. A field that starts with a double quote is parsed as an escaped
    // field: the surrounding quotes are dropped, a pair of double quotes
    // yields a single double quote, and separators/newlines inside the quotes
    // are kept. Returns an empty string when `input` is not good() on entry.
    // Reaching the end of the input simply ends the field.
    inline std::string parse_field(std::istream& input, const char SEPARATOR) {
        using traits = std::istream::traits_type;

        std::string field_data;
        if(not input.good()) return field_data;

        const std::istream::int_type quote = to_int(DQUOTE);
        const std::istream::int_type separator = to_int(SEPARATOR);
        const std::istream::int_type newline = to_int(NEWLINE);

        if(input.peek() == quote) {
            // Parse escaped field
            input.ignore();

            // True right after an unpaired double quote: either the field just
            // closed, or the next double quote completes an escaped pair.
            bool is_quote = false;
            for(std::istream::int_type next = input.peek(); not is_eof(next); next = input.peek()) {
                if(next == quote) {
                    // Only store the second of a pair of double quotes
                    if(is_quote) field_data.push_back(DQUOTE);
                    is_quote = not is_quote;
                } else {
                    // A separator or newline only ends the field after the closing quote
                    if(is_quote and (next == separator or next == newline)) break;
                    field_data.push_back(traits::to_char_type(next));
                }
                input.ignore();
            }
        } else {
            // Parse plain field: everything up to the next separator, newline or end of input
            for(std::istream::int_type next = input.peek();
                not is_eof(next) and next != separator and next != newline;
                next = input.peek()) {
                field_data.push_back(traits::to_char_type(next));
                input.ignore();
            }
        }

        return field_data;
    }

    // Reads one line from `input` and returns its fields.
    //
    // A blank line is consumed and reported as an empty vector, as is the end
    // of the input. Otherwise fields are parsed until a NEWLINE has been
    // consumed or the stream stops being good().
    inline std::vector<std::string> parse_line(std::istream& input, const char SEPARATOR) {
        const std::istream::int_type newline = to_int(NEWLINE);

        const std::istream::int_type first = input.peek();
        if(is_eof(first)) return {};
        if(first == newline) {
            input.ignore();
            return {};
        }

        std::vector<std::string> line_data;
        do {
            line_data.push_back(parse_field(input, SEPARATOR));
            // get() consumes the delimiter that ended the field
        } while(input.good() and input.get() != newline);

        return line_data;
    }

    // True when `field` contains a double quote, a newline or the separator
    // and therefore has to be written as an escaped (quoted) field.
    inline bool needs_escaping(const std::string& field, const char SEPARATOR) {
        const char special[] = {DQUOTE, NEWLINE, SEPARATOR};
        return field.find_first_of(special, 0, sizeof special) != std::string::npos;
    }

    // A separator is usable as long as it cannot be confused with the quote
    // or the line terminator.
    inline bool valid_separator(const char ch) {
        return ch != DQUOTE and ch != NEWLINE;
    }

    // Writes `fields` to `output` as one CSV line terminated by NEWLINE,
    // quoting the fields that need it. An empty `fields` writes nothing.
    inline void write_record(const std::vector<std::string>& fields, std::ostream& output, const char SEPARATOR) {
        const std::size_t count = fields.size();
        for(std::size_t i = 0; i < count; ++i) {
            const std::string& field = fields[i];
            if(needs_escaping(field, SEPARATOR)) {
                // Escape field
                output << DQUOTE;
                for(const char ch : field) {
                    // Convert one double quote into a pair of double quotes
                    if(ch == DQUOTE) output << DQUOTE;
                    output << ch;
                }
                output << DQUOTE;
            } else {
                output << field;
            }

            // Separator between fields, newline after the last one
            output << (i + 1 == count ? NEWLINE : SEPARATOR);
        }
    }

} // namespace detail

//
// Read
//

// Parses every remaining line of `input` into a vector of fields.
// Blank lines are skipped. Returns an empty result when SEPARATOR is a
// double quote or a newline.
inline std::vector<std::vector<std::string>> read(std::istream& input, const char SEPARATOR = COMMA) {
    std::vector<std::vector<std::string>> csv_data;
    if(not detail::valid_separator(SEPARATOR)) return csv_data;

    while(input.good()) {
        std::vector<std::string> line = detail::parse_line(input, SEPARATOR);
        if(not line.empty()) csv_data.push_back(std::move(line)); // Ignore blank lines
    }

    return csv_data;
}

// Opens `filepath` and parses it with read(std::istream&, char).
// Returns an empty result when the file cannot be opened.
inline std::vector<std::vector<std::string>> read(const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ifstream infile(filepath);
    if(not infile.is_open()) return {};
    return read(infile, SEPARATOR);
}

//
// Write Default
//

// Writes each record as one CSV line to `output`.
// Returns false without writing when `output` is not good() or SEPARATOR is
// invalid; otherwise returns whether `output` is still good() afterwards.
inline bool write(const std::vector<std::vector<std::string>>& records, std::ostream& output, const char SEPARATOR = COMMA) {
    if(not output.good() or not detail::valid_separator(SEPARATOR)) return false;

    for(const auto& record : records) {
        detail::write_record(record, output, SEPARATOR);
    }

    return output.good();
}

// Creates/truncates `filepath` and writes `records` to it.
// Returns false when the file cannot be opened or the write fails.
inline bool write(const std::vector<std::vector<std::string>>& records, const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ofstream outfile(filepath);
    return outfile.is_open() and write(records, outfile, SEPARATOR);
}

//
// Write Custom
//

// Writes records of a user-defined type. Each record is converted with an
// unqualified call to `record_to_fields(record)`, which must yield the
// record's fields as a std::vector<std::string>.
// Return value as for the default write().
template <typename Record>
bool write(const std::vector<Record>& records, std::ostream& output, const char SEPARATOR = COMMA) {
    if(not output.good() or not detail::valid_separator(SEPARATOR)) return false;

    for(const auto& record : records) {
        const std::vector<std::string> fields = record_to_fields(record);
        detail::write_record(fields, output, SEPARATOR);
    }

    return output.good();
}

// Creates/truncates `filepath` and writes the custom `records` to it.
// Returns false when the file cannot be opened or the write fails.
template <typename Record>
bool write(const std::vector<Record>& records, const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ofstream outfile(filepath);
    return outfile.is_open() and write(records, outfile, SEPARATOR);
}

} // namespace csv_cpp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment