Created
July 3, 2022 16:36
-
-
Save apainintheneck/5f68bd8af9fd0a984205aa083e3b084a to your computer and use it in GitHub Desktop.
A simple CSV reader/writer library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <fstream> | |
#include <string> | |
#include <vector> | |
#include <cctype> | |
/* | |
csv_cpp is compliant with RFC 4180 (https://datatracker.ietf.org/doc/html/rfc4180)
with a few exceptions. | |
1. It only uses the newline [\n] to separate lines instead of CRLF. | |
2. Custom separators are allowed as long as they aren't the | |
double quote ["] or newline [\n] character. The default separator | |
is the comma [,]. | |
Note: csv_cpp does NOT trim any leading or trailing whitespace characters | |
or check to make sure the CSV file is valid. | |
---API--- | |
Read: | |
---- | |
A CSV file can be read from an istream or a file, optionally with a custom separator. Blank
lines are ignored. If the custom separator is a double quote ["] or a newline [\n], no error is
reported and an empty result is returned.
std::vector<std::vector<std::string>> read(std::istream& input, const char SEPARATOR = COMMA); | |
std::vector<std::vector<std::string>> read(const std::string& filepath, const char SEPARATOR = COMMA); | |
Write Default: | |
------------- | |
A CSV file is written from a vector<vector<string>> data structure using a custom separator | |
if provided and the return value indicates success. Data can be written to an ostream or a file. | |
bool write(const std::vector<std::vector<std::string>>& records, std::ostream& output, const char SEPARATOR = COMMA); | |
bool write(const std::vector<std::vector<std::string>>& records, const std::string& filepath, const char SEPARATOR = COMMA); | |
Write Custom: | |
------------ | |
A CSV file is written from a vector<Record> data structure where Record can be any custom type | |
and the return value indicates success. Data can be written to an ostream or a file. | |
Required Function: vector<string> record_to_fields(const Record& record); | |
This record_to_fields function is called internally to turn each Record into a series of strings | |
that can be stored in a CSV file. | |
template <typename Record> | |
bool write(const std::vector<Record>& records, std::ostream& output, const char SEPARATOR = COMMA); | |
template <typename Record> | |
bool write(const std::vector<Record>& records, const std::string& filepath, const char SEPARATOR = COMMA); | |
*/ | |
namespace csv_cpp {

// Characters with a fixed meaning in the CSV grammar. These are typed,
// namespace-scoped constants rather than preprocessor macros, so they follow
// normal C++ scoping and need no #undef at the end of the header.
constexpr char DQUOTE = '"';
constexpr char COMMA = ',';
constexpr char NEWLINE = '\n';

namespace detail {

/*
 CSV Syntax

 DQUOTE = ["]
 SEPARATOR = [^"\n]
 NEWLINE = [\n]

 line = field (SEPARATOR field)* NEWLINE

 field = DQUOTE CHAR* DQUOTE
       | CHAR*

 CHAR = DQUOTE
      | [^DQUOTE] (?=SEPARATOR|NEWLINE)
*/

// Reads a single field starting at the current position of `input`.
//
// A field that starts with a double quote is parsed as an escaped field: the
// enclosing quotes are dropped, a pair of double quotes yields one double
// quote, and separators/newlines inside the quotes are kept as data.
// Any other field runs up to the next separator, newline or end of input.
//
// The terminating separator/newline is NOT consumed; it is left for the
// caller. Returns an empty string if the stream is not readable.
inline std::string parse_field(std::istream& input, const char SEPARATOR) {
    typedef std::char_traits<char> traits;

    std::string field_data;
    if(not input.good()) return field_data;

    // peek() reports characters as int_type, so the delimiters are widened the
    // same way before comparing. Comparing against a plain (possibly signed)
    // char would never match bytes >= 0x80.
    const traits::int_type end_of_input = traits::eof();
    const traits::int_type quote = traits::to_int_type(DQUOTE);
    const traits::int_type separator = traits::to_int_type(SEPARATOR);
    const traits::int_type newline = traits::to_int_type(NEWLINE);

    if(input.peek() == quote) {
        // Parse escaped field
        input.ignore(); // Drop the opening double quote

        // True right after an odd double quote, i.e. one that is either the
        // closing quote or the first half of an escaped pair.
        bool after_quote = false;

        // Stopping on end of input (instead of on stream state alone) keeps
        // the EOF marker from being stored as a data byte.
        for(traits::int_type next = input.peek(); next != end_of_input; next = input.peek()) {
            if(next == quote) {
                // Only store the second of a pair of double quotes
                if(after_quote) field_data.push_back(DQUOTE);
                input.ignore();
                after_quote = not after_quote;
            } else if((next == separator or next == newline) and after_quote) {
                // Closing quote followed by a delimiter: the field is done
                break;
            } else {
                field_data.push_back(traits::to_char_type(next));
                input.ignore();
            }
        }
    } else {
        for(traits::int_type next = input.peek();
            next != end_of_input and next != separator and next != newline;
            next = input.peek()) {
            field_data.push_back(traits::to_char_type(next));
            input.ignore();
        }
    }

    return field_data;
}

// Reads one line of fields from `input`, consuming the trailing newline.
// Returns an empty vector for a blank line or when the input is exhausted.
inline std::vector<std::string> parse_line(std::istream& input, const char SEPARATOR) {
    typedef std::char_traits<char> traits;
    const traits::int_type newline = traits::to_int_type(NEWLINE);

    const traits::int_type first = input.peek();
    if(first == traits::eof() or first == newline) {
        input.ignore(); // Consume the newline of a blank line
        return {};
    }

    std::vector<std::string> line_data;
    do {
        line_data.push_back(parse_field(input, SEPARATOR));
        // parse_field stops in front of a separator or newline (or at the end
        // of input); get() consumes that delimiter and tells us which it was.
    } while(input.good() and input.get() != newline);

    return line_data;
}

// Returns true if `field` contains a character that would be read back as
// CSV syntax and therefore has to be wrapped in double quotes when written.
inline bool needs_escaping(const std::string& field, const char SEPARATOR) {
    for(const char ch : field) {
        if(ch == DQUOTE or ch == NEWLINE or ch == SEPARATOR) return true;
    }
    return false;
}

// Returns true if `ch` may be used as a field separator.
inline bool valid_separator(const char ch) {
    return ch != DQUOTE and ch != NEWLINE;
}

// Writes one field to `output`, quoting it when required and turning every
// double quote inside a quoted field into a pair of double quotes.
inline void write_field(std::ostream& output, const std::string& field, const char SEPARATOR) {
    if(not needs_escaping(field, SEPARATOR)) {
        output << field;
        return;
    }

    output << DQUOTE;
    for(const char ch : field) {
        if(ch == DQUOTE) output << DQUOTE; // First half of the escaped pair
        output << ch;
    }
    output << DQUOTE;
}

// Writes `fields` as one line: fields joined by SEPARATOR and terminated by a
// newline. A record without any fields writes nothing at all.
inline void write_record(std::ostream& output, const std::vector<std::string>& fields, const char SEPARATOR) {
    bool first_field = true;
    for(const std::string& field : fields) {
        if(not first_field) output << SEPARATOR;
        write_field(output, field, SEPARATOR);
        first_field = false;
    }
    if(not fields.empty()) output << NEWLINE;
}

} // namespace detail

//
// Read
//

// Reads every record from `input` until the stream is exhausted.
// Blank lines are skipped. Returns an empty result if SEPARATOR is a double
// quote or a newline.
inline std::vector<std::vector<std::string>> read(std::istream& input, const char SEPARATOR = COMMA) {
    std::vector<std::vector<std::string>> csv_data;
    if(not detail::valid_separator(SEPARATOR)) return csv_data;

    while(input.good()) {
        std::vector<std::string> line = detail::parse_line(input, SEPARATOR);
        if(line.empty()) continue; // Ignore blank lines
        csv_data.push_back(std::move(line));
    }

    return csv_data;
}

// Reads every record from the file at `filepath`.
// Returns an empty result if the file cannot be opened.
inline std::vector<std::vector<std::string>> read(const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ifstream infile(filepath);
    if(not infile.is_open()) return {};
    return read(infile, SEPARATOR);
}

//
// Write Default
//

// Writes `records` to `output`, one line per record.
// Returns false if the stream is not writable on entry, if SEPARATOR is a
// double quote or a newline, or if the stream is not good after writing.
inline bool write(const std::vector<std::vector<std::string>>& records, std::ostream& output, const char SEPARATOR = COMMA) {
    if(not output.good() or not detail::valid_separator(SEPARATOR)) return false;

    for(const auto& record : records) {
        detail::write_record(output, record, SEPARATOR);
    }

    return output.good();
}

// Writes `records` to the file at `filepath`.
// Returns false if the file cannot be opened or writing fails.
inline bool write(const std::vector<std::vector<std::string>>& records, const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ofstream outfile(filepath);
    return outfile.is_open() and write(records, outfile, SEPARATOR);
}

//
// Write Custom
//

// Writes a vector of arbitrary `Record`s to `output`, one line per record.
// Each record is converted to its fields by an unqualified call to
// record_to_fields(record), whose result must be usable to initialise a
// std::vector<std::string>.
// Returns false under the same conditions as the default write().
template <typename Record>
bool write(const std::vector<Record>& records, std::ostream& output, const char SEPARATOR = COMMA) {
    if(not output.good() or not detail::valid_separator(SEPARATOR)) return false;

    for(const auto& record : records) {
        const std::vector<std::string> fields = record_to_fields(record);
        detail::write_record(output, fields, SEPARATOR);
    }

    return output.good();
}

// Writes a vector of arbitrary `Record`s to the file at `filepath`.
// Returns false if the file cannot be opened or writing fails.
template <typename Record>
bool write(const std::vector<Record>& records, const std::string& filepath, const char SEPARATOR = COMMA) {
    std::ofstream outfile(filepath);
    return outfile.is_open() and write(records, outfile, SEPARATOR);
}

} // namespace csv_cpp
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment