Last active
February 15, 2021 22:49
-
-
Save RoyBellingan/1f30f2cab37223f140c7d073104f52a5 to your computer and use it in GitHub Desktop.
readCSVRow.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
std::vector<QStringRef> readCSVRow(const QString& line, const QStringList& separator, const QStringList& escape) { | |
std::vector<QStringRef> part; | |
// newState = delta[currentState][event] | |
// rows are states | |
// columns are events: | |
// 0 = read separator | |
// 1 = read escape | |
// 2 = read new line | |
// 3 = read "normal" character | |
// 4 = reached the end of line (eof if line is a file content) | |
static const int delta[][5] = { | |
// , " \n ? eof | |
{1, 2, -1, 0, -1}, // 0: parsing (store char) | |
{1, 2, -1, 0, -1}, // 1: parsing (store column) | |
{3, 4, 3, 3, -2}, // 2: quote entered (no-op) | |
{3, 4, 3, 3, -2}, // 3: parsing inside quotes (store char) | |
{1, 3, -1, 0, -1}, // 4: quote exited (no-op) | |
// -1: end of row, store column, success | |
// -2: eof inside quotes | |
}; | |
part.clear(); //clear del contenitore dei pezzi in cui scomporre la riga | |
if (line.isEmpty()) { | |
return part; | |
} | |
int actualState = 0; | |
int pos = 0; | |
// initial invalid value | |
int event = -1; | |
int currentBlockStart = -1; | |
int blockEnd = 0; | |
static const QString newline = "\n"; | |
static const QString empty; | |
while (actualState >= 0) { | |
if (pos >= line.length()) | |
event = 4; | |
else { | |
QStringRef ch = line.midRef(pos, 1); | |
pos++; | |
if (separator.contains(ch, Qt::CaseInsensitive)){ | |
event = 0; | |
} else if (escape.contains(ch, Qt::CaseInsensitive)) { | |
event = 1; | |
} else if (ch == newline){ | |
event = 2; | |
} else{ | |
event = 3; | |
} | |
} | |
actualState = delta[actualState][event]; | |
switch (actualState) { | |
case 4: | |
blockEnd = pos - 1; | |
break; | |
case 0: | |
case 3: | |
if (currentBlockStart == -1) { | |
currentBlockStart = pos - 1; | |
} | |
break; | |
case -1: | |
case 1: | |
if (!blockEnd) { | |
blockEnd = pos - 1; | |
} | |
if (currentBlockStart == -1) { //a new block has never started, we have two separator in a row | |
part.push_back(empty.midRef(0, 0)); | |
blockEnd = 0; | |
} else { | |
QStringRef v = line.midRef(currentBlockStart, (blockEnd - currentBlockStart)); | |
blockEnd = 0; | |
part.push_back(v); | |
currentBlockStart = -1; | |
//curentBlock.clear(); | |
} | |
break; | |
} | |
} | |
if (actualState == -2){ | |
throw std::runtime_error("End-of-file found while inside quotes."); | |
} | |
return part; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment