Skip to content

Instantly share code, notes, and snippets.

@mukeshthawani
Last active October 1, 2021 13:12
Show Gist options
  • Save mukeshthawani/7dd1d7d66ae7bd451dc34d02a7c0087b to your computer and use it in GitHub Desktop.
Save mukeshthawani/7dd1d7d66ae7bd451dc34d02a7c0087b to your computer and use it in GitHub Desktop.
A simple CSV parser in Swift

A simple CSV parser in Swift

Description

A CSV parser written in Swift. It takes a String containing CSV text and converts it into a 2D array of values. Intended complexity: O(n) (two linear passes over the input: rows first, then fields), where n is the number of characters in the given string.

Usage

let sampleText = """
                    john,UK,2000
                    jack,UK,2005
                    """
let values = parse(string: sampleText)
print(values)
// prints: [["john", "UK", "2000"], ["jack", "UK", "2005"]]

Reading from a file

`parse(string:)` accepts a String, so if you have a CSV file in your app's resources, you can load its text like this:

let file = "grades"
guard let url = Bundle.main.url(forResource: file, withExtension: "csv"),
    let text = try? String(contentsOf: url, encoding: .utf8) else {
        return
}

// Pass the text
let csvValues = parse(string: text)
print("CSV values: \(csvValues)")

Todo

  • Handle the field (column) names
/// Parses CSV text into a two-dimensional array of field values.
///
/// The outer array holds one entry per row and each inner array holds that
/// row's fields. Enclosing double quotes are kept in the returned values, and
/// delimiters or line breaks inside a quoted field do not split it.
///
/// Rows may be terminated by `\n`, `\r\n` or `\r`. A line break that ends the
/// text terminates the last row instead of leaking into its final field or
/// starting an extra row. A blank line between rows yields an empty array.
///
/// - Parameters:
///   - string: The CSV text to parse.
///   - delimiter: The character separating the fields of a row. Defaults to
///     a comma; pass e.g. `";"` for semicolon-separated text.
/// - Returns: The parsed rows; an empty array when `string` is empty.
func parse(string: String, delimiter: Character = ",") -> [[String]] {
    let enclosure: Character = "\""

    var rows: [String] = []
    var currentRow = ""
    var isInsideEnclosure = false

    for character in string {
        if character == enclosure {
            isInsideEnclosure.toggle()
        }
        // Swift treats "\r\n" as a single Character, so it has to be matched
        // explicitly; comparing against "\n" alone never splits CRLF text.
        let isLineBreak = character == "\n" || character == "\r\n" || character == "\r"
        if isLineBreak && !isInsideEnclosure {
            rows.append(currentRow)
            currentRow = ""
        } else {
            currentRow.append(character)
        }
    }
    // Text that ends with a line break leaves nothing pending here, so no
    // trailing empty row is produced.
    if !currentRow.isEmpty {
        rows.append(currentRow)
    }

    // Passing `enclosure:` explicitly selects the quote-aware splitter from
    // the String extension rather than the standard library's `split`.
    let fieldSeparator = String(delimiter)
    return rows.map { $0.split(separator: fieldSeparator, enclosure: enclosure) }
}
extension String {
    /// Splits the string into the pieces found between occurrences of
    /// `separator`.
    ///
    /// A separator located between a pair of `enclosure` characters is treated
    /// as ordinary text. Enclosure characters are kept in the returned pieces.
    /// A separator at the very start or end of the string yields an empty
    /// piece on that side, so `"a,b,"` produces `["a", "b", ""]`.
    ///
    /// - Parameters:
    ///   - separator: The text that delimits pieces. May be longer than one
    ///     character.
    ///   - enclosure: The character that opens and closes a region in which
    ///     separators are ignored. Defaults to a double quote.
    /// - Returns: The pieces in order of appearance. An empty string returns
    ///   an empty array; an empty `separator` never matches, so the whole
    ///   string is returned as a single piece.
    func split(separator: String, enclosure: Character = "\"") -> [String] {
        guard !isEmpty else { return [] }
        guard !separator.isEmpty else { return [self] }

        // Hoisted: `count` walks the whole separator on every call.
        let separatorLength = separator.count

        var values: [String] = []
        // Start of the piece currently being collected.
        var pieceStart = startIndex
        var cursor = startIndex
        var isInsideEnclosure = false

        while cursor < endIndex {
            if !isInsideEnclosure {
                // `prefix` clamps at the end of the string, so this is safe
                // even when fewer characters remain than the separator has.
                let candidate = self[cursor...].prefix(separatorLength)
                if candidate == separator {
                    values.append(String(self[pieceStart..<cursor]))
                    // Resume after the whole separator so that overlapping
                    // matches cannot be found inside it.
                    cursor = candidate.endIndex
                    pieceStart = cursor
                    continue
                }
            }
            if self[cursor] == enclosure {
                isInsideEnclosure.toggle()
            }
            cursor = index(after: cursor)
        }

        // Whatever follows the last separator (possibly nothing) is the
        // final piece.
        values.append(String(self[pieceStart..<endIndex]))
        return values
    }
}
/// Unit tests for `parse(string:delimiter:)`.
final class CSVParserTests: XCTestCase {

    /// A single line yields exactly one row.
    func testSingleLineString() {
        let sampleText = "john,UK,2000"
        XCTAssertEqual(
            parse(string: sampleText),
            [["john", "UK", "2000"]])
    }

    /// Each line of a multi-line input becomes its own row.
    func testMultiLineString() {
        let sampleText = """
            john,UK,2000
            jack,UK,2005
            """
        XCTAssertEqual(
            parse(string: sampleText),
            [["john", "UK", "2000"], ["jack", "UK", "2005"]])
    }

    /// A delimiter inside double quotes does not split the field, and the
    /// quotes are kept in the value.
    func testSingleLineStringWithDoubleQuotes() {
        let sampleText = "\"john, last\",UK,2000"
        XCTAssertEqual(
            parse(string: sampleText),
            [["\"john, last\"", "UK", "2000"]])
    }

    /// A line break inside double quotes stays part of the field instead of
    /// starting a new row.
    func testMultiLineStringWithDoubleQuotes() {
        // The quoted field spans two lines; the expected value below must
        // mirror the fixture exactly, including the embedded line break.
        let sampleText = """
            john,UK,2000
            jack,UK,2005
            "natasha
            obama",UK,2009
            """
        XCTAssertEqual(parse(string: sampleText), [
            ["john", "UK", "2000"],
            ["jack", "UK", "2005"],
            ["\"natasha\nobama\"", "UK", "2009"]])
    }

    /// A doubled quote inside a quoted field is preserved verbatim.
    func testStringWithDoubleQuoteInsideDoubleQuotes() {
        let sampleText = "\"aaa\",\"b\"\"bb\",\"ccc\""
        XCTAssertEqual(
            parse(string: sampleText),
            [["\"aaa\"", "\"b\"\"bb\"", "\"ccc\""]])
    }

    /// Leading whitespace in a field is not trimmed.
    func testStringWithWhiteSpace() {
        let sampleText = " aaa, bbb, ccc"
        XCTAssertEqual(
            parse(string: sampleText),
            [[" aaa", " bbb", " ccc"]])
    }

    /// Input without any delimiter yields one row with one field.
    func testStringWithSingleValue() {
        XCTAssertEqual(parse(string: "UK"), [["UK"]])
    }

    /// An empty input produces no rows.
    func testStringWithEmptyValue() {
        XCTAssertEqual(parse(string: ""), [])
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment