Skip to content

Instantly share code, notes, and snippets.

@josephlord
Forked from mukeshthawani/CSVParser.swift
Created October 1, 2021 13:12
Show Gist options
  • Save josephlord/15536e94fb04079e2c7df0b8f60bb323 to your computer and use it in GitHub Desktop.
Save josephlord/15536e94fb04079e2c7df0b8f60bb323 to your computer and use it in GitHub Desktop.
A simple CSV parser in Swift

A simple CSV parser in Swift

Description

A CSV parser written in Swift. Takes a String containing CSV text and converts it into a 2D array of values. The algorithm makes two passes over the n characters of the given string: one to split it into rows and one to split each row into fields.

Usage

let sampleText = """
                    john,UK,2000
                    jack,UK,2005
                    """
let values = parse(string: sampleText)
print(values)
// prints: [["john", "UK", "2000"], ["jack", "UK", "2005"]]

Reading from a file

It accepts a String so if you have a CSV file in the resources, then you can get the text like this:

let file = "grades"
guard let url = Bundle.main.url(forResource: file, withExtension: "csv"),
    let text = try? String(contentsOf: url, encoding: .utf8) else {
        return
}

// Pass the text
let csvValues = parse(string: text)
print("CSV values: \(csvValues)")

Todo

  • Handle the field (column) names
/// Parses CSV text into a two-dimensional array of field values.
///
/// The outer array holds one element per row and each inner array holds
/// that row's fields. Enclosing double quotes are kept as part of the
/// field value; they are neither stripped nor unescaped.
///
/// Rows are terminated by `"\n"` or `"\r\n"`. A line break inside a
/// double-quoted field does not end the row, and a line terminator at the
/// very end of the input does not start an additional row.
///
/// - Parameters:
///   - string: The CSV text to parse.
///   - delimiter: The character separating fields within a row. Defaults
///     to a comma; pass e.g. `";"` for semicolon-separated data.
/// - Returns: The parsed rows. Empty input yields an empty array.
func parse(string: String, delimiter: Character = ",") -> [[String]] {
    var rows: [String] = []
    // Start of the row currently being scanned.
    var rowStart = string.startIndex
    var isInsideDoubleQuotes = false

    for index in string.indices {
        let character = string[index]
        if character == "\"" {
            isInsideDoubleQuotes.toggle()
        } else if !isInsideDoubleQuotes && (character == "\n" || character == "\r\n") {
            // Swift models "\r\n" as a single Character, so it has to be
            // matched explicitly; comparing against "\n" alone would leave
            // CRLF-terminated input as one giant row.
            rows.append(String(string[rowStart..<index]))
            rowStart = string.index(after: index)
        }
    }

    // Text after the last line terminator (if any) is the final row. When
    // the input ends with a terminator there is nothing left to add.
    if rowStart < string.endIndex {
        rows.append(String(string[rowStart..<string.endIndex]))
    }

    return rows.map { $0.split(separator: String(delimiter)) }
}
extension String {
    /// Splits the string into fields at every occurrence of `separator`
    /// that lies outside a pair of `enclosure` characters.
    ///
    /// Enclosure characters are kept in the returned fields. A separator at
    /// the very end of the string produces a trailing empty field, so
    /// `"a,b,"` yields `["a", "b", ""]`.
    ///
    /// - Parameters:
    ///   - separator: The text that delimits fields. May be longer than one
    ///     character. An empty separator never matches, so the whole string
    ///     is returned as a single field.
    ///   - enclosure: The character that toggles "quoted" mode, inside which
    ///     separators are ignored. Defaults to a double quote.
    /// - Returns: The fields in order of appearance, or an empty array when
    ///   the string itself is empty.
    func split(separator: String, enclosure: Character = "\"") -> [String] {
        // An empty string contains no fields at all.
        guard !isEmpty else { return [] }
        // An empty separator can never delimit anything.
        guard !separator.isEmpty else { return [self] }

        // Hoisted: `count` on a String is a linear-time operation.
        let separatorLength = separator.count
        var fields: [String] = []
        // Start of the field currently being scanned.
        var fieldStart = startIndex
        var cursor = startIndex
        var isInsideEnclosure = false

        while cursor < endIndex {
            if !isInsideEnclosure && self[cursor...].starts(with: separator) {
                fields.append(String(self[fieldStart..<cursor]))
                // Jump past the whole separator so that a multi-character
                // separator is never re-examined from its middle.
                cursor = index(cursor, offsetBy: separatorLength)
                fieldStart = cursor
            } else {
                if self[cursor] == enclosure {
                    isInsideEnclosure.toggle()
                }
                formIndex(after: &cursor)
            }
        }

        // Whatever follows the last separator is the final field; it is
        // empty when the string ends with a separator.
        fields.append(String(self[fieldStart..<endIndex]))
        return fields
    }
}
/// Unit tests for `parse(string:delimiter:)`.
///
/// Enclosing double quotes are expected to be preserved in the parsed
/// values, so the expectations below include them verbatim.
class CSVParserTests: XCTestCase {
    /// A single row without a trailing line break.
    func testSingleLineString() {
        let sampleText = "john,UK,2000"
        XCTAssertEqual(
            parse(string: sampleText),
            [["john", "UK", "2000"]])
    }

    /// Several rows separated by line breaks.
    func testMultiLineString() {
        let sampleText = """
        john,UK,2000
        jack,UK,2005
        """
        XCTAssertEqual(
            parse(string: sampleText),
            [["john", "UK", "2000"], ["jack", "UK", "2005"]])
    }

    /// A delimiter inside double quotes must not split the field.
    func testSingleLineStringWithDoubleQuotes() {
        let sampleText = "\"john, last\",UK,2000"
        XCTAssertEqual(
            parse(string: sampleText),
            [["\"john, last\"", "UK", "2000"]])
    }

    /// A line break inside double quotes must not start a new row.
    func testMultiLineStringWithDoubleQuotes() {
        // Written with explicit escapes so that the line break and the
        // space in front of "obama" cannot be lost to re-indentation and
        // always agree with the expectation below.
        let sampleText = "john,UK,2000\njack,UK,2005\n\"natasha\n obama\",UK,2009"
        XCTAssertEqual(parse(string: sampleText), [
            ["john", "UK", "2000"],
            ["jack", "UK", "2005"],
            ["\"natasha\n obama\"", "UK", "2009"]])
    }

    /// Doubled quotes inside a quoted field are kept as-is.
    func testStringWithDoubleQuoteInsideDoubleQuotes() {
        let sampleText = "\"aaa\",\"b\"\"bb\",\"ccc\""
        XCTAssertEqual(
            parse(string: sampleText),
            [["\"aaa\"", "\"b\"\"bb\"", "\"ccc\""]])
    }

    /// Leading white space belongs to the field and is not trimmed.
    func testStringWithWhiteSpace() {
        let sampleText = " aaa, bbb, ccc"
        XCTAssertEqual(
            parse(string: sampleText),
            [[" aaa", " bbb", " ccc"]])
    }

    /// Input without any delimiter is a single one-field row.
    func testStringWithSingleValue() {
        XCTAssertEqual(parse(string: "UK"), [["UK"]])
    }

    /// Empty input produces no rows.
    func testStringWithEmptyValue() {
        XCTAssertEqual(parse(string: ""), [])
    }

    /// A non-default delimiter is honoured.
    func testStringWithCustomDelimiter() {
        XCTAssertEqual(
            parse(string: "john;UK;2000", delimiter: ";"),
            [["john", "UK", "2000"]])
    }
}
@josephlord
Copy link
Author

Thanks for making this public. I found the CSV splitting function really useful. I think I've managed to make it faster by having it return subsequences rather than strings and by calling count less often. Just wanted to share in case you find it useful.

extension String {

    /// Splits the string into subsequences at every occurrence of
    /// `separator` that lies outside a pair of `enclosure` characters.
    ///
    /// The result consists of slices of the receiver rather than copies.
    /// Enclosure characters are kept in the returned slices. A separator at
    /// the very end of the string produces a trailing empty slice, so
    /// `"a,b,"` yields `["a", "b", ""]`.
    ///
    /// - Parameters:
    ///   - separator: The text that delimits fields. May be longer than one
    ///     character. An empty separator never matches, so the whole string
    ///     is returned as a single slice.
    ///   - enclosure: The character that toggles "quoted" mode, inside which
    ///     separators are ignored. Defaults to a double quote.
    /// - Returns: The fields in order of appearance, or an empty array when
    ///   the string itself is empty.
    func csvSplit(separator: String, enclosure: Character = "\"") -> [String.SubSequence] {
        // An empty string contains no fields at all.
        guard !isEmpty else { return [] }
        // An empty separator can never delimit anything.
        guard !separator.isEmpty else { return [self[startIndex..<endIndex]] }

        // Hoisted: `count` on a String is a linear-time operation.
        let separatorCount = separator.count
        var values: [String.SubSequence] = []
        // Start of the field currently being scanned.
        var fieldStart = startIndex
        var cursor = startIndex
        var isInsideEnclosure = false

        while cursor < endIndex {
            if !isInsideEnclosure && self[cursor...].starts(with: separator) {
                values.append(self[fieldStart..<cursor])
                // Jump past the whole separator so that a multi-character
                // separator is never re-examined from its middle.
                cursor = index(cursor, offsetBy: separatorCount)
                fieldStart = cursor
            } else {
                if self[cursor] == enclosure {
                    isInsideEnclosure.toggle()
                }
                formIndex(after: &cursor)
            }
        }

        // Whatever follows the last separator is the final field; it is
        // empty when the string ends with a separator.
        values.append(self[fieldStart..<endIndex])
        return values
    }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment