Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Simple CSV Parser vs. CSV Gem Benchmark
require 'open-uri'
require 'nokogiri'
require 'parslet'
require 'csv'
# --------------------------------------------------
# Auxiliary code
class String
  # Strips the CSV quoting from a field.
  #
  # Removes one double quote from the very start and one from the very end
  # of the string (when present), then collapses every doubled quote ("")
  # into a single quote. Strings without quotes are returned unchanged.
  #
  # The anchors are \A and \z rather than ^ and $ on purpose: ^ and $ match
  # at every line boundary, which would also strip quotes that belong to the
  # content of a multi-line field.
  #
  # @return [String] a new string with the quoting removed
  def unquote
    gsub(/\A"|"\z/, '').gsub('""', '"')
  end
end
# Runs the given block (if any) and prints the elapsed time, in seconds, to
# standard output.
#
# The monotonic clock is used instead of Time.now so that wall-clock
# adjustments during the run cannot skew (or make negative) the measurement.
#
# @yield the code to measure; optional
# @return [nil] the return value of puts
def time
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield if block_given?
  finished = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts finished - started
end
# --------------------------------------------------
# Fetching test data
# Download the GitHub page that renders the CSV fixture and parse its HTML.
# URI.open is called explicitly because Kernel#open no longer handles URLs on
# Ruby 3.0+; the block form closes the response stream once Nokogiri has
# consumed it.
doc = URI.open('https://github.com/circle/fastercsv/blob/master/test/test_data.csv') do |page|
  Nokogiri::HTML(page)
end
# Rebuild the CSV text: each rendered code line becomes one newline-terminated
# record.
# NOTE(review): depends on the page exposing each line as `pre div.line` —
# confirm the selector still matches the current markup.
data = doc.css('pre div.line').map { |line| "#{line.content}\n" }.join
# --------------------------------------------------
# CSV parser
# Parslet grammar for CSV text.
#
# The parse result tags every record as :row and every field as :column.
# Each record, including the last one, must be terminated by a line break.
class Parser < Parslet::Parser
  # One or more line-terminated records.
  rule(:file) { (record.as(:row) >> newline).repeat(1) }
  # Comma-separated fields; at least one field (which may be empty).
  rule(:record) { field.as(:column) >> (comma >> field.as(:column)).repeat }
  # Quoted fields are tried first, then bare ones.
  rule(:field) { escaped | non_escaped }
  # A quoted field may contain commas, line breaks and doubled quotes ("").
  rule(:escaped) { d_quote >> (textdata | comma | cr | lf | (d_quote >> d_quote)).repeat >> d_quote }
  # A bare field is any (possibly empty) run of plain text.
  rule(:non_escaped) { textdata.repeat }
  # Plain text: one or more characters that are not a comma, quote, CR or LF.
  rule(:textdata) { ((comma | d_quote | cr | lf).absent? >> any).repeat(1) }
  # Line break. CRLF is tried first so conventionally terminated CSV parses;
  # a bare LF, or LF followed by CR, is still accepted as before.
  rule(:newline) { (cr >> lf) | (lf >> cr.maybe) }
  rule(:lf) { str("\n") }
  rule(:cr) { str("\r") }
  rule(:d_quote) { str('"') }
  rule(:comma) { str(',') }
  root(:file)
end
# Converts the tree produced by Parser into plain Ruby values.
class Transformer < Parslet::Transform
  # A column whose capture is an Array (the empty match, []) becomes nil;
  # any other capture is stringified and has its CSV quoting removed.
  rule(column: subtree(:field)) { field.to_s.unquote unless field.is_a?(Array) }

  # A row is replaced by whatever its (already transformed) columns are.
  # NOTE(review): a record with a single column may reach this rule as a lone
  # value rather than an Array, depending on how Parslet flattens the
  # sequence — confirm.
  rule(row: subtree(:array)) { array }
end
# --------------------------------------------------
# Benchmark
# CSV parser
# Measure the Parslet pipeline: parse the text into a tree, then transform
# the tree into row values.
time { Transformer.new.apply(Parser.new.parse(data)) }
# => 33.771940832
# CSV gem
# Measure the standard CSV library on the same input.
time { CSV.parse(data) }
# => 0.040131919
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment