Skip to content

Instantly share code, notes, and snippets.

@DanielHeath
Created September 4, 2018 23:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DanielHeath/bbff29b3f017f7ae0ea4461ef71021e0 to your computer and use it in GitHub Desktop.
Save DanielHeath/bbff29b3f017f7ae0ea4461ef71021e0 to your computer and use it in GitHub Desktop.
How I do CSV imports with postgres & rails
class CsvImport
include ActiveRecord::Sanitization::ClassMethods
attr_accessor :connection, :header
def self.read_csv(file)
file.is_a?(Array) ? file : file.read.encode(newline: :universal).lines
end
def initialize(connection, csv_lines, table_name)
@connection, @table_name = connection, table_name
@header, *@file = csv_lines
letters = (97..122).map(&:chr).shuffle
# Unique suffix
@table_name += "__" + letters[1..10].join
end
def check_header_row(expected)
req_header_row = @header.strip.split(",")
raise "Expected header to be #{expected} but it was #{req_header_row}" if req_header_row != expected
end
def schema(options={})
@connection.create_table(
@table_name,
{temporary: true}.merge(options)
) do |t|
yield(t)
end
end
def prepare_data
import!
yield klass
end
def validate_data
import!
yield klass
end
def after
import!
yield klass
end
def klass
return @klass if @klass
@klass = make_klass
end
def make_klass
tn = @table_name
Class.new(ApplicationRecord) do
self.table_name = tn
end
end
def import!
return if @imported
@imported = true
i = 2 # row number
@connection.raw_connection.copy_data <<-SQL do
copy #{@table_name} from stdin with csv delimiter ',' quote '"'
SQL
@file.each do |line|
@connection.raw_connection.put_copy_data i.to_s + "," + line
i += 1
end
end
end
def teardown!
@connection.drop_table(@table_name)
end
def self.import(connection, file, table_name = csv_imports)
instance = new(connection, file, table_name)
connection.transaction do
yield(instance)
instance.teardown!
end
end
end
class FooBar < ApplicationRecord
def self.import_from_csv(csv_lines)
CsvImport.import(ApplicationRecord.connection, csv_lines, 'tmp_upload_thingy') do |import|
req_header_row = import.header.strip.split(",")
raise "Expected header to start with #{csv_headers} but it was #{req_header_row}" if req_header_row[0..(csv_headers.length-1)] != csv_headers
import.schema do |t|
t.string :name
t.string :kind
(req_header_row.length - csv_headers.length).times do |i|
t.string "col_#{i}"
end
end
import.import!
import.klass.connection.execute <<-SQL
insert into foo_bars (
name,
type,
) select
name,
kind,
from #{import.klass.table_name}
SQL
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment