Skip to content

Instantly share code, notes, and snippets.

@semarco
Created September 21, 2015 15:35
Show Gist options
  • Save semarco/05de1c349f09cf09d952 to your computer and use it in GitHub Desktop.
Save semarco/05de1c349f09cf09d952 to your computer and use it in GitHub Desktop.
# The Importer pulls Excel based customer data into the local database.
class ProductImporter
# Exception we raise when there is a mismatch between the excel columns
# and our database defined ones.
class ColumnMismatch < StandardError
def initialize(mismatched_columns)
@mismatched_columns = mismatched_columns
super(message)
end
def message
<<-MSG.squish
The Excel columns do not match the ones defined in the database.
The mismatching columns are: #{@mismatched_columns.inspect}
MSG
end
end
def self.running?
# Need to prefix like check because 'Importer' is too generic
# Product Importer will always indicate a running or queued import
Delayed::Job.where("handler like '%#{name}%'").where('locked_at is not NULL').any?
end
def self.queued?
Delayed::Job.where("handler like '%#{name}%'").where('locked_at is NULL').any?
end
# Import excel file at +file_path+ to our +Product+ table.
#
# In case there is a mismatch between the excel columns and those from
# the +Product+ table, a +ColumnMistmatch+ exception will be raised indicating
# the intruding columns.
def self.import(file_path)
unless ProjectType.any? && ProjectTypeCategory.any?
abort 'Products require project type data. Please import that first `rake psp:project_types:import`'
end
import_product_types(file_path)
book = Creek::Book.new(file_path)
sheet = book.sheets[0]
all_columns = sheet.rows.first.values
# The ones we care about.
product_specific_excel_columns = all_columns[0...42].push(all_columns[167]).push(all_columns[170])
use_case_specific_columns = all_columns[43...139]
product_type_specific_columns = all_columns[140...166]
boolean_column_names = Product.columns.select{|c| c.sql_type == "boolean" }.map(&:name)
unless (a = product_specific_excel_columns.sort) == (b = ProductColumnMapper::COLUMN_MAPPINGS.keys.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
unless (a = use_case_specific_columns.sort) == (b = ProductColumnMapper::USE_CASE_COLUMNS.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
unless (a = product_type_specific_columns.sort) == (b = ProductColumnMapper::PRODUCT_TYPE_COLUMNS.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
index = 0
sheet.rows.each do |row|
# Skip the next rows (column titles and so on..).
index = index + 1
next if index < 2
break unless row.map{|c| c[1]}.any?
# Build product data hash. Note that we rely on the column mappings
# to have the same ordering as the excel file row titles.
product_specific_row = row.values[0...42].push(row.values[167]).push(row.values[170])
use_case_specific_row = row.values[43...139]
product_type_specific_row = row.values[140...166]
product_data = Hash[ProductColumnMapper::COLUMN_MAPPINGS.values.zip(product_specific_row)]
use_case_data = Hash[ProductColumnMapper::USE_CASE_COLUMNS.zip(use_case_specific_row)]
product_type_data = Hash[ProductColumnMapper::PRODUCT_TYPE_COLUMNS.zip(product_type_specific_row)]
# Process this row according to our business rules.
product_data.each do |k, v|
# FIXES - DEPRECATION WARNING: You attempted to assign a value which is not explicitly `true` or `false` to a boolean column. Currently this
# value casts to `false`. This will change to match Ruby's semantics, and will cast to `true` in Rails 5. If you would like to maintain
# the current behavior, you should explicitly handle the values you would like cast to `false`.
# REASON: 'FSC environmental' columns contains 'X' for true, and '0' for false, and this '0' is read as 0.0 float, and doesn't get
# translated properly to `false` value - ConnectionAdapters::Column::FALSE_VALUES includes 0 but not 0.0
if boolean_column_names.include?(k)
if v.try(:downcase) == 'x' || v.try(:downcase) == 'y' # 'X' or 'Y' generally mean true.
product_data[k] = true
elsif v.try(:zero?)
product_data[k] = false
elsif v.nil?
product_data.delete(k) # instead of assigning nil we use column default values
end
next
end
# This is because Creek parses the brand id to integer
# and does away with the leading zero, which we do not want
# to do, since other files store the same ID as text (and hence
# creek will not parse it there. :/)
if k == 'brand_id'
product_data[k] = v.rjust(4, '0')
next
end
if k == 'packaging_type'
case v
when 'F'
product_data[k] = 'sheets'
next
when 'B'
product_data[k] = 'envelopes'
next
else
product_data[k] = nil
next
end
end
end
# Using active record, but should this prove too slow, we might
# want to do the following:
# 1. Execute an SQL insert.
# 2. If still slow, wrap this whole sheet iteration in a transaction.
product = Product.where(article_id: product_data['article_id']).first_or_create! do |product|
product.attributes = product_data
end
# +bundled_data+ is project_type_category + project_type + elements. See Excel file.
use_case_data.each do |bundled_data, value|
next unless value.try(:downcase) == 'x'
project_type_name = bundled_data.split("\;")[1]
element = bundled_data.split("\;")[2]
# I am relaying on the assumption that it's in our database, which it should.
prj_type = ProjectType.find_by(name: project_type_name)
UseCase.create(product_id: product.id, project_type_id: prj_type.id, element: element)
end
product_type_data.each do |product_type_name, value|
next unless value.try(:downcase) == 'x'
translation_key = product_type_name.downcase.gsub(/\s/, '_')
# I am relaying on the assumption that it's in our database, which it should.
product_type = ProductType.find_by(translation_key: translation_key)
product.update_attribute(:product_type_id, product_type.id)
end
end
import_danish_product_descriptions(file_path)
import_swedish_product_descriptions(file_path)
import_norwegian_product_descriptions(file_path)
end
def self.import_product_types(file_path)
book = Creek::Book.new(file_path)
sheet = book.sheets[1]
excel_column_names = sheet.rows.first.values[0...5].uniq
unless (a = excel_column_names.sort) == (b = ProductTypeColumnMapper::COLUMN_MAPPINGS.keys.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
index = 0
sheet.rows.each do |row|
# Skip the next rows (column titles and so on..).
index = index + 1
next if index < 2
# This is the last row.
break unless row.map{|c| c[1]}.any?
product_type_data = Hash[ProductTypeColumnMapper::COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]
translation_key = product_type_data['en_gb'].downcase.gsub(/\s/, '_')
ProductType.create!(translation_key: translation_key)
product_type_data.each do |locale, value|
Translation.create!(key: translation_key, locale: locale, value: value)
end
end
end
# Will import product descriptions and update the country according
# to the language in which said descriptions are.
def self.import_danish_product_descriptions(file_path)
book = Creek::Book.new(file_path)
sheet = book.sheets[2]
excel_column_names = sheet.rows.first.values[0...3].uniq
unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
index = 0
unless country = Country.find_by(code: 'dk')
abort "We could not find country with code 'dk'. Perhaps you forgot to seed countries?"
end
sheet.rows.each do |row|
# Skip the next rows (column titles and so on..).
index = index + 1
next if index < 2
# This is the last row.
break unless row.map{|c| c[1]}.any?
product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]
product = Product.find_by(article_id: product_description_data['article_id'])
# Product already has its country set, we clone it and
# assign it to this country.
if product.country_id.present?
to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
else
product.update(country_id: country.id)
to_be_assigned = product
end
ProductDescription.create!(product_id: to_be_assigned.id,
short_description: product_description_data['short_description'],
long_description: product_description_data['long_description'],
country_id: country.id
)
end
end
# Will import product descriptions and update the country according
# to the language in which said descriptions are.
def self.import_norwegian_product_descriptions(file_path)
book = Creek::Book.new(file_path)
sheet = book.sheets[4]
excel_column_names = sheet.rows.first.values[0...3].uniq
unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
index = 0
unless country = Country.find_by(code: 'no')
abort "We could not find country with code 'no'. Perhaps you forgot to seed countries?"
end
sheet.rows.each do |row|
# Skip the next rows (column titles and so on..).
index = index + 1
next if index < 2
# This is the last row.
break unless row.map{|c| c[1]}.any?
product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]
product = Product.find_by(article_id: product_description_data['article_id'])
# Product already has its country set, we clone it and
# assign it to this country.
if product.country_id.present?
to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
else
product.update(country_id: country.id)
to_be_assigned = product
end
ProductDescription.create!(product_id: to_be_assigned.id,
short_description: product_description_data['short_description'],
long_description: product_description_data['long_description'],
country_id: country.id
)
end
end
# Will import product descriptions and update the country according
# to the language in which said descriptions are.
def self.import_swedish_product_descriptions(file_path)
book = Creek::Book.new(file_path)
sheet = book.sheets[3]
excel_column_names = sheet.rows.first.values[0...3].uniq
unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
mismatching_cols = (a - b) | (b - a)
fail ColumnMismatch.new(mismatching_cols)
end
index = 0
unless country = Country.find_by(code: 'se')
abort "We could not find country with code 'se'. Perhaps you forgot to seed countries?"
end
sheet.rows.each do |row|
# Skip the next rows (column titles and so on..).
index = index + 1
next if index < 2
# This is the last row.
break unless row.map{|c| c[1]}.any?
product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]
product = Product.find_by(article_id: product_description_data['article_id'])
# Product already has its country set, we clone it and
# assign it to this country.
if product.country_id.present?
to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
else
product.update(country_id: country.id)
to_be_assigned = product
end
ProductDescription.create!(product_id: to_be_assigned.id,
short_description: product_description_data['short_description'],
long_description: product_description_data['long_description'],
country_id: country.id
)
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment