semarco/product_import.rb

## product_import.rb
# The Importer pulls Excel based customer data into the local database.
class ProductImporter
  # Exception we raise when there is a mismatch between the excel columns
  # and our database defined ones.
  class ColumnMismatch < StandardError
    def initialize(mismatched_columns)
      @mismatched_columns = mismatched_columns
      super(message)
    end

    def message
      <<-MSG.squish
      The Excel columns do not match the ones defined in the database.

      The mismatching columns are: #{@mismatched_columns.inspect}

      MSG
    end
  end

  def self.running?
    # Need to prefix like check because 'Importer' is too generic
    # Product Importer will always indicate a running or queued import
    Delayed::Job.where("handler like '%#{name}%'").where('locked_at is not NULL').any?
  end

  def self.queued?
    Delayed::Job.where("handler like '%#{name}%'").where('locked_at is NULL').any?
  end

  # Import excel file at +file_path+ to our +Product+ table.
  #
  # In case there is a mismatch between the excel columns and those from
  # the +Product+ table, a +ColumnMistmatch+ exception will be raised indicating
  # the intruding columns.
  def self.import(file_path)

    unless ProjectType.any? && ProjectTypeCategory.any?
      abort 'Products require project type data. Please import that first `rake psp:project_types:import`'
    end

    import_product_types(file_path)

    book =  Creek::Book.new(file_path)
    sheet = book.sheets[0]

    all_columns = sheet.rows.first.values

    # The ones we care about.
    product_specific_excel_columns  = all_columns[0...42].push(all_columns[167]).push(all_columns[170])
    use_case_specific_columns       = all_columns[43...139]
    product_type_specific_columns   = all_columns[140...166]

    boolean_column_names = Product.columns.select{|c| c.sql_type == "boolean" }.map(&:name)

    unless (a = product_specific_excel_columns.sort) == (b = ProductColumnMapper::COLUMN_MAPPINGS.keys.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    unless (a = use_case_specific_columns.sort) == (b = ProductColumnMapper::USE_CASE_COLUMNS.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    unless (a = product_type_specific_columns.sort) == (b = ProductColumnMapper::PRODUCT_TYPE_COLUMNS.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    index = 0

    sheet.rows.each do |row|
      # Skip the next rows (column titles and so on..).
      index = index + 1
      next if index < 2

      break unless row.map{|c| c[1]}.any?

      # Build product data hash. Note that we rely on the column mappings
      # to have the same ordering as the excel file row titles.
      product_specific_row      = row.values[0...42].push(row.values[167]).push(row.values[170])
      use_case_specific_row     = row.values[43...139]
      product_type_specific_row = row.values[140...166]

      product_data      = Hash[ProductColumnMapper::COLUMN_MAPPINGS.values.zip(product_specific_row)]
      use_case_data     = Hash[ProductColumnMapper::USE_CASE_COLUMNS.zip(use_case_specific_row)]
      product_type_data = Hash[ProductColumnMapper::PRODUCT_TYPE_COLUMNS.zip(product_type_specific_row)]

      # Process this row according to our business rules.
      product_data.each do |k, v|
        # FIXES - DEPRECATION WARNING: You attempted to assign a value which is not explicitly `true` or `false` to a boolean column. Currently this
        # value casts to `false`. This will change to match Ruby's semantics, and will cast to `true` in Rails 5. If you would like to maintain
        # the current behavior, you should explicitly handle the values you would like cast to `false`.
        # REASON: 'FSC environmental' columns contains 'X' for true, and '0' for false, and this '0' is read as 0.0 float, and doesn't get
        # translated properly to `false` value - ConnectionAdapters::Column::FALSE_VALUES includes 0 but not 0.0
        if boolean_column_names.include?(k)
          if v.try(:downcase) == 'x' || v.try(:downcase) == 'y' # 'X' or 'Y' generally mean true.
            product_data[k] = true
          elsif v.try(:zero?)
            product_data[k] = false
          elsif v.nil?
            product_data.delete(k) # instead of assigning nil we use column default values
          end
          next
        end

        # This is because Creek parses the brand id to integer
        # and does away with the leading zero, which we do not want
        # to do, since other files store the same ID as text (and hence
        # creek will not parse it there. :/)
        if k == 'brand_id'
          product_data[k] = v.rjust(4, '0')
          next
        end

        if k == 'packaging_type'
          case v
          when 'F'
            product_data[k] = 'sheets'
            next
          when 'B'
            product_data[k] = 'envelopes'
            next
          else
            product_data[k] = nil
            next
          end
        end
      end

      # Using active record, but should this prove too slow, we might
      # want to do the following:

      # 1. Execute an SQL insert.
      # 2. If still slow, wrap this whole sheet iteration in a transaction.
      product = Product.where(article_id: product_data['article_id']).first_or_create! do |product|
        product.attributes = product_data
      end

      # +bundled_data+ is project_type_category + project_type + elements. See Excel file.
      use_case_data.each do |bundled_data, value|
        next unless value.try(:downcase) == 'x'

        project_type_name = bundled_data.split("\;")[1]
        element           = bundled_data.split("\;")[2]

        # I am relaying on the assumption that it's in our database, which it should.
        prj_type = ProjectType.find_by(name: project_type_name)
        UseCase.create(product_id: product.id, project_type_id: prj_type.id, element: element)
      end

      product_type_data.each do |product_type_name, value|
        next unless value.try(:downcase) == 'x'

        translation_key = product_type_name.downcase.gsub(/\s/, '_')

        # I am relaying on the assumption that it's in our database, which it should.
        product_type = ProductType.find_by(translation_key: translation_key)
        product.update_attribute(:product_type_id, product_type.id)
      end
    end

    import_danish_product_descriptions(file_path)
    import_swedish_product_descriptions(file_path)
    import_norwegian_product_descriptions(file_path)
  end

  def self.import_product_types(file_path)
    book =  Creek::Book.new(file_path)
    sheet = book.sheets[1]

    excel_column_names = sheet.rows.first.values[0...5].uniq

    unless (a = excel_column_names.sort) == (b = ProductTypeColumnMapper::COLUMN_MAPPINGS.keys.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    index = 0

    sheet.rows.each do |row|

      # Skip the next rows (column titles and so on..).
      index = index + 1
      next if index < 2

      # This is the last row.
      break unless row.map{|c| c[1]}.any?

      product_type_data = Hash[ProductTypeColumnMapper::COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]

      translation_key = product_type_data['en_gb'].downcase.gsub(/\s/, '_')

      ProductType.create!(translation_key: translation_key)

      product_type_data.each do |locale, value|
        Translation.create!(key: translation_key, locale: locale, value: value)
      end
    end
  end

  # Will import product descriptions and update the country according
  # to the language in which said descriptions are.
  def self.import_danish_product_descriptions(file_path)
    book =  Creek::Book.new(file_path)
    sheet = book.sheets[2]

    excel_column_names = sheet.rows.first.values[0...3].uniq

    unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    index = 0

    unless country = Country.find_by(code: 'dk')
      abort "We could not find country with code 'dk'. Perhaps you forgot to seed countries?"
    end

    sheet.rows.each do |row|

      # Skip the next rows (column titles and so on..).
      index = index + 1
      next if index < 2

      # This is the last row.
      break unless row.map{|c| c[1]}.any?

      product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]

      product = Product.find_by(article_id: product_description_data['article_id'])

      # Product already has its country set, we clone it and
      # assign it to this country.
      if product.country_id.present?
        to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
      else
        product.update(country_id: country.id)
        to_be_assigned = product
      end

      ProductDescription.create!(product_id: to_be_assigned.id,
                                 short_description: product_description_data['short_description'],
                                 long_description: product_description_data['long_description'],
                                 country_id: country.id
                                )
    end
  end

  # Will import product descriptions and update the country according
  # to the language in which said descriptions are.
  def self.import_norwegian_product_descriptions(file_path)
    book =  Creek::Book.new(file_path)
    sheet = book.sheets[4]

    excel_column_names = sheet.rows.first.values[0...3].uniq

    unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    index = 0

    unless country = Country.find_by(code: 'no')
      abort "We could not find country with code 'no'. Perhaps you forgot to seed countries?"
    end

    sheet.rows.each do |row|

      # Skip the next rows (column titles and so on..).
      index = index + 1
      next if index < 2

      # This is the last row.
      break unless row.map{|c| c[1]}.any?

      product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]

      product = Product.find_by(article_id: product_description_data['article_id'])

      # Product already has its country set, we clone it and
      # assign it to this country.
      if product.country_id.present?
        to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
      else
        product.update(country_id: country.id)
        to_be_assigned = product
      end

      ProductDescription.create!(product_id: to_be_assigned.id,
                                 short_description: product_description_data['short_description'],
                                 long_description: product_description_data['long_description'],
                                 country_id: country.id
                                )
    end
  end

  # Will import product descriptions and update the country according
  # to the language in which said descriptions are.
  def self.import_swedish_product_descriptions(file_path)
    book =  Creek::Book.new(file_path)
    sheet = book.sheets[3]

    excel_column_names = sheet.rows.first.values[0...3].uniq

    unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
      mismatching_cols = (a - b) | (b - a)
      fail ColumnMismatch.new(mismatching_cols)
    end

    index = 0

    unless country = Country.find_by(code: 'se')
      abort "We could not find country with code 'se'. Perhaps you forgot to seed countries?"
    end

    sheet.rows.each do |row|

      # Skip the next rows (column titles and so on..).
      index = index + 1
      next if index < 2

      # This is the last row.
      break unless row.map{|c| c[1]}.any?

      product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{|c| c[1]})]

      product = Product.find_by(article_id: product_description_data['article_id'])

      # Product already has its country set, we clone it and
      # assign it to this country.
      if product.country_id.present?
        to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
      else
        product.update(country_id: country.id)
        to_be_assigned = product
      end

      ProductDescription.create!(product_id: to_be_assigned.id,
                                 short_description: product_description_data['short_description'],
                                 long_description: product_description_data['long_description'],
                                 country_id: country.id
                                )
    end
  end
end
	# The Importer pulls Excel based customer data into the local database.
	class ProductImporter
	# Exception we raise when there is a mismatch between the excel columns
	# and our database defined ones.
	class ColumnMismatch < StandardError
	def initialize(mismatched_columns)
	@mismatched_columns = mismatched_columns
	super(message)
	end

	def message
	<<-MSG.squish
	The Excel columns do not match the ones defined in the database.

	The mismatching columns are: #{@mismatched_columns.inspect}

	MSG
	end
	end

	def self.running?
	# Need to prefix like check because 'Importer' is too generic
	# Product Importer will always indicate a running or queued import
	Delayed::Job.where("handler like '%#{name}%'").where('locked_at is not NULL').any?
	end

	def self.queued?
	Delayed::Job.where("handler like '%#{name}%'").where('locked_at is NULL').any?
	end

	# Import excel file at +file_path+ to our +Product+ table.
	#
	# In case there is a mismatch between the excel columns and those from
	# the +Product+ table, a +ColumnMistmatch+ exception will be raised indicating
	# the intruding columns.
	def self.import(file_path)

	unless ProjectType.any? && ProjectTypeCategory.any?
	abort 'Products require project type data. Please import that first `rake psp:project_types:import`'
	end

	import_product_types(file_path)

	book = Creek::Book.new(file_path)
	sheet = book.sheets[0]

	all_columns = sheet.rows.first.values

	# The ones we care about.
	product_specific_excel_columns = all_columns[0...42].push(all_columns[167]).push(all_columns[170])
	use_case_specific_columns = all_columns[43...139]
	product_type_specific_columns = all_columns[140...166]

	boolean_column_names = Product.columns.select{\|c\| c.sql_type == "boolean" }.map(&:name)

	unless (a = product_specific_excel_columns.sort) == (b = ProductColumnMapper::COLUMN_MAPPINGS.keys.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	unless (a = use_case_specific_columns.sort) == (b = ProductColumnMapper::USE_CASE_COLUMNS.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	unless (a = product_type_specific_columns.sort) == (b = ProductColumnMapper::PRODUCT_TYPE_COLUMNS.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	index = 0

	sheet.rows.each do \|row\|
	# Skip the next rows (column titles and so on..).
	index = index + 1
	next if index < 2

	break unless row.map{\|c\| c[1]}.any?

	# Build product data hash. Note that we rely on the column mappings
	# to have the same ordering as the excel file row titles.
	product_specific_row = row.values[0...42].push(row.values[167]).push(row.values[170])
	use_case_specific_row = row.values[43...139]
	product_type_specific_row = row.values[140...166]

	product_data = Hash[ProductColumnMapper::COLUMN_MAPPINGS.values.zip(product_specific_row)]
	use_case_data = Hash[ProductColumnMapper::USE_CASE_COLUMNS.zip(use_case_specific_row)]
	product_type_data = Hash[ProductColumnMapper::PRODUCT_TYPE_COLUMNS.zip(product_type_specific_row)]

	# Process this row according to our business rules.
	product_data.each do \|k, v\|
	# FIXES - DEPRECATION WARNING: You attempted to assign a value which is not explicitly `true` or `false` to a boolean column. Currently this
	# value casts to `false`. This will change to match Ruby's semantics, and will cast to `true` in Rails 5. If you would like to maintain
	# the current behavior, you should explicitly handle the values you would like cast to `false`.
	# REASON: 'FSC environmental' columns contains 'X' for true, and '0' for false, and this '0' is read as 0.0 float, and doesn't get
	# translated properly to `false` value - ConnectionAdapters::Column::FALSE_VALUES includes 0 but not 0.0
	if boolean_column_names.include?(k)
	if v.try(:downcase) == 'x' \|\| v.try(:downcase) == 'y' # 'X' or 'Y' generally mean true.
	product_data[k] = true
	elsif v.try(:zero?)
	product_data[k] = false
	elsif v.nil?
	product_data.delete(k) # instead of assigning nil we use column default values
	end
	next
	end

	# This is because Creek parses the brand id to integer
	# and does away with the leading zero, which we do not want
	# to do, since other files store the same ID as text (and hence
	# creek will not parse it there. :/)
	if k == 'brand_id'
	product_data[k] = v.rjust(4, '0')
	next
	end

	if k == 'packaging_type'
	case v
	when 'F'
	product_data[k] = 'sheets'
	next
	when 'B'
	product_data[k] = 'envelopes'
	next
	else
	product_data[k] = nil
	next
	end
	end
	end

	# Using active record, but should this prove too slow, we might
	# want to do the following:

	# 1. Execute an SQL insert.
	# 2. If still slow, wrap this whole sheet iteration in a transaction.
	product = Product.where(article_id: product_data['article_id']).first_or_create! do \|product\|
	product.attributes = product_data
	end

	# +bundled_data+ is project_type_category + project_type + elements. See Excel file.
	use_case_data.each do \|bundled_data, value\|
	next unless value.try(:downcase) == 'x'

	project_type_name = bundled_data.split("\;")[1]
	element = bundled_data.split("\;")[2]

	# I am relaying on the assumption that it's in our database, which it should.
	prj_type = ProjectType.find_by(name: project_type_name)
	UseCase.create(product_id: product.id, project_type_id: prj_type.id, element: element)
	end

	product_type_data.each do \|product_type_name, value\|
	next unless value.try(:downcase) == 'x'

	translation_key = product_type_name.downcase.gsub(/\s/, '_')

	# I am relaying on the assumption that it's in our database, which it should.
	product_type = ProductType.find_by(translation_key: translation_key)
	product.update_attribute(:product_type_id, product_type.id)
	end
	end

	import_danish_product_descriptions(file_path)
	import_swedish_product_descriptions(file_path)
	import_norwegian_product_descriptions(file_path)
	end

	def self.import_product_types(file_path)
	book = Creek::Book.new(file_path)
	sheet = book.sheets[1]

	excel_column_names = sheet.rows.first.values[0...5].uniq

	unless (a = excel_column_names.sort) == (b = ProductTypeColumnMapper::COLUMN_MAPPINGS.keys.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	index = 0

	sheet.rows.each do \|row\|

	# Skip the next rows (column titles and so on..).
	index = index + 1
	next if index < 2

	# This is the last row.
	break unless row.map{\|c\| c[1]}.any?

	product_type_data = Hash[ProductTypeColumnMapper::COLUMN_MAPPINGS.values.zip(row.map{\|c\| c[1]})]

	translation_key = product_type_data['en_gb'].downcase.gsub(/\s/, '_')

	ProductType.create!(translation_key: translation_key)

	product_type_data.each do \|locale, value\|
	Translation.create!(key: translation_key, locale: locale, value: value)
	end
	end
	end

	# Will import product descriptions and update the country according
	# to the language in which said descriptions are.
	def self.import_danish_product_descriptions(file_path)
	book = Creek::Book.new(file_path)
	sheet = book.sheets[2]

	excel_column_names = sheet.rows.first.values[0...3].uniq

	unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	index = 0

	unless country = Country.find_by(code: 'dk')
	abort "We could not find country with code 'dk'. Perhaps you forgot to seed countries?"
	end

	sheet.rows.each do \|row\|

	# Skip the next rows (column titles and so on..).
	index = index + 1
	next if index < 2

	# This is the last row.
	break unless row.map{\|c\| c[1]}.any?

	product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{\|c\| c[1]})]

	product = Product.find_by(article_id: product_description_data['article_id'])

	# Product already has its country set, we clone it and
	# assign it to this country.
	if product.country_id.present?
	to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
	else
	product.update(country_id: country.id)
	to_be_assigned = product
	end

	ProductDescription.create!(product_id: to_be_assigned.id,
	short_description: product_description_data['short_description'],
	long_description: product_description_data['long_description'],
	country_id: country.id
	)
	end
	end

	# Will import product descriptions and update the country according
	# to the language in which said descriptions are.
	def self.import_norwegian_product_descriptions(file_path)
	book = Creek::Book.new(file_path)
	sheet = book.sheets[4]

	excel_column_names = sheet.rows.first.values[0...3].uniq

	unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	index = 0

	unless country = Country.find_by(code: 'no')
	abort "We could not find country with code 'no'. Perhaps you forgot to seed countries?"
	end

	sheet.rows.each do \|row\|

	# Skip the next rows (column titles and so on..).
	index = index + 1
	next if index < 2

	# This is the last row.
	break unless row.map{\|c\| c[1]}.any?

	product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{\|c\| c[1]})]

	product = Product.find_by(article_id: product_description_data['article_id'])

	# Product already has its country set, we clone it and
	# assign it to this country.
	if product.country_id.present?
	to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
	else
	product.update(country_id: country.id)
	to_be_assigned = product
	end

	ProductDescription.create!(product_id: to_be_assigned.id,
	short_description: product_description_data['short_description'],
	long_description: product_description_data['long_description'],
	country_id: country.id
	)
	end
	end

	# Will import product descriptions and update the country according
	# to the language in which said descriptions are.
	def self.import_swedish_product_descriptions(file_path)
	book = Creek::Book.new(file_path)
	sheet = book.sheets[3]

	excel_column_names = sheet.rows.first.values[0...3].uniq

	unless (a = excel_column_names.sort) == (b = ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.keys.sort)
	mismatching_cols = (a - b) \| (b - a)
	fail ColumnMismatch.new(mismatching_cols)
	end

	index = 0

	unless country = Country.find_by(code: 'se')
	abort "We could not find country with code 'se'. Perhaps you forgot to seed countries?"
	end

	sheet.rows.each do \|row\|

	# Skip the next rows (column titles and so on..).
	index = index + 1
	next if index < 2

	# This is the last row.
	break unless row.map{\|c\| c[1]}.any?

	product_description_data = Hash[ProductColumnMapper::DESCRIPTION_COLUMN_MAPPINGS.values.zip(row.map{\|c\| c[1]})]

	product = Product.find_by(article_id: product_description_data['article_id'])

	# Product already has its country set, we clone it and
	# assign it to this country.
	if product.country_id.present?
	to_be_assigned = Product.create!(product.attributes.except('id').merge(country_id: country.id))
	else
	product.update(country_id: country.id)
	to_be_assigned = product
	end

	ProductDescription.create!(product_id: to_be_assigned.id,
	short_description: product_description_data['short_description'],
	long_description: product_description_data['long_description'],
	country_id: country.id
	)
	end
	end
	end