Last active
February 22, 2018 07:39
-
-
Save kanevk/e40c38903ee50c2342ceb13521708a06 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ActiveRecord::Base.connection.execute(<<-SQL) | |
DROP TABLE IF EXISTS non_duplicate_brand_tuples; | |
CREATE TABLE non_duplicate_brand_tuples ( | |
id serial primary key, | |
main_brand_id integer NOT NULL, | |
non_duplicate_brand_id integer NOT NULL | |
); | |
SQL | |
class NonDuplicateBrandTuples < ActiveRecord::Base | |
end | |
# Seeds | |
20.downto(1).each_with_index do |first, second| | |
NonDuplicateBrandTuples.create main_brand_id: first, non_duplicate_brand_id: second | |
end | |
def sql_implementation | |
result = | |
NonDuplicateBrandTuples.connection.execute(<<~SQL) | |
WITH all_non_duplicates AS ( | |
SELECT main_brand_id AS main, non_duplicate_brand_id AS non_duplicate | |
FROM non_duplicate_brand_tuples | |
UNION | |
SELECT non_duplicate_brand_id AS main, main_brand_id AS non_duplicate | |
FROM non_duplicate_brand_tuples | |
) | |
SELECT main, array_agg(non_duplicate) FROM all_non_duplicates GROUP BY main; | |
SQL | |
result. | |
values. | |
to_h. | |
transform_values { |v| PG::TextDecoder::Array.new.decode(v).map(&:to_i) } | |
end | |
def ruby_implementation | |
NonDuplicateBrandTuples. | |
pluck(:main_brand_id, :non_duplicate_brand_id). | |
map { |*tuple| tuple.sort }. | |
to_set | |
end | |
non_duplicates = sql_implementation | |
# non_duplicates = ruby_implementation # Uncomment for using Ruby implementation | |
duplicates = [] | |
1.upto(100_000).each_with_index do |remote_brand_id, progress| | |
puts progress | |
1.upto(100).each do |brand_id| | |
next if non_duplicates[brand_id]&.include?(brand_id) | |
# next if non_duplicates.include? [remote_brand_id, brand_id].sort # Uncomment for using Ruby implementation | |
duplicates << [remote_brand_id, brand_id] | |
end | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment