Skip to content

Instantly share code, notes, and snippets.

@dsmdavid
Created December 1, 2022 10:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dsmdavid/034c420bce5f1f36c462afb1b9f39f16 to your computer and use it in GitHub Desktop.
Save dsmdavid/034c420bce5f1f36c462afb1b9f39f16 to your computer and use it in GitHub Desktop.
-- Demo: estimate how similar two tables are using Snowflake's MinHash functions.
-- Steps: copy a sample table, clone it, alter some rows in the clone, then
-- compare the two tables with approximate_similarity.

-- Baseline copy of the sample supplier table.
create or replace table "SUPPLIER_ORIGINAL" as
select *
from "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."SUPPLIER"; -- 10000 suppliers

-- Clone that will receive the modifications.
create or replace table "SUPPLIER_FEW_CHANGES" clone "SUPPLIER_ORIGINAL";

-- introduce some errors
-- Pre-check: how many rows contain an uppercase 'Z' in the address.
select count(*)
from "SUPPLIER_ORIGINAL"
where S_ADDRESS like '%Z%'; --3190 rows will be modified

-- Lowercase every 'Z' in the address. The filter restricts the update to the
-- rows counted above; rows without a 'Z' would be left unchanged by replace()
-- anyway, so the resulting table contents are the same as an unfiltered update.
update "SUPPLIER_FEW_CHANGES"
set S_ADDRESS = replace(S_ADDRESS, 'Z', 'z')
where S_ADDRESS like '%Z%';

-- Build one MinHash state (100 hash functions, over all columns) per table and
-- estimate the similarity between the two tables from those states.
-- NOTE(review): assuming each whole row counts as one set element, the exact
-- Jaccard index would be about 6810 / 13190 ~= 0.52, so an estimate near 0.55
-- is plausible for k = 100 - confirm against the Snowflake docs.
with minhash_states as (
    select minhash(100, *) as mh from "SUPPLIER_ORIGINAL"
    union all
    select minhash(100, *) as mh from "SUPPLIER_FEW_CHANGES"
)
select approximate_similarity(mh)
from minhash_states; --0.55
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment