Last active
March 1, 2024 15:22
-
-
Save stevewithington/c9fa188b8717722e804ea86420e739d3 to your computer and use it in GitHub Desktop.
How to compare Snowflake UTF-8 formatted data to Microsoft SQL Server UTF-16 formatted data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*---------------------------------------------------------------------------------------- | |
Script: udf_convert_to_utf16.sql | |
Author: [Steve Withington](steve@digitalmine.com) | |
Dependencies: n/a | |
Purpose: MSSQL data is stored in UTF-16 format (UTF-8 support didn't roll out until SQL Server 2019) | |
Snowflake data is stored in UTF-8 format. | |
This function will convert a UTF-8 string to a UTF-16, MD5 string for comparison purposes | |
Note: You probably don't want to use this function directly. | |
Instead, use the UDF_CONVERT_TO_UTF16_MD5() function. | |
----------------------------------------------------------------------------------------*/ | |
USE ROLE {SNOWFLAKE_ROLE_NAME}; | |
USE WAREHOUSE {SNOWFLAKE_WAREHOUSE_NAME}; | |
USE SCHEMA {SNOWFLAKE_DB_NAME}.{SNOWFLAKE_SCHEMA_NAME}; | |
-- --------------------------------------------------------------------------------------- | |
-- * User Defined Function: UDF_CONVERT_TO_UTF16 | |
-- --------------------------------------------------------------------------------------- | |
DROP FUNCTION IF EXISTS UDF_CONVERT_TO_UTF16(VARCHAR); | |
CREATE OR REPLACE FUNCTION UDF_CONVERT_TO_UTF16(STR VARCHAR) | |
RETURNS BINARY | |
LANGUAGE JAVASCRIPT | |
COMMENT = 'This function will convert a UTF-8 string to a UTF-16 binary.' | |
AS | |
$$ | |
var str = STR; | |
var buf = new ArrayBuffer(str.length*2); | |
var bufView = new Uint8Array(buf); | |
for (var i=0, strLen=str.length; i < strLen; i++ ) { | |
bufView[i*2] = str.charCodeAt(i); | |
} | |
return bufView; | |
$$ | |
; | |
-- --------------------------------------------------------------------------------------- | |
-- * Verify | |
-- --------------------------------------------------------------------------------------- | |
DESC FUNCTION UDF_CONVERT_TO_UTF16(VARCHAR); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*---------------------------------------------------------------------------------------- | |
Script: udf_convert_to_utf16_md5.sql | |
Author: [Steve Withington](steve@digitalmine.com) | |
Dependencies: `udf_convert_to_utf16` function (see associated file) | |
Purpose: MSSQL data is stored in UTF-16 format (UTF-8 support didn't roll out until SQL Server 2019) | |
Snowflake data is stored in UTF-8 format. | |
This function will convert a UTF-8 string to a UTF-16, MD5 string for comparison purposes | |
Example Usage: | |
SELECT {SNOWFLAKE_DB_NAME}.{SNOWFLAKE_SCHEMA_NAME}.UDF_CONVERT_TO_UTF16_MD5(CONCAT('SOME','CONCATENATED','STRING')) AS MD5_UTF16_STRING; | |
SELECT {SNOWFLAKE_DB_NAME}.{SNOWFLAKE_SCHEMA_NAME}.UDF_CONVERT_TO_UTF16_MD5('1000031061654000160') AS MD5_UTF16_STRING; | |
Example Query for MSSQL for Comparison: | |
SELECT CONVERT(NVARCHAR(40), HashBytes('MD5', (COALESCE(FIELD01,'') + COALESCE(FIELD02,'') + COALESCE(FIELD03,''))), 2) AS UTF16_MD5_KEY, * FROM {DB}.{SCHEMA}.{TABLE}; | |
----------------------------------------------------------------------------------------*/ | |
USE ROLE {SNOWFLAKE_ROLE_NAME}; | |
USE WAREHOUSE {SNOWFLAKE_WAREHOUSE_NAME}; | |
USE SCHEMA {SNOWFLAKE_DB_NAME}.{SNOWFLAKE_SCHEMA_NAME}; | |
-- --------------------------------------------------------------------------------------- | |
-- * User Defined Function: UDF_CONVERT_TO_UTF16_MD5 | |
-- --------------------------------------------------------------------------------------- | |
DROP FUNCTION IF EXISTS UDF_CONVERT_TO_UTF16_MD5(VARCHAR); | |
CREATE OR REPLACE FUNCTION UDF_CONVERT_TO_UTF16_MD5(STR VARCHAR) | |
RETURNS VARCHAR | |
LANGUAGE SQL | |
COMMENT = 'This function will convert a UTF-8 string to a UTF-16, MD5 string.' | |
AS | |
$$ | |
UPPER(MD5(UDF_CONVERT_TO_UTF16(STR))) | |
$$ | |
; | |
-- --------------------------------------------------------------------------------------- | |
-- * Verify | |
-- --------------------------------------------------------------------------------------- | |
DESC FUNCTION UDF_CONVERT_TO_UTF16_MD5(VARCHAR); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment