Skip to content

Instantly share code, notes, and snippets.

@ahkim
Last active January 24, 2020 09:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ahkim/a32a77a668531a6fe54c9f1b614418c4 to your computer and use it in GitHub Desktop.
Save ahkim/a32a77a668531a6fe54c9f1b614418c4 to your computer and use it in GitHub Desktop.
These code snippets are to demonstrate how you should use PolyBase in step by step. You should have a blob storage and storage secret handy beforehand and execute this from SQL Server 2016 or Azure DW, etc.
DECLARE @serverName VARCHAR(20)
DECLARE @storageSecret VARCHAR(MAX)
DECLARE @storageLocation VARCHAR(MAX)
SET @serverName = convert(VARCHAR(20),(SELECT SERVERPROPERTY('ServerName')))
IF(@serverName = 'peet-bi-dev')
BEGIN
SET @storageSecret = '{your_own_secret}'
SET @storageLocation = 'wasbs://{container_name}@{storage_name}.blob.core.windows.net'
END
ELSE IF(@serverName = 'peet-bi-uat')
BEGIN
SET @storageSecret = '{your_own_secret}'
SET @storageLocation = 'wasbs://{container_name}@{storage_name}.blob.core.windows.net'
END
ELSE IF(@serverName = 'peet-bi-tst')
BEGIN
SET @storageSecret = '{your_own_secret}'
SET @storageLocation = 'wasbs://{container_name}@{storage_name}.blob.core.windows.net'
END
ELSE IF(@serverName = 'peet-bi-prd')
BEGIN
print 'prd'
END
-- Following are one-off for each DW instance
-- A: Create a master key.
-- Only necessary if one does not already exist.
-- Required to encrypt the credential secret in the next step.
If(select Count(*) from sys.symmetric_keys where name like '%DatabaseMasterKey%') = 0
CREATE MASTER KEY;
-- B: Create a database scoped credential
-- IDENTITY: Provide any string, it is not used for authentication to Azure storage.
-- SECRET: Provide your Azure storage account key.
DECLARE @SQL_SCRIPT VARCHAR(MAX)
If(select Count(*) from sys.database_credentials where name like '%AzureStorageCredential%') = 0
BEGIN
DECLARE @CREATE_DATABASE_SCOPED_CREDENTIAL VARCHAR(MAX)
SET @CREATE_DATABASE_SCOPED_CREDENTIAL = 'CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential
WITH
IDENTITY = ''user'',
SECRET = ''{STORAGE_SECRET}''
;'
SET @SQL_SCRIPT = REPLACE(@CREATE_DATABASE_SCOPED_CREDENTIAL, '{STORAGE_SECRET}', @storageSecret)
EXECUTE (@SQL_SCRIPT)
END
-- C: Create an external data source
-- TYPE: HADOOP - PolyBase uses Hadoop APIs to access data in Azure blob storage.
-- LOCATION: Provide Azure storage account name and blob container name.
-- CREDENTIAL: Provide the credential created in the previous step.
If(select Count(*) from sys.external_data_sources where name like '%AzureStorage%') = 0
BEGIN
DECLARE @CREATE_AZURE_STORAGE VARCHAR(MAX)
SET @CREATE_AZURE_STORAGE = 'CREATE EXTERNAL DATA SOURCE AzureStorage
WITH (
TYPE = HADOOP,
LOCATION = ''{STORAGE_LOCATION}'',
CREDENTIAL = AzureStorageCredential
);'
SET @SQL_SCRIPT = REPLACE(@CREATE_AZURE_STORAGE, '{STORAGE_LOCATION}', @storageLocation)
EXECUTE (@SQL_SCRIPT)
END
-- D: Create an external file format
-- FORMAT_TYPE: Type of file format in Azure storage (supported: DELIMITEDTEXT, RCFILE, ORC, PARQUET).
-- FORMAT_OPTIONS: Specify field terminator, string delimiter, date format etc. for delimited text files.
-- Specify DATA_COMPRESSION method if data is compressed.
IF NOT EXISTS(select * from sys.external_file_formats where name like '%TextFile%')
BEGIN
Create EXTERNAL FILE FORMAT TextFile
WITH (
FORMAT_TYPE = DelimitedText,
FORMAT_OPTIONS (FIELD_TERMINATOR = ',',
DATE_FORMAT = 'yyyy-MM-dd HH:mm')
);
END
GO
-- Blix_Location
--if (select count(*) from sysobjects where xtype = 'ET' and name = 'Blix_Location') >= 1
-- DROP EXTERNAL TABLE ext.Blix_Location;
--GO
CREATE EXTERNAL TABLE ext.Blix_Location (
[record_key] varchar(50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[LocationID] varchar(20) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[LocationName] varchar(100) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[effective_date] datetime NULL
)
WITH (
LOCATION='/Blix/Blix_Location',
DATA_SOURCE=AzureStorage,
FILE_FORMAT=TextFile
);
GO
--if exists(select * from sys.sysobjects where xtype = 'P' and name = 'usp_CTAS_Blix_Location')
-- Drop Procedure ext.usp_CTAS_Blix_Location;
--GO
Create Procedure ext.usp_CTAS_Blix_Location
AS
BEGIN
Create Table staging.Blix_Location_upsert
WITH
(
Distribution=HASH(record_key),
CLUSTERED INDEX (record_key)
)
AS
SELECT * FROM ext.Blix_Location;
RENAME OBJECT staging.Blix_Location TO Blix_Location_old;
RENAME OBJECT staging.Blix_Location_upsert TO Blix_Location;
DROP TABLE staging.Blix_Location_old;
--move to DW
execute dbo.usp_ETL_BLIX_Location
--select * from staging.Blix_Location
--delete from staging.Blix_Location
END;
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment