Skip to content

Instantly share code, notes, and snippets.

@JonNorman
Created October 14, 2019 15:58
Show Gist options
  • Save JonNorman/5ff17140bdcc8a5f31a6dfc844000098 to your computer and use it in GitHub Desktop.
Save JonNorman/5ff17140bdcc8a5f31a6dfc844000098 to your computer and use it in GitHub Desktop.
Example Snowflake worksheet for ingesting data from an external S3 bucket.
-- following instructions in https://docs.snowflake.net/manuals/user-guide/data-load-s3-config.html
use role ACCOUNTADMIN;
CREATE STORAGE INTEGRATION poc_data_s3_integration
TYPE = EXTERNAL_STAGE
STORAGE_PROVIDER = S3
ENABLED = TRUE
STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::021353022223:role/ophan-snowflake-bucket-role'
STORAGE_ALLOWED_LOCATIONS = ('s3://ophan-snowflake-data/');
DESC INTEGRATION poc_data_s3_integration;
grant create stage on schema POC.RAW to role PUBLIC;
grant usage on integration poc_data_s3_integration to role PUBLIC;
use database POC;
-- create the table
create or replace table dfp_impressions_anon (
Time VARCHAR,
UserId VARCHAR,
IP VARCHAR,
AdvertiserId INTEGER,
OrderId INTEGER,
LineItemId INTEGER,
CreativeId INTEGER,
CreativeVersion INTEGER,
AdUnitId INTEGER,
CustomTargeting VARCHAR,
Country VARCHAR,
Region VARCHAR,
Browser VARCHAR,
OS VARCHAR,
Domain VARCHAR,
Metro VARCHAR,
City VARCHAR,
PostalCode VARCHAR,
BandWidth VARCHAR,
TimeUsec INTEGER,
GfpContentId INTEGER,
BrowserId INTEGER,
OSId INTEGER,
CountryId INTEGER,
RegionId INTEGER,
CityId INTEGER,
MetroId INTEGER,
PostalCodeId INTEGER,
BandwidthId INTEGER,
AudienceSegmentIds VARCHAR,
RequestedAdUnitSizes VARCHAR,
MobileDevice VARCHAR,
OSVersion VARCHAR,
MobileCapability VARCHAR,
MobileCarrier VARCHAR,
BandwidthGroupId INTEGER,
Product VARCHAR,
PublisherProvidedID VARCHAR,
IsCompanion BOOLEAN,
VideoPosition INTEGER,
PodPosition INTEGER,
VideoFallbackPosition INTEGER,
TargetedCustomCriteria VARCHAR,
KeyPart VARCHAR,
TimeUsec2 INTEGER,
DeviceCategory VARCHAR,
IsInterstitial BOOLEAN,
ActiveViewEligibleImpression BOOLEAN,
EstimatedBackfillRevenue FLOAT,
Buyer VARCHAR,
CreativeSizeDelivered VARCHAR,
DealType VARCHAR,
SellerReservePrice FLOAT
)
-- create a table with the same schema for backfill impressions
create or replace table dfp_backfill_impressions_anon clone dfp_impressions_anon
-- create the stage for connecting to S3
create or replace stage poc_dfp_data_stage
storage_integration = poc_data_s3_integration
url = 's3://ophan-snowflake-data'
file_format = (type = csv
field_delimiter = ','
FIELD_OPTIONALLY_ENCLOSED_BY = '"'
skip_header = 0
null_if = ('NULL', 'null')
empty_field_as_null = true
compression = auto);
use warehouse LOAD_WH;
-- populate the dfp impressions data
copy into dfp_impressions_anon
from @poc_dfp_data_stage
pattern='dfp-impressions-anon/.*';
-- populate the dfp backfill impressions data
copy into dfp_backfill_impressions_anon
from @poc_dfp_data_stage
pattern='dfp-backfill-impressions-anon/.*';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment