Skip to content

Instantly share code, notes, and snippets.

@ehlertij
Last active March 1, 2021 14:53
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ehlertij/5b8c47076b14d7b281d5536cb367786b to your computer and use it in GitHub Desktop.
snowplow-snowflake-setup
{
"schema":"iglu:com.snowplowanalytics.dataflowrunner/ClusterConfig/avro/1-1-0",
"data":{
"name":"dataflow-runner - snowflake transformer staging",
"logUri":"s3://my-bucket/logs/",
"region":"us-east-1",
"credentials":{
"accessKeyId":"env",
"secretAccessKey":"env"
},
"roles":{
"jobflow":"EMR_EC2_DefaultRole",
"service":"EMR_DefaultRole"
},
"ec2":{
"amiVersion":"6.2.0",
"keyName":"snowflake-loader-staging",
"location":{
"vpc":{
"subnetId":"subnet-12345"
}
},
"instances":{
"master":{
"type":"m5.xlarge",
"bid":"0.192"
},
"core":{
"type":"m5.xlarge",
"count":1,
"bid":"0.192"
},
"task":{
"type":"m5.xlarge",
"count":0,
"bid":"0.192"
}
}
},
"tags":[ ],
"bootstrapActionConfigs":[ ],
"configurations":[
{
"classification":"core-site",
"properties":{
"io.file.buffer.size":"65536"
}
},
{
"classification":"mapred-site",
"properties":{
"mapreduce.user.classpath.first":"true"
}
},
{
"classification":"yarn-site",
"properties":{
"yarn.resourcemanager.am.max-attempts":"1"
}
},
{
"classification":"spark",
"properties":{
"maximizeResourceAllocation":"true"
}
}
],
"applications":[ "Hadoop", "Spark" ]
}
}
{
"schema": "iglu:com.snowplowanalytics.snowplow.storage/snowflake_config/jsonschema/1-0-3",
"data": {
"name": "Snowflake config",
"awsRegion": "us-east-1",
"auth": {
"integrationName": "SNOWPLOW_S3_INTEGRATION"
},
"manifest": "snowplow-snowflake-manifest",
"snowflakeRegion": "us-east-1",
"database": "STAGING",
"input": "s3://my-bucket/enriched/archive/",
"stage": "snowplow_stage",
"badOutputUrl": "s3://my-bucket/snowflake-bad/",
"stageUrl": "s3://my-bucket/snowflake-stage/",
"warehouse": "STAGING",
"schema": "atomic",
"account": "account",
"username": "SNOWPLOW",
"password": {
"ec2ParameterStore": {
"parameterName": "snowplow.snowflake.password"
}
},
"purpose": "ENRICHED_EVENTS"
}
}
enrich {
streams {
in {
raw = ${?COLLECTOR_STREAM_GOOD}
}
out {
enriched = ${?ENRICH_STREAM_GOOD}
bad = ${?ENRICH_STREAM_BAD}
partitionKey = "event_id"
}
sourceSink {
enabled = kinesis
region = us-east-1
aws {
accessKey = default
secretKey = default
}
maxRecords = 10000
initialPosition = TRIM_HORIZON
backoffPolicy {
minBackoff = 1000
maxBackoff = 5000
}
}
buffer {
byteLimit = 1000000000
recordLimit = 10
timeLimit = 5000
}
appName = ${?ENRICH_DYNAMO_DB_NAME}
}
}
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iteratively",
"priority": 0,
"vendorPrefixes": [ "ly.iterative" ],
"connection": {
"http": {
"uri": "https://api.iterative.ly/iglu",
"apikey": "abcdef"
}
}
},
{
"name": "Iglu Central",
"priority": 1,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
}
]
}
}
{
"schema": "iglu:com.snowplowanalytics.snowplow.storage/amazon_dynamodb_config/jsonschema/2-0-0",
"data": {
"name": "eventsManifestStaging",
"auth": null,
"awsRegion": "us-east-1",
"dynamodbTable": "snowplow-integration-test-crossbatch-dedupe",
"id": "56799a26-980c-4148-8bd9-c021b988c669",
"purpose": "EVENTS_MANIFEST"
}
}
[
...
{
"$id": "https://iterative.ly/company/831c4746-45a7-4353-86cf-503ec6c6b7d0/event/PageViewed/version/1-0-0",
"$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
"title": "PageViewed",
"description": "Called when a user views a page. Only used for Amplitude. Referrer, UTM, and GCLID are automatically recorded by Amplitude.",
"type": "object",
"properties": {
"page_referrer": {
"description": "URL of the previous page (if known).",
"type": "string"
},
"page_path": {
"description": "Canonical path of the page.",
"type": "string"
},
"page_action": {
"description": "The action on the controller that generated the page.",
"type": "string"
},
"page_controller": {
"description": "The Dribbble app's controller that generated the page.",
"type": "string"
},
"page_search": {
"description": "Query string portion of the page URL.",
"type": "string"
},
"page_url": {
"description": "URL of the page.",
"type": "string"
},
"page_title": {
"description": "Title of the page.",
"type": "string"
}
},
"additionalProperties": false,
"required": [
"page_path",
"page_action",
"page_controller",
"page_search",
"page_url",
"page_title"
],
"self": {
"vendor": "ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0",
"name": "PageViewed",
"format": "jsonschema",
"version": "1-0-0"
}
},
...
]
{
"schema":"iglu:com.snowplowanalytics.dataflowrunner/PlaybookConfig/avro/1-0-1",
"data":{
"region":"us-east-1",
"credentials":{
"accessKeyId":"env",
"secretAccessKey":"env"
},
"steps":[
{
"type": "CUSTOM_JAR",
"name": "Flatten enriched data",
"actionOnFailure": "CANCEL_AND_WAIT",
"jar": "s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar",
"arguments": [
"s3://my-bucket/s3_flatten.sh"
]
},
{
"type": "CUSTOM_JAR",
"name": "Staging enriched data",
"actionOnFailure": "CANCEL_AND_WAIT",
"jar": "/usr/share/aws/emr/s3-dist-cp/lib/s3-dist-cp.jar",
"arguments": [
"--src", "s3://my-bucket/enrich-flattened/",
"--dest", "s3://my-bucket/enriched/archive/run={{nowWithFormat "2006-01-02-15-04-05"}}/",
"--s3Endpoint", "s3.amazonaws.com",
"--deleteOnSuccess"
]
},
{
"type":"CUSTOM_JAR",
"name":"Snowflake Transformer",
"actionOnFailure":"CANCEL_AND_WAIT",
"jar":"command-runner.jar",
"arguments":[
"spark-submit",
"--deploy-mode",
"cluster",
"--class",
"com.snowplowanalytics.snowflake.transformer.Main",
"s3://snowplow-hosted-assets/4-storage/snowflake-loader/snowplow-snowflake-transformer-0.8.0.jar",
"--config",
"{{base64File "./config.json"}}",
"--resolver",
"{{base64File "./resolver.json"}}",
"--events-manifest",
"{{base64File "./events_manifest.json"}}"
]
},
{
"type":"CUSTOM_JAR",
"name":"Snowflake Loader",
"actionOnFailure":"CANCEL_AND_WAIT",
"jar":"s3://snowplow-hosted-assets/4-storage/snowflake-loader/snowplow-snowflake-loader-0.8.0.jar",
"arguments":[
"load",
"--base64",
"--config",
"{{base64File "./config.json"}}",
"--resolver",
"{{base64File "./resolver.json"}}"
]
}
],
"tags":[ ]
}
}
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
}
]
}
}
web 2021-02-26 16:41:55.911 2021-02-26 16:41:53.180 2021-02-26 16:41:52.787 unstruct ef59a327-ce50-42a7-b851-4178bccdc83b itly js-2.10.2 ssc-2.1.0-kinesis stream-enrich-1.4.2-common-1.4.2 217.138.198.120 62548142 e5f37f9c-09f4-4b21-b67a-1088a5ce6ef3 23 7029c6ed-6795-4091-a9f7-f1d5515f113e http://localhost:3000/ http localhost 3000 / {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0","data":{"page_title":"Dribbble - Discover the World’s Top Designers & Creative Professionals","page_url":"http://localhost:3000/","page_path":"/","page_search":"","page_referrer":"","is_authenticated":false,"page_controller":"screenshots/filters","page_action":"index"}}} Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36 en-US 1 0 0 0 0 0 0 0 0 1 24 1422 1304 America/Chicago 2560 1440 UTF-8 1407 3036 2021-02-26 16:41:52.789 e290f25a-b059-4944-8c92-d91e4a63074b 2021-02-26 16:41:53.178 ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0 PageViewed jsonschema 3-0-0 web 2021-02-26 16:41:55.929 2021-02-26 16:41:53.231 2021-02-26 16:41:52.899 unstruct 10b133ec-a517-41f0-8a6e-2ad76b46f7f0 itly js-2.10.2 ssc-2.1.0-kinesis stream-enrich-1.4.2-common-1.4.2 217.138.198.120 62548142 e5f37f9c-09f4-4b21-b67a-1088a5ce6ef3 23 200aeb20-4892-4cfc-8cc0-cc882a29ae33 http://localhost:3000/ http localhost 3000 / {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0","data":{"page_title":"Dribbble - Discover the World’s Top Designers & Creative 
Professionals","page_url":"http://localhost:3000/?page=2&per_page=24&exclude_shot_ids=%2C8%2C5%2C1%2C2%2C3%2C6%2C4%2C9%2C7%2C11%2C14%2C10%2C13%2C12%2C18%2C15%2C16%2C17%2C19%2C20%2C21%2C27%2C22%2C31","page_path":"/","page_search":"?page=2&per_page=24&exclude_shot_ids=%2C8%2C5%2C1%2C2%2C3%2C6%2C4%2C9%2C7%2C11%2C14%2C10%2C13%2C12%2C18%2C15%2C16%2C17%2C19%2C20%2C21%2C27%2C22%2C31","page_referrer":"","is_authenticated":false,"page_controller":"screenshots/filters","page_action":"index"}}} Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36 en-US 1 0 0 0 0 0 0 0 0 1 24 1422 1304 America/Chicago 2560 1440 UTF-8 1407 3058 2021-02-26 16:41:53.145 e290f25a-b059-4944-8c92-d91e4a63074b 2021-02-26 16:41:52.985 ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0 PageViewed jsonschema 3-0-0 web 2021-02-26 16:42:14.918 2021-02-26 16:42:13.637 2021-02-26 16:42:13.491 unstruct 340b40c7-407a-4a9d-a5b0-f1c621a1a8fb itly js-2.10.2 ssc-2.1.0-kinesis stream-enrich-1.4.2-common-1.4.2 217.138.198.120 62548142 e5f37f9c-09f4-4b21-b67a-1088a5ce6ef3 23 d3315443-c835-496b-8887-7309bf09c3a2 http://localhost:3000/shots/1-Et-eius-i-1 http://localhost:3000/ http localhost 3000 /shots/1-Et-eius-i-1 http localhost 3000 / {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0","data":{"page_title":"","page_url":"http://localhost:3000/shots/1-Et-eius-i-1","page_path":"/shots/1-Et-eius-i-1","page_search":"","page_referrer":"","is_authenticated":false,"page_controller":"screenshots/filters","page_action":"index"}}} Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36 en-US 1 0 0 0 0 0 0 0 0 1 24 1422 1304 America/Chicago 2560 1440 UTF-8 1422 4531 2021-02-26 16:42:13.502 e290f25a-b059-4944-8c92-d91e4a63074b 2021-02-26 16:42:13.626 
ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0 PageViewed jsonschema 3-0-0
{"schema":"iglu:com.snowplowanalytics.snowplow.badrows/loader_parsing_error/jsonschema/2-0-0","data":{"processor":{"artifact":"snowplow-snowflake-transformer","version":"0.8.0"},"failure":{"type":"FieldNumberMismatch","fieldCount":391},"payload":"\tweb\t2021-02-26 16:41:55.911\t2021-02-26 16:41:53.180\t2021-02-26 16:41:52.787\tunstruct\tef59a327-ce50-42a7-b851-4178bccdc83b\t\titly\tjs-2.10.2\tssc-2.1.0-kinesis\tstream-enrich-1.4.2-common-1.4.2\t\t217.138.198.120\t62548142\te5f37f9c-09f4-4b21-b67a-1088a5ce6ef3\t23\t7029c6ed-6795-4091-a9f7-f1d5515f113e\t\t\t\t\t\t\t\t\t\t\t\thttp://localhost:3000/\t\t\thttp\tlocalhost\t3000\t/\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t{\"schema\":\"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0\",\"data\":{\"schema\":\"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0\",\"data\":{\"page_title\":\"Dribbble - Discover the World’s Top Designers & Creative Professionals\",\"page_url\":\"http://localhost:3000/\",\"page_path\":\"/\",\"page_search\":\"\",\"page_referrer\":\"\",\"is_authenticated\":false,\"page_controller\":\"screenshots/filters\",\"page_action\":\"index\"}}}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36\t\t\t\t\t\ten-US\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t24\t1422\t1304\t\t\t\tAmerica/Chicago\t\t\t2560\t1440\tUTF-8\t1407\t3036\t\t\t\t\t\t\t\t\t\t\t\t2021-02-26 16:41:52.789\t\t\t\te290f25a-b059-4944-8c92-d91e4a63074b\t2021-02-26 16:41:53.178\tly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0\tPageViewed\tjsonschema\t3-0-0\t\t\tweb\t2021-02-26 16:41:55.929\t2021-02-26 16:41:53.231\t2021-02-26 
16:41:52.899\tunstruct\t10b133ec-a517-41f0-8a6e-2ad76b46f7f0\t\titly\tjs-2.10.2\tssc-2.1.0-kinesis\tstream-enrich-1.4.2-common-1.4.2\t\t217.138.198.120\t62548142\te5f37f9c-09f4-4b21-b67a-1088a5ce6ef3\t23\t200aeb20-4892-4cfc-8cc0-cc882a29ae33\t\t\t\t\t\t\t\t\t\t\t\thttp://localhost:3000/\t\t\thttp\tlocalhost\t3000\t/\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t{\"schema\":\"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0\",\"data\":{\"schema\":\"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0\",\"data\":{\"page_title\":\"Dribbble - Discover the World’s Top Designers & Creative Professionals\",\"page_url\":\"http://localhost:3000/?page=2&per_page=24&exclude_shot_ids=%2C8%2C5%2C1%2C2%2C3%2C6%2C4%2C9%2C7%2C11%2C14%2C10%2C13%2C12%2C18%2C15%2C16%2C17%2C19%2C20%2C21%2C27%2C22%2C31\",\"page_path\":\"/\",\"page_search\":\"?page=2&per_page=24&exclude_shot_ids=%2C8%2C5%2C1%2C2%2C3%2C6%2C4%2C9%2C7%2C11%2C14%2C10%2C13%2C12%2C18%2C15%2C16%2C17%2C19%2C20%2C21%2C27%2C22%2C31\",\"page_referrer\":\"\",\"is_authenticated\":false,\"page_controller\":\"screenshots/filters\",\"page_action\":\"index\"}}}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36\t\t\t\t\t\ten-US\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t24\t1422\t1304\t\t\t\tAmerica/Chicago\t\t\t2560\t1440\tUTF-8\t1407\t3058\t\t\t\t\t\t\t\t\t\t\t\t2021-02-26 16:41:53.145\t\t\t\te290f25a-b059-4944-8c92-d91e4a63074b\t2021-02-26 16:41:52.985\tly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0\tPageViewed\tjsonschema\t3-0-0\t\t\tweb\t2021-02-26 16:42:14.918\t2021-02-26 16:42:13.637\t2021-02-26 
16:42:13.491\tunstruct\t340b40c7-407a-4a9d-a5b0-f1c621a1a8fb\t\titly\tjs-2.10.2\tssc-2.1.0-kinesis\tstream-enrich-1.4.2-common-1.4.2\t\t217.138.198.120\t62548142\te5f37f9c-09f4-4b21-b67a-1088a5ce6ef3\t23\td3315443-c835-496b-8887-7309bf09c3a2\t\t\t\t\t\t\t\t\t\t\t\thttp://localhost:3000/shots/1-Et-eius-i-1\t\thttp://localhost:3000/\thttp\tlocalhost\t3000\t/shots/1-Et-eius-i-1\t\t\thttp\tlocalhost\t3000\t/\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t{\"schema\":\"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0\",\"data\":{\"schema\":\"iglu:ly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0/PageViewed/jsonschema/3-0-0\",\"data\":{\"page_title\":\"\",\"page_url\":\"http://localhost:3000/shots/1-Et-eius-i-1\",\"page_path\":\"/shots/1-Et-eius-i-1\",\"page_search\":\"\",\"page_referrer\":\"\",\"is_authenticated\":false,\"page_controller\":\"screenshots/filters\",\"page_action\":\"index\"}}}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36\t\t\t\t\t\ten-US\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t24\t1422\t1304\t\t\t\tAmerica/Chicago\t\t\t2560\t1440\tUTF-8\t1422\t4531\t\t\t\t\t\t\t\t\t\t\t\t2021-02-26 16:42:13.502\t\t\t\te290f25a-b059-4944-8c92-d91e4a63074b\t2021-02-26 16:42:13.626\tly.iterative.831c4746-45a7-4353-86cf-503ec6c6b7d0\tPageViewed\tjsonschema\t3-0-0\t\t"}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment