Created
October 20, 2015 17:53
-
-
Save githoov/8f00be98b0b29c46257a to your computer and use it in GitHub Desktop.
Snowplow Config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AWS credentials, S3 bucket layout and EMR cluster settings.
aws:
  access_key_id: my_id
  secret_access_key: my_key
  s3:
    region: us-east-1
    buckets:
      # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
      assets: s3://snowplow-hosted-assets
      jsonpath_assets: s3://snowplow-looker/jsonpaths
      log: s3n://snowplow-looker-emr-log/
      raw:
        in:
          - s3://elasticbeanstalk-us-east-1-734261250617/resources/environments/logs/publish/e-uftusxb7dt/
        processing: s3://snowplow-looker-emr-processing/events1/
        archive: s3://snowplow-looker-emr-archive/raw1/
      enriched:
        good: s3://snowplow-looker-emr-out/enriched1/good/
        bad: s3://snowplow-looker-emr-out/enriched1/bad/
        errors: s3://snowplow-looker-emr-out/enriched1/error/
        # NOTE(review): this archive path is nested under the errors prefix above,
        # unlike shredded.archive - confirm this is intended.
        archive: s3://snowplow-looker-emr-out/enriched1/error/archive/
      shredded:
        good: s3://snowplow-looker-emr-out/shredded1/good/
        bad: s3://snowplow-looker-emr-out/shredded1/bad/
        errors: # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://snowplow-looker-emr-out/shredded1/archive/
  emr:
    ami_version: 3.7.0 # Don't change this
    region: us-east-1
    jobflow_role: EMR_EC2_DefaultRole # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole # Created using $ aws emr create-default-roles
    placement: # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id: subnet-23ff3a53 # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: snowplow-etl
    bootstrap: [] # Set this to specify custom bootstrap actions. Leave empty otherwise
    software:
      hbase: # To launch on cluster, provide version, "0.92.0", keep quotes
      lingual: # To launch on cluster, provide version, "1.1", keep quotes
    # Adjust your Hadoop cluster below
    jobflow:
      master_instance_type: m1.large
      core_instance_count: 2
      core_instance_type: m1.medium
      task_instance_count: 0 # Increase to use spot instances
      task_instance_type: m1.small
      # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
      task_instance_bid: 0.015
    bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
# Format of the raw collector logs read from the raw `in` bucket(s).
collectors:
  # 'clj-tomcat' for the Clojure Collector. Alternatives: 'cloudfront' for the
  # CloudFront Collector, 'thrift' for Thrift records, or
  # 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs
  format: clj-tomcat
# Enrichment/shredding job settings.
enrich:
  job_name: New Snowplow ETL # Give your job a name
  versions:
    hadoop_enrich: 1.2.0 # Version of the Hadoop Enrichment process
    hadoop_shred: 0.5.0 # Version of the Hadoop Shredding process
  # Set to 'true' (and set the `errors` buckets under aws.s3.buckets) if you
  # don't want any exceptions thrown from ETL
  continue_on_unexpected_error: false
  # Compression only supported with Redshift, set to NONE if you have Postgres
  # targets. Allowed formats: NONE, GZIP
  output_compression: GZIP
# Storage targets that enriched events are loaded into.
storage:
  download:
    # Postgres-only config option. Where to store the downloaded files.
    # Leave blank for Redshift
    folder:
  targets:
    - name: "Meta Redshift Cluster"
      type: redshift
      host: my_redshift_host
      database: meta
      port: 5439
      ssl_mode: disable # One of disable (default), require, verify-ca or verify-full
      table: snowplow.events # using a custom schema here
      username: my_user
      password: my_password
      maxerror: 1 # Stop loading on first error, or increase to permit more load errors
      comprows: 200000 # Default for a 1 XL node cluster. Not used unless --include compupdate specified
# Job tagging, log verbosity and Snowplow self-tracking settings.
monitoring:
  tags: {} # Name-value pairs describing this job
  logging:
    level: DEBUG # You can optionally switch to INFO for production
  snowplow:
    method: post
    app_id: snowping # e.g. snowplow
    collector: snowping.looker.com # e.g. d3rkrsqld9gmqf.cloudfront.net
# Iglu resolver configuration: the schema repositories, each with a priority
# and the vendor prefixes it is listed for.
iglu:
  schema: iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0
  data:
    cache_size: 500
    repositories:
      - name: "Iglu Central"
        priority: 0
        vendor_prefixes:
          - com.snowplowanalytics
        connection:
          http:
            uri: http://iglucentral.com
      - name: "Looker Repository"
        priority: 5
        vendor_prefixes:
          - com.looker
        connection:
          http:
            uri: https://s3.amazonaws.com/snowplow-looker
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment