Created
November 24, 2014 00:28
-
-
Save moomindani/995ddb38ec24533a358a to your computer and use it in GitHub Desktop.
Import CloudFront log into Redshift using Data Pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "objects": [
    {
      "id": "Default",
      "scheduleType": "TIMESERIES",
      "failureAndRerunMode": "CASCADE",
      "name": "Default",
      "pipelineLogUri": "s3://path_to_log",
      "role": "DataPipelineDefaultRole",
      "resourceRole": "DataPipelineDefaultResourceRole"
    },
    {
      "id": "ScheduleId1",
      "name": "DefaultSchedule1",
      "startAt": "FIRST_ACTIVATION_DATE_TIME",
      "type": "Schedule",
      "period": "15 Minutes"
    },
    {
      "id": "S3DataNodeCloudFrontLog",
      "name": "S3DataNodeCloudFrontLog",
      "type": "S3DataNode",
      "directoryPath": "s3://path_to_cloudfront_log",
      "schedule": {
        "ref": "ScheduleId1"
      }
    },
    {
      "id": "RedshiftDatabaseId1",
      "name": "DefaultRedshiftDatabase1",
      "type": "RedshiftDatabase",
      "clusterId": "hoge",
      "databaseName": "dev",
      "username": "hogehoge",
      "*password": "fugafuga"
    },
    {
      "id": "RedshiftDataNodeCloudFrontLog",
      "name": "RedshiftDataNodeCloudFrontLog",
      "type": "RedshiftDataNode",
      "database": {
        "ref": "RedshiftDatabaseId1"
      },
      "tableName": "cloudfront_access_logs",
      "createTableSql": "CREATE TABLE cloudfront_access_logs ( request_date VARCHAR(MAX) SORTKEY, request_time VARCHAR(MAX) ENCODE LZO, x_edge_location VARCHAR(40) ENCODE LZO, sc_bytes INT ENCODE LZO, remote_ip VARCHAR(50) ENCODE LZO, cs_method VARCHAR(50) ENCODE LZO, cs_host VARCHAR(MAX) ENCODE LZO, cs_uri_stem VARCHAR(MAX) DISTKEY ENCODE LZO, sc_status VARCHAR(20) ENCODE LZO, cs_referrer VARCHAR(MAX) ENCODE LZO, cs_useragent VARCHAR(MAX) ENCODE LZO, cs_uri_query VARCHAR(MAX) ENCODE LZO, cs_cookie VARCHAR(MAX) ENCODE LZO, x_edge_result_type VARCHAR(MAX) ENCODE LZO, x_edge_request_id VARCHAR(MAX) ENCODE LZO, x_host_header VARCHAR(MAX) ENCODE LZO, cs_protocol VARCHAR(10) ENCODE LZO, cs_bytes INT ENCODE LZO, time_taken VARCHAR(MAX) ENCODE LZO);",
      "schedule": {
        "ref": "ScheduleId1"
      }
    },
    {
      "id": "RedshiftCopyActivityCloudFrontLog",
      "name": "RedshiftCopyActivityCloudFrontLog",
      "type": "RedshiftCopyActivity",
      "input": {
        "ref": "S3DataNodeCloudFrontLog"
      },
      "commandOptions": [
        "DELIMITER '\t'",
        "IGNOREHEADER 2",
        "TRUNCATECOLUMNS",
        "TRIMBLANKS",
        "ACCEPTINVCHARS",
        "MAXERROR as 100000",
        "gzip"
      ],
      "insertMode": "TRUNCATE",
      "runsOn": {
        "ref": "Ec2ResourceId1"
      },
      "schedule": {
        "ref": "ScheduleId1"
      },
      "output": {
        "ref": "RedshiftDataNodeCloudFrontLog"
      }
    },
    {
      "id": "Ec2ResourceId1",
      "name": "DefaultEc2Resource1",
      "type": "Ec2Resource",
      "terminateAfter": "1 HOURS",
      "schedule": {
        "ref": "ScheduleId1"
      },
      "securityGroups": "default",
      "logUri": "s3://path_to_log",
      "role": "DataPipelineDefaultRole",
      "resourceRole": "DataPipelineDefaultResourceRole"
    }
  ]
}
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.