Created
November 24, 2014 00:26
-
-
Save moomindani/04f0dd102f7387225747 to your computer and use it in GitHub Desktop.
Import S3 log into Redshift using Data Pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "objects": [
    {
      "id": "Default",
      "scheduleType": "TIMESERIES",
      "failureAndRerunMode": "CASCADE",
      "name": "Default",
      "pipelineLogUri": "s3://path_to_log",
      "role": "DataPipelineDefaultRole",
      "resourceRole": "DataPipelineDefaultResourceRole"
    },
    {
      "id": "ScheduleId1",
      "name": "DefaultSchedule1",
      "startAt": "FIRST_ACTIVATION_DATE_TIME",
      "type": "Schedule",
      "period": "15 Minutes"
    },
    {
      "id": "S3DataNodeS3Log",
      "name": "S3DataNodeS3Log",
      "type": "S3DataNode",
      "directoryPath": "s3://path_to_s3_log",
      "schedule": {
        "ref": "ScheduleId1"
      }
    },
    {
      "id": "RedshiftDatabaseId1",
      "name": "DefaultRedshiftDatabase1",
      "type": "RedshiftDatabase",
      "clusterId": "hoge",
      "databaseName": "dev",
      "username": "hogehoge",
      "*password": "fugafuga"
    },
    {
      "id": "RedshiftDataNodeS3Log",
      "name": "RedshiftDataNodeS3Log",
      "type": "RedshiftDataNode",
      "database": {
        "ref": "RedshiftDatabaseId1"
      },
      "tableName": "s3_access_logs",
      "createTableSql": "CREATE TABLE s3_access_logs ( bucket_owner VARCHAR(MAX) ENCODE RUNLENGTH, bucket VARCHAR(255) ENCODE LZO, request_timestamp VARCHAR(MAX) SORTKEY ENCODE LZO, request_timestamp_delta VARCHAR(MAX) ENCODE LZO, remote_ip VARCHAR(50) ENCODE LZO, requestor VARCHAR(MAX) ENCODE LZO, request_id VARCHAR(MAX) ENCODE LZO, operation VARCHAR(MAX) ENCODE LZO, key VARCHAR(MAX) ENCODE LZO, request_uri VARCHAR(MAX) DISTKEY ENCODE LZO, http_status_code VARCHAR(MAX) ENCODE LZO, error_code VARCHAR(MAX) ENCODE LZO, sent_bytes VARCHAR(MAX) ENCODE LZO, object_size VARCHAR(MAX) ENCODE LZO, total_time VARCHAR(MAX) ENCODE LZO, turn_around_time VARCHAR(MAX) ENCODE LZO, referer VARCHAR(MAX) ENCODE LZO, user_agent VARCHAR(MAX) ENCODE LZO, version_id VARCHAR(10) ENCODE LZO);",
      "schedule": {
        "ref": "ScheduleId1"
      }
    },
    {
      "id": "RedshiftCopyActivityS3Log",
      "name": "RedshiftCopyActivityS3Log",
      "type": "RedshiftCopyActivity",
      "input": {
        "ref": "S3DataNodeS3Log"
      },
      "commandOptions": [
        "DELIMITER ' '",
        "TRUNCATECOLUMNS",
        "TRIMBLANKS",
        "REMOVEQUOTES",
        "ACCEPTINVCHARS",
        "MAXERROR as 100000"
      ],
      "insertMode": "TRUNCATE",
      "runsOn": {
        "ref": "Ec2ResourceId1"
      },
      "schedule": {
        "ref": "ScheduleId1"
      },
      "output": {
        "ref": "RedshiftDataNodeS3Log"
      }
    },
    {
      "id": "Ec2ResourceId1",
      "name": "DefaultEc2Resource1",
      "type": "Ec2Resource",
      "terminateAfter": "1 HOURS",
      "schedule": {
        "ref": "ScheduleId1"
      },
      "securityGroups": "default",
      "logUri": "s3://path_to_log",
      "role": "DataPipelineDefaultRole",
      "resourceRole": "DataPipelineDefaultResourceRole"
    }
  ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment