Skip to content

Instantly share code, notes, and snippets.

@moomindani
Created November 24, 2014 00:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moomindani/04f0dd102f7387225747 to your computer and use it in GitHub Desktop.
Save moomindani/04f0dd102f7387225747 to your computer and use it in GitHub Desktop.
Import S3 log into Redshift using Data Pipeline
{
"objects": [
{
"id": "Default",
"scheduleType": "TIMESERIES",
"failureAndRerunMode": "CASCADE",
"name": "Default",
"pipelineLogUri": "s3://path_to_log",
"role": "DataPipelineDefaultRole",
"resourceRole": "DataPipelineDefaultResourceRole"
},
{
"id": "ScheduleId1",
"name": "DefaultSchedule1",
"startAt": "FIRST_ACTIVATION_DATE_TIME",
"type": "Schedule",
"period": "15 Minutes"
},
{
"id": "S3DataNodeS3Log",
"name": "S3DataNodeS3Log",
"type": "S3DataNode",
"directoryPath": "s3://path_to_s3_log",
"schedule": {
"ref": "ScheduleId1"
}
},
{
"id": "RedshiftDatabaseId1",
"name": "DefaultRedshiftDatabase1",
"type": "RedshiftDatabase",
"clusterId": "hoge",
"databaseName": "dev",
"username": "hogehoge",
"*password": "fugafuga"
},
{
"id": "RedshiftDataNodeS3Log",
"name": "RedshiftDataNodeS3Log",
"type": "RedshiftDataNode",
"database": {
"ref": "RedshiftDatabaseId1"
},
"tableName": "s3_access_logs",
"createTableSql": "CREATE TABLE s3_access_logs ( bucket_owner VARCHAR(MAX) ENCODE RUNLENGTH, bucket VARCHAR(255) ENCODE LZO, request_timestamp VARCHAR(MAX) SORTKEY ENCODE LZO, request_timestamp_delta VARCHAR(MAX) ENCODE LZO, remote_ip VARCHAR(50) ENCODE LZO, requestor VARCHAR(MAX) ENCODE LZO, request_id VARCHAR(MAX) ENCODE LZO, operation VARCHAR(MAX) ENCODE LZO, key VARCHAR(MAX) ENCODE LZO, request_uri VARCHAR(MAX) DISTKEY ENCODE LZO, http_status_code VARCHAR(MAX) ENCODE LZO, error_code VARCHAR(MAX) ENCODE LZO, sent_bytes VARCHAR(MAX) ENCODE LZO, object_size VARCHAR(MAX) ENCODE LZO, total_time VARCHAR(MAX) ENCODE LZO, turn_around_time VARCHAR(MAX) ENCODE LZO, referer VARCHAR(MAX) ENCODE LZO, user_agent VARCHAR(MAX) ENCODE LZO, version_id VARCHAR(10) ENCODE LZO);",
"schedule": {
"ref": "ScheduleId1"
}
},
{
"id": "RedshiftCopyActivityS3Log",
"name": "RedshiftCopyActivityS3Log",
"type": "RedshiftCopyActivity",
"input": {
"ref": "S3DataNodeS3Log"
},
"commandOptions": [
"DELIMITER ' '",
"TRUNCATECOLUMNS",
"TRIMBLANKS",
"REMOVEQUOTES",
"ACCEPTINVCHARS",
"MAXERROR as 100000"
],
"insertMode": "TRUNCATE",
"runsOn": {
"ref": "Ec2ResourceId1"
},
"schedule": {
"ref": "ScheduleId1"
},
"output": {
"ref": "RedshiftDataNodeS3Log"
}
},
{
"id": "Ec2ResourceId1",
"name": "DefaultEc2Resource1",
"type": "Ec2Resource",
"terminateAfter": "1 HOURS",
"schedule": {
"ref": "ScheduleId1"
},
"securityGroups": "default",
"logUri": "s3://path_to_log",
"role": "DataPipelineDefaultRole",
"resourceRole": "DataPipelineDefaultResourceRole"
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment