Skip to content

Instantly share code, notes, and snippets.

@okelet
Last active February 21, 2022 21:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save okelet/ba9a5b98233362a96f22c33c465289dd to your computer and use it in GitHub Desktop.
Save okelet/ba9a5b98233362a96f22c33c465289dd to your computer and use it in GitHub Desktop.

Testing Athena partition projection

Moved to the blog.

# Terraform settings: declares which providers this configuration needs.
terraform {
  required_providers {
    # AWS provider from the HashiCorp registry namespace (no version constraint is set).
    aws = { source = "hashicorp/aws" }
  }
}
# AWS provider configuration. The tags below are applied by default to every
# taggable resource managed through this provider.
provider "aws" {
  default_tags {
    tags = {
      Owner       = "Ops"
      Environment = terraform.workspace # name of the currently selected workspace
      Terraform   = "true"
    }
  }
}
# Account the current credentials belong to; its account_id is used in bucket
# names and in the S3 log paths.
data "aws_caller_identity" "current" {}

# Region the provider is operating in; used in bucket names and log paths.
data "aws_region" "current" {}

# ELB service account for the region; it is granted s3:PutObject in the
# access-log bucket policy.
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/elb_service_account
data "aws_elb_service_account" "main" {}

# Availability zones whose state is "available"; the VPC uses the first two.
data "aws_availability_zones" "available" {
  state = "available"
}
# Name of an EC2 key pair for the bastion instance; null means no key pair is set.
variable "key_pair" {
  default = null
  type    = string
}

# Prefix used to build the names of most resources in this configuration.
variable "prefix" {
  default = "lb_logs_athena_auto_part"
  type    = string
}

# Toggles the "projection.enabled" parameter of the Glue table.
variable "athena_projection_enabled" {
  default = true
  type    = bool
}

# Fixed values shared by several resources.
locals {
  vpc_cidr         = "192.168.22.0/24" # CIDR of the VPC; subnets are carved out of it
  ecs_service_name = "echo"            # used in the service, target group, log group and ALB path
}
# VPC spanning the first two available AZs, with two private and two public
# subnets and a single shared NAT gateway.
module "vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = "${var.prefix}-vpc"
  cidr = local.vpc_cidr
  azs  = slice(data.aws_availability_zones.available.names, 0, 2)

  # Each subnet adds 4 bits to the VPC prefix; indexes 0-1 are private, 2-3 public.
  private_subnets = [for idx in [0, 1] : cidrsubnet(local.vpc_cidr, 4, idx)]
  public_subnets  = [for idx in [2, 3] : cidrsubnet(local.vpc_cidr, 4, idx)]

  enable_dns_support   = true
  enable_dns_hostnames = true

  single_nat_gateway = true
  enable_nat_gateway = true
}
# Most recent Amazon-owned image whose name matches the Amazon Linux 2
# HVM / x86_64 / EBS pattern; used for the bastion instance.
data "aws_ami" "amazon_linux_2" {
  owners      = ["amazon"]
  most_recent = true

  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-*-x86_64-ebs"]
  }
}
# Security group for the bastion: inbound TCP/22 from anywhere, all outbound traffic.
# NOTE(review): unlike the other security groups here, the name does not include var.prefix.
resource "aws_security_group" "allow_ssh" {
  vpc_id = module.vpc.vpc_id
  name   = "allow_ssh"

  # Unrestricted outbound traffic.
  egress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "ALL"
    from_port   = 0
    to_port     = 0
  }

  # SSH from any IPv4 address.
  ingress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "tcp"
    from_port   = 22
    to_port     = 22
  }
}
# Security group attached to the ALB and the bastion: inbound TCP/80 from
# anywhere, all outbound traffic.
resource "aws_security_group" "allow_http_80" {
  vpc_id = module.vpc.vpc_id
  name   = "${var.prefix}_allow_http_80"

  # Unrestricted outbound traffic.
  egress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "ALL"
    from_port   = 0
    to_port     = 0
  }

  # HTTP from any IPv4 address.
  ingress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
  }
}
# Security group attached to the ECS service tasks: inbound TCP/8080 from
# anywhere, all outbound traffic.
resource "aws_security_group" "allow_http_8080" {
  vpc_id = module.vpc.vpc_id
  name   = "${var.prefix}_allow_http_8080"

  # Unrestricted outbound traffic.
  egress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "ALL"
    from_port   = 0
    to_port     = 0
  }

  # Container port, reachable from any IPv4 address.
  ingress {
    cidr_blocks = ["0.0.0.0/0"]
    protocol    = "tcp"
    from_port   = 8080
    to_port     = 8080
  }
}
# Bastion host in the first public subnet. It gets a public IP, accepts SSH and
# HTTP (see the attached security groups) and serves a greeting page via nginx.
resource "aws_instance" "bastion" {
  instance_type = "t3.micro"
  ami           = data.aws_ami.amazon_linux_2.id
  key_name      = var.key_pair

  subnet_id                   = module.vpc.public_subnets[0]
  associate_public_ip_address = true
  vpc_security_group_ids = [
    aws_security_group.allow_ssh.id,
    aws_security_group.allow_http_80.id,
  ]

  tags = {
    Name = "bastion"
  }

  # Boot script: install and enable nginx, back up the stock index page,
  # replace it with a host-specific greeting, then start the server.
  # The "<<-" heredoc strips the common leading indentation of the lines below.
  user_data = <<-EOF
    #!/bin/bash
    amazon-linux-extras install nginx1 -y
    systemctl enable nginx
    cp /usr/share/nginx/html/index.html{,.old}
    echo "Hello from $(hostname)" > /usr/share/nginx/html/index.html
    systemctl start nginx
  EOF
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket
# Bucket that stores Athena query results (it is the workgroup's output location).
#
# The name is built from the account ID, the region and var.prefix. Every run of
# characters outside [a-zA-Z0-9-] is collapsed into a single "-", and the result
# is lower-cased because S3 bucket names must not contain upper-case letters
# (a prefix such as "MyStack" would otherwise yield an invalid name).
resource "aws_s3_bucket" "athena_results" {
  bucket = lower(replace("${data.aws_caller_identity.current.account_id}_${data.aws_region.current.name}_${var.prefix}_athena_results", "/[^a-zA-Z0-9-]+/", "-"))
  acl    = "private"

  # Let `terraform destroy` remove the bucket even when it still holds objects.
  force_destroy = true

  # Encrypt objects at rest with S3-managed keys by default.
  server_side_encryption_configuration {
    rule {
      apply_server_side_encryption_by_default {
        sse_algorithm = "AES256"
      }
    }
  }
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket
# Bucket that receives the ALB access logs queried through Athena.
#
# The name is built from the account ID, the region and var.prefix. Every run of
# characters outside [a-zA-Z0-9-] is collapsed into a single "-", and the result
# is lower-cased because S3 bucket names must not contain upper-case letters
# (a prefix such as "MyStack" would otherwise yield an invalid name).
resource "aws_s3_bucket" "lb_logs" {
  bucket = lower(replace("${data.aws_caller_identity.current.account_id}_${data.aws_region.current.name}_${var.prefix}_alb_logs", "/[^a-zA-Z0-9-]+/", "-"))
  acl    = "private"

  # Let `terraform destroy` remove the bucket even when it still holds log objects.
  force_destroy = true

  # Encrypt objects at rest with S3-managed keys by default.
  server_side_encryption_configuration {
    rule {
      apply_server_side_encryption_by_default {
        sse_algorithm = "AES256"
      }
    }
  }
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_policy
# Bucket policy that lets Elastic Load Balancing deliver access logs into the
# lb_logs bucket, under AWSLogs/<account-id>/.
#
# Resource ARNs are derived from the bucket's exported `arn` attribute instead of
# a hand-written "arn:aws:s3:::" prefix, so the policy is also correct in
# partitions other than "aws"; in the standard partition the rendered JSON is
# identical to the hand-written form.
resource "aws_s3_bucket_policy" "b" {
  bucket = aws_s3_bucket.lb_logs.bucket
  policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      # Regional ELB service account may write log objects.
      {
        "Effect" : "Allow",
        "Principal" : {
          "AWS" : data.aws_elb_service_account.main.arn
        },
        "Action" : "s3:PutObject",
        "Resource" : "${aws_s3_bucket.lb_logs.arn}/AWSLogs/${data.aws_caller_identity.current.account_id}/*"
      },
      # Log delivery service may write objects, provided the bucket owner gets full control.
      {
        "Effect" : "Allow",
        "Principal" : {
          "Service" : "delivery.logs.amazonaws.com"
        },
        "Action" : "s3:PutObject",
        "Resource" : "${aws_s3_bucket.lb_logs.arn}/AWSLogs/${data.aws_caller_identity.current.account_id}/*",
        "Condition" : {
          "StringEquals" : {
            "s3:x-amz-acl" : "bucket-owner-full-control"
          }
        }
      },
      # Log delivery service may read the bucket ACL.
      {
        "Effect" : "Allow",
        "Principal" : {
          "Service" : "delivery.logs.amazonaws.com"
        },
        "Action" : "s3:GetBucketAcl",
        "Resource" : aws_s3_bucket.lb_logs.arn
      }
    ]
  })
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lb
# Public application load balancer in the public subnets, with access logging
# to the lb_logs bucket enabled.
resource "aws_lb" "alb" {
  # Load balancer names do not allow "_": turn underscores into hyphens, then
  # drop any other character outside [a-zA-Z0-9-].
  name               = replace(replace("${var.prefix}_alb", "_", "-"), "/[^a-zA-Z0-9-]/", "")
  load_balancer_type = "application"
  security_groups    = [aws_security_group.allow_http_80.id]
  subnets            = module.vpc.public_subnets

  access_logs {
    bucket  = aws_s3_bucket.lb_logs.bucket
    enabled = true
  }

  # ELB checks that it can write to the log bucket when access logging is turned
  # on. The reference to the bucket above does not order this resource after the
  # bucket *policy*, so without this explicit dependency the first apply can fail
  # with an access-denied error on the bucket.
  depends_on = [aws_s3_bucket_policy.b]
}
# HTTP listener on port 80. Requests that match no listener rule get a fixed
# 404 HTML page pointing at the /echo/ path.
resource "aws_lb_listener" "http" {
  load_balancer_arn = aws_lb.alb.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    type = "fixed-response"

    fixed_response {
      status_code  = "404"
      content_type = "text/html"
      message_body = "<html><body><p>Nothing yet here; try <a href=\"/echo/\">here</a>.</p></body></html>\n"
    }
  }
}
# IP-type target group that the ECS service registers its tasks in.
resource "aws_lb_target_group" "tg" {
  # Underscores become hyphens, then any other character outside [a-zA-Z0-9-] is dropped.
  name = replace(replace("${var.prefix}_${local.ecs_service_name}", "_", "-"), "/[^a-zA-Z0-9-]/", "")

  vpc_id      = module.vpc.vpc_id
  target_type = "ip"
  protocol    = "HTTP"
  port        = 80

  # Short deregistration delay (in seconds) for targets being removed.
  deregistration_delay = 5
}
# Listener rule (priority 100) that forwards requests under /<service-name>/
# to the service's target group.
resource "aws_lb_listener_rule" "static" {
  listener_arn = aws_lb_listener.http.arn
  priority     = 100

  condition {
    path_pattern {
      values = ["/${local.ecs_service_name}/*"]
    }
  }

  action {
    target_group_arn = aws_lb_target_group.tg.arn
    type             = "forward"
  }
}
# ECS cluster with both Fargate capacity providers registered; FARGATE_SPOT is
# the default strategy. Container Insights is turned on.
resource "aws_ecs_cluster" "cluster" {
  name = "${var.prefix}_ecs_cluster"

  setting {
    name  = "containerInsights"
    value = "enabled"
  }

  capacity_providers = ["FARGATE", "FARGATE_SPOT"]

  default_capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
  }
}
# Trust policy document allowing the ECS tasks service to assume a role;
# used by the task role.
data "aws_iam_policy_document" "task_role_assume_role_policy" {
  statement {
    principals {
      identifiers = ["ecs-tasks.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}
# IAM role used as the task role of the ECS task definition.
resource "aws_iam_role" "task_role" {
  assume_role_policy = data.aws_iam_policy_document.task_role_assume_role_policy.json
  name               = "${var.prefix}_task_role"
}
# Inline policy on the task role granting the four ssmmessages channel actions
# on all resources (the ECS service has enable_execute_command turned on).
resource "aws_iam_role_policy" "task_policy" {
  role = aws_iam_role.task_role.id
  name = "${var.prefix}_task_policy"

  policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      {
        "Effect" : "Allow",
        "Resource" : "*",
        "Action" : [
          "ssmmessages:CreateControlChannel",
          "ssmmessages:CreateDataChannel",
          "ssmmessages:OpenControlChannel",
          "ssmmessages:OpenDataChannel",
        ],
      },
    ],
  })
}
# IAM role used as the execution role of the ECS task definition; it can be
# assumed by the ECS tasks service.
resource "aws_iam_role" "task_execution_role" {
  name = "${var.prefix}_task_execution_role"

  assume_role_policy = jsonencode({
    "Version" : "2012-10-17",
    "Statement" : [
      {
        "Effect" : "Allow",
        "Action" : "sts:AssumeRole",
        "Principal" : {
          "Service" : "ecs-tasks.amazonaws.com"
        },
      },
    ],
  })
}
# Attach the AWS-managed ECS task execution policy to the execution role.
resource "aws_iam_role_policy_attachment" "task_execution_role" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.task_execution_role.name
}

# Attach the AWS-managed CloudWatchFullAccess policy to the execution role.
# NOTE(review): this is a broad grant for a role that only needs to ship container
# logs — confirm whether it is required on top of the execution policy above.
resource "aws_iam_role_policy_attachment" "task_execution_role_attachment_CloudWatchFullAccess" {
  policy_arn = "arn:aws:iam::aws:policy/CloudWatchFullAccess"
  role       = aws_iam_role.task_execution_role.name
}
# Log group that the service's containers write to through the awslogs driver;
# the name embeds the cluster name and the service name.
resource "aws_cloudwatch_log_group" "log_group" {
  name = "/ecs/clusters/${aws_ecs_cluster.cluster.name}/services/${local.ecs_service_name}"
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_task_definition
# Task definition with a single "echo" container listening on port 8080 and
# logging to the CloudWatch log group through the awslogs driver.
resource "aws_ecs_task_definition" "task_def" {
  family = "${var.prefix}_${local.ecs_service_name}"

  requires_compatibilities = ["EC2", "FARGATE"]
  network_mode             = "awsvpc"
  memory                   = 512
  cpu                      = 256

  task_role_arn      = aws_iam_role.task_role.arn
  execution_role_arn = aws_iam_role.task_execution_role.arn

  container_definitions = jsonencode([
    {
      "name"      = "echo"
      "image"     = "mendhak/http-https-echo:19"
      "essential" = true

      "portMappings" = [
        {
          "containerPort" = 8080
          "hostPort"      = 8080
        },
      ]

      # Run an init process inside the container.
      "linuxParameters" = {
        "initProcessEnabled" = true
      }

      "logConfiguration" = {
        "logDriver" = "awslogs"
        "options" = {
          "awslogs-group"         = aws_cloudwatch_log_group.log_group.name
          "awslogs-region"        = data.aws_region.current.name
          "awslogs-stream-prefix" = "ecs"
        }
      }
    },
  ])
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/ecs_service
# ECS service running the echo task in the private subnets on FARGATE_SPOT,
# registered in the ALB target group on container port 8080.
resource "aws_ecs_service" "service" {
  name            = "${var.prefix}_${local.ecs_service_name}"
  cluster         = aws_ecs_cluster.cluster.id
  task_definition = aws_ecs_task_definition.task_def.arn
  desired_count   = 2

  # Enables ECS Exec; the task role carries the ssmmessages permissions it uses.
  enable_execute_command = true

  network_configuration {
    subnets         = module.vpc.private_subnets
    security_groups = [aws_security_group.allow_http_8080.id]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.tg.arn
    container_name   = "echo"
    container_port   = 8080
  }

  capacity_provider_strategy {
    capacity_provider = "FARGATE_SPOT"
    weight            = 1
  }

  lifecycle {
    # The task count may be changed outside Terraform; do not reset it on apply.
    ignore_changes = [desired_count]
  }

  # ECS rejects a service whose target group is not yet associated with a load
  # balancer. The target group only becomes associated once the listener rule
  # that forwards to it exists, and nothing above references that rule, so the
  # ordering has to be stated explicitly.
  depends_on = [aws_lb_listener_rule.static]
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_database
# Glue catalog database that holds the ALB access-log table.
resource "aws_glue_catalog_database" "athena_db" {
  name = "${var.prefix}_alb_logs"
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/glue_catalog_table
# https://docs.aws.amazon.com/athena/latest/ug/application-load-balancer-logs.html
# External table over the ALB access logs in the lb_logs bucket, partitioned by
# year/month/day. Partition projection parameters are always present; whether
# projection is active is controlled by var.athena_projection_enabled.
resource "aws_glue_catalog_table" "aws_glue_catalog_table" {
  database_name = aws_glue_catalog_database.athena_db.name
  name          = "alb_logs"
  table_type    = "EXTERNAL_TABLE"

  # Three integer partition keys, in path order.
  dynamic "partition_keys" {
    for_each = ["year", "month", "day"]

    content {
      name = partition_keys.value
      type = "int"
    }
  }

  parameters = {
    "has_encrypted_data" = "false"
    "projection.enabled" = tostring(var.athena_projection_enabled)

    "projection.year.type"   = "integer"
    "projection.year.range"  = "2020,2099"
    "projection.year.digits" = "4"

    "projection.month.type"   = "integer"
    "projection.month.range"  = "01,12"
    "projection.month.digits" = "2"

    "projection.day.type"   = "integer"
    "projection.day.range"  = "01,31"
    "projection.day.digits" = "2"

    # "$${...}" is an escaped literal "${...}" placeholder left in the stored template,
    # not a Terraform interpolation.
    "storage.location.template" = "s3://${aws_s3_bucket.lb_logs.bucket}/AWSLogs/${data.aws_caller_identity.current.account_id}/elasticloadbalancing/${data.aws_region.current.name}/$${year}/$${month}/$${day}"
  }

  storage_descriptor {
    location      = "s3://${aws_s3_bucket.lb_logs.bucket}/AWSLogs/${data.aws_caller_identity.current.account_id}/elasticloadbalancing/${data.aws_region.current.name}/"
    input_format  = "org.apache.hadoop.mapred.TextInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"

    # Each log line is split by one regular expression; its capture groups map,
    # in order, onto the columns declared below.
    ser_de_info {
      serialization_library = "org.apache.hadoop.hive.serde2.RegexSerDe"
      parameters = {
        "serialization.format" = 1
        "input.regex"          = "([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\\s]+?)\" \"([^\\s]+)\" \"([^ ]*)\" \"([^ ]*)\""
      }
    }

    # Column list; the order must stay aligned with the regex capture groups.
    dynamic "columns" {
      for_each = [
        { name = "type", type = "string" },
        { name = "time", type = "string" },
        { name = "elb", type = "string" },
        { name = "client_ip", type = "string" },
        { name = "client_port", type = "string" },
        { name = "target_ip", type = "string" },
        { name = "target_port", type = "int" },
        { name = "request_processing_time", type = "double" },
        { name = "target_processing_time", type = "double" },
        { name = "response_processing_time", type = "double" },
        { name = "elb_status_code", type = "string" },
        { name = "target_status_code", type = "string" },
        { name = "received_bytes", type = "bigint" },
        { name = "sent_bytes", type = "bigint" },
        { name = "request_verb", type = "string" },
        { name = "request_url", type = "string" },
        { name = "request_proto", type = "string" },
        { name = "user_agent", type = "string" },
        { name = "ssl_cipher", type = "string" },
        { name = "ssl_protocol", type = "string" },
        { name = "target_group_arn", type = "string" },
        { name = "trace_id", type = "string" },
        { name = "domain_name", type = "string" },
        { name = "chosen_cert_arn", type = "string" },
        { name = "matched_rule_priority", type = "string" },
        { name = "request_creation_time", type = "string" },
        { name = "actions_executed", type = "string" },
        { name = "redirect_url", type = "string" },
        { name = "lambda_error_reason", type = "string" },
        { name = "target_port_list", type = "string" },
        { name = "target_status_code_list", type = "string" },
        { name = "classification", type = "string" },
        { name = "classification_reason", type = "string" },
      ]

      content {
        name = columns.value.name
        type = columns.value.type
      }
    }
  }
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/athena_workgroup
# Athena workgroup whose query results are written to the athena_results bucket.
resource "aws_athena_workgroup" "primary" {
  name = "${var.prefix}_workgroup"

  configuration {
    result_configuration {
      output_location = "s3://${aws_s3_bucket.athena_results.bucket}/"
    }
  }

  force_destroy = true
}
# DNS name of the application load balancer.
output "lb_addr" {
  value = aws_lb.alb.dns_name
}

# S3 prefix under which the ALB access logs for this account and region are stored.
output "alb_s3_logs_path" {
  value = "s3://${aws_s3_bucket.lb_logs.bucket}/AWSLogs/${data.aws_caller_identity.current.account_id}/elasticloadbalancing/${data.aws_region.current.name}"
}

# Name of the Athena workgroup.
output "athena_workgroup_name" {
  value = aws_athena_workgroup.primary.name
}

# Bucket that holds the Athena query results.
output "athena_workgroup_bucket" {
  value = aws_s3_bucket.athena_results.bucket
}

# Name of the Glue/Athena database.
output "athena_db_name" {
  value = aws_glue_catalog_database.athena_db.name
}

# Name of the Glue/Athena table with the ALB logs.
output "athena_table_name" {
  value = aws_glue_catalog_table.aws_glue_catalog_table.name
}
@Kikimora
Copy link

I've tried to follow a few blog posts that describe how to set up ECS with an ALB using Terraform, with no success. They were so fragmented, long, and detailed that I quickly got lost in the weeds. This piece shows how to set up ECS properly, and to me this is more valuable than the Athena setup itself :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment