pulumi databricks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""An AWS Python Pulumi program to set up all the infrastructure which is required | |
- Databricks | |
- S3 buckets | |
We use the default Databricks managed VPC | |
""" | |
import json | |
from pathlib import Path | |
import pulumi | |
from jinja2 import Environment, FileSystemLoader | |
from pulumi_aws_native import iam, s3 | |
from pulumi_databricks import MwsCredentials, MwsStorageConfigurations, MwsWorkspaces | |
# Stack configuration: provider-level settings live under the "aws-native"
# namespace, project settings under the default namespace.
aws_config = pulumi.Config("aws-native")
config = pulumi.Config()

prefix = config.require("prefix")
our_env = config.require("environment")

# Two buckets: one for job results, one as the Databricks workspace root.
bucket_results = s3.Bucket(f"{prefix}-results")
bucket_databricks_root = s3.Bucket(f"{prefix}-root")

db_account_id = config.require("db_account_id")
databricks_aws_account_id = config.require("databricks_aws_account_id")
region = aws_config.require("region")

# IAM policy documents are kept as Jinja templates under files/.
tpl_path = Path("files")
env = Environment(loader=FileSystemLoader(tpl_path))

# Trust policy for the cross-account role, rendered with the Databricks
# service account and our Databricks account id.
cross_account_role_tpl_r = env.get_template(
    "iam-db-cross-account-role.json.tpl"
).render(
    db_official_databricks_aws_account=databricks_aws_account_id,
    db_account_id=db_account_id,
)

# The deployment policy is static JSON (no template variables); rendering it
# through Jinja simply keeps the loading path uniform with the other files.
access_policy = json.loads(
    env.get_template("databricks_deployment_policy.json").render()
)
# Cross-account role that Databricks assumes to manage compute resources in
# our AWS account; the trust policy was rendered above.
cross_account_role = iam.Role(
    f"{prefix}-cross-account-role",
    assume_role_policy_document=cross_account_role_tpl_r,
)

# Attach the (static) deployment policy taken from the Databricks
# documentation as an inline policy on that role.
databricks_default_policy = iam.RolePolicy(
    "databricks-deployment-p",
    policy_name="databricks-deployment-p",
    role_name=cross_account_role.role_name,
    policy_document=access_policy,
)
def add_s3_policy(role_arn, bucket_name):
    """Attach an inline S3 access policy for *bucket_name* to the cross-account role.

    Renders ``files/databricks_s3_permissions.json.tpl`` with the concrete
    role ARN and bucket name, then attaches the resulting document as an
    inline ``iam.RolePolicy`` on ``cross_account_role``.

    :param role_arn: role ARN substituted into the template's ``role_arn`` slot.
    :param bucket_name: S3 bucket name the policy should grant access to.
    :return: the created ``iam.RolePolicy`` resource.
    """
    # NOTE(review): the template contains a "Principal" element.  Identity-based
    # policies (RolePolicy) must not carry a Principal — that element is only
    # valid in resource-based (bucket) policies.  This looks like the reason
    # the Databricks storage-configuration validation fails; the document
    # probably belongs on the root bucket as a BucketPolicy instead — TODO confirm.
    template_vars = {
        "role_arn": role_arn,
        "bucket_name": bucket_name,
    }
    rendered = env.get_template("databricks_s3_permissions.json.tpl").render(
        **template_vars
    )
    policy_document = json.loads(rendered)
    return iam.RolePolicy(
        "databricks-s3-access",
        role_name=cross_account_role.role_name,
        policy_document=policy_document,
        policy_name="databricks-s3-access",
    )
# The role name and the bucket name are both Outputs: resolve them together
# and only then render/attach the S3 policy.
# NOTE(review): creating a resource inside .apply() is a Pulumi anti-pattern
# (the resource is invisible to previews); consider passing the rendered
# document as an Output into a RolePolicy created at the top level instead.
wrapped_outputs = pulumi.Output.all(
    cross_account_role.role_name,
    bucket_databricks_root.bucket_name,
)
s3_applied_policy = wrapped_outputs.apply(
    lambda resolved: add_s3_policy(resolved[0], resolved[1])
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Version": "2012-10-17", | |
"Statement": [ | |
{ | |
"Effect": "Allow", | |
"Principal": { | |
"AWS": "{{ role_arn }}" | |
}, | |
"Action": [ | |
"s3:ListBucket", | |
"s3:GetObject", | |
"s3:PutObject", | |
"s3:DeleteObject", | |
"s3:PutBucketOwnerControl" | |
], | |
"Resource": [ | |
"arn:aws:s3:::{{ bucket_name }}", | |
"arn:aws:s3:::{{ bucket_name }}/*" | |
] | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Version": "2012-10-17", | |
"Statement": [ | |
{ | |
"Sid": "Stmt1403287045000", | |
"Effect": "Allow", | |
"Action": [ | |
"ec2:AllocateAddress", | |
"ec2:AssignPrivateIpAddresses", | |
"ec2:AssociateDhcpOptions", | |
"ec2:AssociateIamInstanceProfile", | |
"ec2:AssociateRouteTable", | |
"ec2:AttachInternetGateway", | |
"ec2:AttachVolume", | |
"ec2:AuthorizeSecurityGroupEgress", | |
"ec2:AuthorizeSecurityGroupIngress", | |
"ec2:CancelSpotInstanceRequests", | |
"ec2:CreateDhcpOptions", | |
"ec2:CreateFleet", | |
"ec2:CreateInternetGateway", | |
"ec2:CreateLaunchTemplate", | |
"ec2:CreateLaunchTemplateVersion", | |
"ec2:CreateNatGateway", | |
"ec2:CreateRoute", | |
"ec2:CreateRouteTable", | |
"ec2:CreateSecurityGroup", | |
"ec2:CreateSubnet", | |
"ec2:CreateTags", | |
"ec2:CreateVolume", | |
"ec2:CreateVpc", | |
"ec2:CreateVpcEndpoint", | |
"ec2:DeleteDhcpOptions", | |
"ec2:DeleteFleets", | |
"ec2:DeleteInternetGateway", | |
"ec2:DeleteLaunchTemplate", | |
"ec2:DeleteLaunchTemplateVersions", | |
"ec2:DeleteNatGateway", | |
"ec2:DeleteRoute", | |
"ec2:DeleteRouteTable", | |
"ec2:DeleteSecurityGroup", | |
"ec2:DeleteSubnet", | |
"ec2:DeleteTags", | |
"ec2:DeleteVolume", | |
"ec2:DeleteVpc", | |
"ec2:DeleteVpcEndpoints", | |
"ec2:DescribeAvailabilityZones", | |
"ec2:DescribeFleetHistory", | |
"ec2:DescribeFleetInstances", | |
"ec2:DescribeFleets", | |
"ec2:DescribeIamInstanceProfileAssociations", | |
"ec2:DescribeInstanceStatus", | |
"ec2:DescribeInstances", | |
"ec2:DescribeInternetGateways", | |
"ec2:DescribeLaunchTemplates", | |
"ec2:DescribeLaunchTemplateVersions", | |
"ec2:DescribeNatGateways", | |
"ec2:DescribePrefixLists", | |
"ec2:DescribeReservedInstancesOfferings", | |
"ec2:DescribeRouteTables", | |
"ec2:DescribeSecurityGroups", | |
"ec2:DescribeSpotInstanceRequests", | |
"ec2:DescribeSpotPriceHistory", | |
"ec2:DescribeSubnets", | |
"ec2:DescribeVolumes", | |
"ec2:DescribeVpcs", | |
"ec2:DetachInternetGateway", | |
"ec2:DisassociateIamInstanceProfile", | |
"ec2:DisassociateRouteTable", | |
"ec2:GetLaunchTemplateData", | |
"ec2:GetSpotPlacementScores", | |
"ec2:ModifyFleet", | |
"ec2:ModifyLaunchTemplate", | |
"ec2:ModifyVpcAttribute", | |
"ec2:ReleaseAddress", | |
"ec2:ReplaceIamInstanceProfileAssociation", | |
"ec2:RequestSpotInstances", | |
"ec2:RevokeSecurityGroupEgress", | |
"ec2:RevokeSecurityGroupIngress", | |
"ec2:RunInstances", | |
"ec2:TerminateInstances" | |
], | |
"Resource": [ | |
"*" | |
] | |
}, | |
{ | |
"Effect": "Allow", | |
"Action": [ | |
"iam:CreateServiceLinkedRole", | |
"iam:PutRolePolicy" | |
], | |
"Resource": "arn:aws:iam::*:role/aws-service-role/spot.amazonaws.com/AWSServiceRoleForEC2Spot", | |
"Condition": { | |
"StringLike": { | |
"iam:AWSServiceName": "spot.amazonaws.com" | |
} | |
} | |
} | |
] | |
} | |
I was able to feed the following policy (with the Principal element removed from the template, since an inline role policy does not accept one):
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:ListBucket",
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
"s3:PutBucketOwnerControl"
],
"Resource": [
"arn:aws:s3:::{{ bucket_name }}",
"arn:aws:s3:::{{ bucket_name }}/*"
]
}
]
}
I am still very curious to learn how to do this in a better way.
Ideally, however, I would also like to figure out how the following error:
cannot create mws workspaces: MALFORMED_REQUEST: Failed storage configuration validation checks: List,Put,PutWithBucketOwnerFullControl,Delete
can be fixed. I had hoped that feeding in the policy would solve this as well.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The
is directly sourced from the databricks documentation https://docs.databricks.com/en/administration-guide/account-settings-e2/credentials.html#option-1-default-deployment-policy - apparently, their default SaaS Databricks managed VPC.
It does not contain any S3 references.
The file
contains a reference to a specific bucket (which is created by Pulumi), but I do not know how to properly feed the existing bucket name in via apply. Currently, the script is failing with:
as I am somehow mis-attaching the policies inside the apply callback.