Skip to content

Instantly share code, notes, and snippets.

@jster1357
jster1357 / bq_schema_extractor_to_json.py
Created March 29, 2024 13:32
Terraform requires BigQuery (BQ) table details to be in JSON format. This code extracts the schema, partitioning, and clustering details as JSON so they can be used when creating BQ objects with Terraform.
import google.auth.transport.requests
from google.oauth2 import id_token
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
import google.auth
import requests
import json
import os
##set variables
@jster1357
jster1357 / PipelineMetadataExtractionPipeline.json
Created October 31, 2022 19:27
PipelineMetadataExtractionPipeline.json
{
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.7.1",
"scope": "SYSTEM"
},
"description": "Data Pipeline Application",
"name": "getRunIDMetrics_v4",
"config": {
"resources": {
@jster1357
jster1357 / gist:9dcbf8346d170c3423bec036555ee009
Last active November 2, 2022 15:16
getCDFMetadataDAGComposer
import datetime
import re
from airflow import models
from airflow.providers.google.cloud.operators.datafusion import CloudDataFusionStartPipelineOperator
from airflow.providers.google.cloud.sensors.datafusion import CloudDataFusionPipelineStateSensor
from airflow.providers.google.cloud.hooks.datafusion import PipelineStates
from airflow.utils.dates import days_ago
PROJECT_ID=""
REGION=""
@jster1357
jster1357 / variables.tf
Created June 2, 2022 17:59
Data Fusion Terraform - no shared vpc w/ pipeline load
# Input: id of the project that holds the CDF deployment.
variable "project_id" {
  type        = string
  description = "The project id of the cdf deployment"
}
# Input: name of the instance.
variable "instance_name" {
  type        = string
  description = "The instance name."
}
@jster1357
jster1357 / resource.tf
Created June 2, 2022 17:40
Data Fusion Terraform - no shared vpc w/ pipeline load
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "4.22.0"
}
cdap = {
source = "GoogleCloudPlatform/cdap"
# Pin to a specific version as 0.x releases are not guaranteed to be backwards compatible.
version = "0.9.0"
@jster1357
jster1357 / main.tf
Created June 2, 2022 17:35
Data Fusion Terraform - no shared vpc w/ loaded pipelines
resource "google_data_fusion_instance" "create_instance" {
name = var.instance_name
description = var.description
region = var.region
type = var.cdf_version
enable_stackdriver_logging = true
enable_stackdriver_monitoring = true
labels = {
instance_owner = var.instance_owner
}
@jster1357
jster1357 / terraform.tvars
Created June 1, 2022 20:05
Data Fusion Terraform tvars - no shared vpc
# Variable values for the Data Fusion (CDF) Terraform deployment, no shared VPC.

# Instance
instance_name = "my-cdf"
description   = "Enterprise CDF deployed by Terraform"
cdf_version   = "ENTERPRISE"
cdf_release   = "6.6.0"
region        = "us-central1"

# Network
cdf_network  = "default"
cdf_ip_range = "172.28.76.0/22"

# Identity and ownership
default_service_account = "my-service-account@developer.gserviceaccount.com"
instance_owner          = "Bob"
@jster1357
jster1357 / versions.tf
Last active June 1, 2022 20:14
Data Fusion Terraform Providers - no shared vpc
# Provider requirements: the Google provider is pinned to one exact release.
terraform {
  required_providers {
    google = {
      version = "4.22.0"
      source  = "hashicorp/google"
    }
  }
}
provider "google" {
@jster1357
jster1357 / variables.tf
Created June 1, 2022 19:49
Data Fusion Terraform Variables - no shared vpc
# Input: name of the instance.
variable "instance_name" {
  type        = string
  description = "The instance name."
}
# Input: which kind of CDF instance to deploy. The description lists the three
# accepted values; the validation block below enforces that list.
variable "cdf_version" {
  description = "The version of CDF to deploy: BASIC, DEVELOPER, ENTERPRISE"
  type        = string

  # Reject anything outside the documented set during variable validation, so a
  # typo is reported by Terraform itself rather than by a later API error.
  validation {
    condition     = contains(["BASIC", "DEVELOPER", "ENTERPRISE"], var.cdf_version)
    error_message = "The cdf_version value must be one of: BASIC, DEVELOPER, ENTERPRISE."
  }
}
@jster1357
jster1357 / main.tf
Created June 1, 2022 19:43
Data Fusion Terraform Instance Creation - no shared vpc
resource "google_data_fusion_instance" "create_instance" {
name = var.instance_name
description = var.description
region = var.region
type = var.cdf_version
enable_stackdriver_logging = true
enable_stackdriver_monitoring = true
labels = {
instance_owner = var.instance_owner
}