Skip to content

Instantly share code, notes, and snippets.

@jster1357
jster1357 / gcs_cdf_trigger_function.py
Last active July 13, 2021 20:22
Google GCS cloud function trigger for data fusion pipeline
from google.cloud import pubsub_v1
import subprocess
import requests
import json
import os
import time
# get the access token for the API call
def get_access_token():
@jster1357
jster1357 / main.tf
Created June 1, 2022 19:43
Data Fusion Terraform Instance Creation - no shared vpc
resource "google_data_fusion_instance" "create_instance" {
name = var.instance_name
description = var.description
region = var.region
type = var.cdf_version
enable_stackdriver_logging = true
enable_stackdriver_monitoring = true
labels = {
instance_owner = var.instance_owner
}
@jster1357
jster1357 / variables.tf
Created June 1, 2022 19:49
Data Fusion Terraform Variables - non shared vpc
# Name of the Data Fusion instance. The main.tf preview in this set assigns it
# to the `name` argument of google_data_fusion_instance. No default is declared
# here, so a value has to be supplied (e.g. from a tfvars file).
variable "instance_name" {
description = "The instance name."
type = string
}
# Instance type (edition) of the Data Fusion instance. Despite the variable
# name, this is not a release number: main.tf passes it to the `type` argument
# of google_data_fusion_instance, and the tfvars file carries the release
# separately as `cdf_release`. No default is declared, so a value is required.
variable "cdf_version" {
  description = "The Data Fusion instance type (edition) to deploy: BASIC, DEVELOPER, or ENTERPRISE"
  type        = string
}
@jster1357
jster1357 / versions.tf
Last active June 1, 2022 20:14
Data Fusion Terraform Providers - no shared vpc
# Terraform settings block: declares the providers this configuration needs.
terraform {
required_providers {
google = {
# Provider source address (namespace/type).
source = "hashicorp/google"
# Exact version constraint (no range operator): only 4.22.0 satisfies it.
version = "4.22.0"
}
}
}
provider "google" {
@jster1357
jster1357 / terraform.tfvars
Created June 1, 2022 20:05
Data Fusion Terraform tfvars - no shared VPC
# Example variable values for the Data Fusion deployment (no shared VPC).
# Name given to the Data Fusion instance.
instance_name = "my-cdf"
# Instance type; the variable's description lists BASIC, DEVELOPER, ENTERPRISE.
cdf_version = "ENTERPRISE"
# NOTE(review): presumably the CDF release to run — the variable declaration
# and its use are not visible here; confirm they exist.
cdf_release = "6.6.0"
# NOTE(review): presumably the VPC network name for the instance — confirm
# against the variable declarations and main.tf.
cdf_network = "default"
# NOTE(review): presumably the /22 IP range allocated to the instance — confirm
# it does not overlap existing ranges in the network.
cdf_ip_range = "172.28.76.0/22"
# Free-text description applied to the instance.
description = "Enterprise CDF deployed by Terraform"
# NOTE(review): looks like a placeholder address — replace with a real service
# account before applying.
default_service_account = "my-service-account@developer.gserviceaccount.com"
# Used as the value of the `instance_owner` label in main.tf.
instance_owner = "Bob"
# Region in which the instance is created.
region = "us-central1"
@jster1357
jster1357 / main.tf
Created June 2, 2022 17:35
Data Fusion Terraform - no shared vpc w/ loaded pipelines
resource "google_data_fusion_instance" "create_instance" {
name = var.instance_name
description = var.description
region = var.region
type = var.cdf_version
enable_stackdriver_logging = true
enable_stackdriver_monitoring = true
labels = {
instance_owner = var.instance_owner
}
@jster1357
jster1357 / resource.tf
Created June 2, 2022 17:40
Data Fusion Terraform - no shared vpc w/ pipeline load
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "4.22.0"
}
cdap = {
source = "GoogleCloudPlatform/cdap"
# Pin to a specific version as 0.x releases are not guaranteed to be backwards compatible.
version = "0.9.0"
@jster1357
jster1357 / variables.tf
Created June 2, 2022 17:59
Data Fusion Terraform - no shared vpc w/ pipeline load
# ID of the Google Cloud project that holds the Data Fusion (CDF) deployment.
# No default is declared here, so a value has to be supplied.
variable "project_id" {
description = "The project id of the cdf deployment"
type = string
}
# Name of the Data Fusion instance. The main.tf preview for this set assigns it
# to the `name` argument of google_data_fusion_instance. No default is declared
# here, so a value has to be supplied.
variable "instance_name" {
description = "The instance name."
type = string
}
@jster1357
jster1357 / gist:9dcbf8346d170c3423bec036555ee009
Last active November 2, 2022 15:16
getCDFMetadataDAGComposer
import datetime
import re
from airflow import models
from airflow.providers.google.cloud.operators.datafusion import CloudDataFusionStartPipelineOperator
from airflow.providers.google.cloud.sensors.datafusion import CloudDataFusionPipelineStateSensor
from airflow.providers.google.cloud.hooks.datafusion import PipelineStates
from airflow.utils.dates import days_ago
PROJECT_ID=""
REGION=""
@jster1357
jster1357 / PipelineMetadataExtractionPipeline.json
Created October 31, 2022 19:27
PipelineMetadataExtractionPipeline.json
{
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.7.1",
"scope": "SYSTEM"
},
"description": "Data Pipeline Application",
"name": "getRunIDMetrics_v4",
"config": {
"resources": {