Hakan İlter (hakanilter) — GitHub gists
@hakanilter
hakanilter / automatic_dynamic_masking.sql
Created November 1, 2023 10:20
dbt Databricks automatic data masking macro; must be configured as a post-hook
{% macro get_query_results_as_dict(query) %}
    {{ return(adapter.dispatch('get_query_results_as_dict', 'dbt_utils')(query)) }}
{% endmacro %}

{% macro automatic_dynamic_masking() %}
    {% set sensitive_columns = ['email', 'firstname', 'lastname', 'middlename', 'name', 'phone', 'telephone'] %}
    {% set query %}
        SELECT * FROM {{ this }} LIMIT 0
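The preview ends inside the `{% set query %}` block. A minimal sketch of how the macro could continue, assuming dbt's built-in `run_query` and a pre-created Databricks masking UDF named `mask_pii` (hypothetical, not from the gist):

    {% endset %}
    {# LIMIT 0 returns only the column names; compare each against the sensitive list #}
    {% set results = get_query_results_as_dict(query) %}
    {% for column in results.keys() %}
        {% if column | lower in sensitive_columns %}
            {# Unity Catalog column mask; mask_pii is an assumed UDF #}
            {% do run_query("ALTER TABLE " ~ this ~ " ALTER COLUMN " ~ column ~ " SET MASK mask_pii") %}
        {% endif %}
    {% endfor %}
{% endmacro %}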
@hakanilter
hakanilter / aws_eventbridge_ecs_fargate_task_scheduler.tf
Last active March 24, 2023 14:08
AWS EventBridge ECS Fargate Task Scheduler Terraform Example
resource "aws_scheduler_schedule_group" "ecs_schedule_group" {
name = "ecs-schedule-group"
}
resource "aws_scheduler_schedule" "data_import_job_schedule" {
name = "${var.name}-data-import-job-schedule-${var.env_name}"
group_name = aws_scheduler_schedule_group.ecs_schedule_group.name
flexible_time_window {
mode = "OFF"
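The preview stops inside `flexible_time_window`. A sketch of the remaining schedule and its ECS Fargate target; the cluster, role, task definition, and network references are assumptions for illustration:

  }

  schedule_expression = "cron(0 6 * * ? *)" # example expression, not from the gist

  target {
    arn      = aws_ecs_cluster.main.arn        # assumed cluster
    role_arn = aws_iam_role.scheduler_role.arn # assumed role allowed to call ecs:RunTask
    ecs_parameters {
      task_definition_arn = aws_ecs_task_definition.data_import_job.arn # assumed
      launch_type         = "FARGATE"
      network_configuration {
        subnets         = var.private_subnet_ids          # assumed variable
        security_groups = [aws_security_group.task_sg.id] # assumed
      }
    }
  }
}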
@hakanilter
hakanilter / execute_athena_query.py
Last active March 15, 2023 16:24
Boto3 Athena Query Example
import os
import time
import boto3

# boto3 Athena client; the preview references `athena` without showing its creation
athena = boto3.client("athena")

def execute_athena_query(query, database="default", timeout=30, sleep_time=10):
    context = {"Database": database}
    config = {"OutputLocation": os.environ["ATHENA_BUCKET"]}
    # Execute query
    request = athena.start_query_execution(QueryString=query, QueryExecutionContext=context, ResultConfiguration=config)
    # Wait for query result
    num_tries = int(timeout / sleep_time)
    status = athena.get_query_execution(QueryExecutionId=request["QueryExecutionId"])
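The function is cut off after the first status check; a plausible completion of the polling loop (assumed, not the gist's verbatim code):

    # Poll until the query reaches a terminal state or the timeout is spent
    for _ in range(num_tries):
        state = status["QueryExecution"]["Status"]["State"]
        if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
            break
        time.sleep(sleep_time)
        status = athena.get_query_execution(QueryExecutionId=request["QueryExecutionId"])
    return status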
@hakanilter
hakanilter / dynamic.tf
Created May 13, 2022 12:42
Terraform dynamic replication config for an S3 bucket
dynamic "replication_configuration" {
for_each = var.replicate_data_bucket ? [1] : []
content {
role = aws_iam_role.data_backup_replication_role.arn
rules {
id = "raw-data-replication"
prefix = "data/raw/"
status = "Enabled"
@hakanilter
hakanilter / pyspark_schema_util.py
Last active March 2, 2023 16:05
PySpark schema save/load example
import json
from pyspark.sql.types import *

def save_schema_as_json(df, schema_file):
    """
    Saves dataframe schema as json
    """
    schema = df.schema.json()
    schema = json.dumps(json.loads(schema), indent=4)
    with open(schema_file, "w") as f:
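The preview ends inside the `with` block; given the gist's save/load title, it presumably writes the JSON and pairs it with a loader, roughly like this (a sketch, not the original code):

        f.write(schema)

def load_schema_from_json(schema_file):
    """
    Loads a dataframe schema from a json file
    """
    with open(schema_file) as f:
        return StructType.fromJson(json.load(f))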
@hakanilter
hakanilter / default.conf
Created August 8, 2021 14:09
Nginx Proxy for Kibana + Basic Auth
server {
    listen 80;
    server_name localhost;

    auth_basic "Restricted Access";
    auth_basic_user_file /etc/nginx/htpasswd.users;

    location / {
        proxy_pass https://vpc-my-es-574vcxyz.eu-central-1.es.amazonaws.com/;
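The preview stops at `proxy_pass`; the block presumably closes after a few proxy directives, for example (assumed continuation):

        # Keep upstream connections on HTTP/1.1 (assumed; the gist's actual directives are not shown)
        proxy_http_version 1.1;
        proxy_set_header Connection "";
    }
}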
@hakanilter
hakanilter / spark_helper.py
Created September 14, 2020 07:46
Default PySpark Settings
import os
from pyspark.sql import SparkSession

# Assumed constant; the preview references SPARK_WAREHOUSE without defining it
SPARK_WAREHOUSE = os.environ.get("SPARK_WAREHOUSE", "spark-warehouse")

def get_spark(app_name):
    """
    Creates Spark session with default parameters
    """
    spark = SparkSession.builder \
        .master(os.environ.get("SPARK_MASTER", "local[*]")) \
        .appName(app_name) \
        .config("spark.default.parallelism", 16) \
        .config("spark.sql.adaptive.enabled", True) \
        .config("spark.sql.warehouse.dir", SPARK_WAREHOUSE) \
@hakanilter
hakanilter / auto-refresh.js
Created October 11, 2019 13:13
AWS UI auto-refresh JS scripts
// Cloudwatch
setInterval(function() {
    document.getElementsByClassName('cwdb-log-viewer-table-infinite-loader-bottom')[0].lastElementChild.click();
    document.getElementsByClassName('GIYU-ANBFDF')[0].scroll(0, document.body.scrollHeight);
}, 3000);

// EMR
setInterval(function() {
    document.getElementsByClassName('GAEMCWHGM')[14].click();
}, 5000);
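Both snippets are meant to be pasted into the browser's DevTools console on the corresponding AWS page. The obfuscated class names (`GIYU-ANBFDF`, `GAEMCWHGM`) come from AWS's generated markup, so they break whenever the console UI is redeployed.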
@hakanilter
hakanilter / s3_select.py
Created July 5, 2019 23:39
S3 Select Example
import boto3
import pandas as pd

s3 = boto3.client('s3', 'eu-west-1')

def execute_query(query):
    response = s3.select_object_content(
        Bucket='my-bucket',
        Key='nyse/NYSE-2000-2001.tsv.gz',
        ExpressionType='SQL',
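The call is truncated; a sketch of the remaining arguments and result handling, assuming a tab-delimited gzip input (the serialization settings are illustrative, not from the gist):

        Expression=query,
        InputSerialization={
            'CSV': {'FileHeaderInfo': 'USE', 'FieldDelimiter': '\t'},
            'CompressionType': 'GZIP',
        },
        OutputSerialization={'CSV': {}},
    )
    # The response payload is an event stream; concatenate the Records chunks
    import io  # kept local so the sketch stays self-contained
    records = b''.join(
        event['Records']['Payload'] for event in response['Payload'] if 'Records' in event
    )
    return pd.read_csv(io.BytesIO(records), header=None)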
@hakanilter
hakanilter / import-from-s3-to-postgres.sql
Created June 24, 2019 23:32
Import CSV from S3 to Postgres
-- https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/PostgreSQL.Procedural.Importing.html#USER_PostgreSQL.S3Import.table_import_from_s3
-- https://github.com/chimpler/postgres-aws-s3
CREATE EXTENSION aws_s3 CASCADE;

DROP TABLE nyse;

CREATE TABLE nyse (
    exchange VARCHAR(50),
    stock_symbol VARCHAR(50),
    stock_date DATE,
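The DDL is cut off mid-column-list; a sketch of how the import might finish, with the remaining columns, bucket, and key assumed for illustration (the `aws_s3.table_import_from_s3` signature follows the AWS docs linked above):

    stock_price_open REAL,
    stock_price_close REAL
);

SELECT aws_s3.table_import_from_s3(
    'nyse',                            -- target table
    '',                                -- column list (empty = all columns)
    '(FORMAT csv, DELIMITER E''\t'')', -- COPY options for tab-separated data
    'my-bucket',                       -- assumed bucket
    'nyse/NYSE-2000-2001.tsv',         -- assumed key
    'eu-west-1'                        -- region
);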