Tony Fraser (tonythor)

tonythor / lambda_handler.py
Created February 22, 2022 21:31
Send a Slack message from AWS Lambda
import urllib3
import json

test_hook = "https://hooks.slack.com/services/TEF2GKSV7/XXXXXXHPH/XXXXXXXXXXXqaFQy7O3"
payload = {'text': 'lambda just sent a slack message to this channel'}
http = urllib3.PoolManager()

def lambda_handler(event, context):
    # POST the JSON payload to the Slack incoming webhook
    r = http.request("POST", test_hook, body=json.dumps(payload),
                     headers={'Content-Type': 'application/json'})
    return {'statusCode': r.status}
tonythor / ddb_image_text_model.json
Created February 16, 2022 05:49
Use Lambda/Rekognition to extract text from images on S3 upload, then store the text in a flat DynamoDB table
{
  "ModelName": "Image_Text",
  "ModelMetadata": {
    "Author": "",
    "DateCreated": "Feb 15, 2022, 09:52 PM",
    "DateLastModified": "Feb 15, 2022, 10:38 PM",
    "Description": "",
    "AWSService": "Amazon DynamoDB",
    "Version": "3.0"
  },
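The gist carries only the NoSQL Workbench data model; the Lambda side of the pipeline isn't shown. A minimal sketch of what that handler could look like, assuming a table named Image_Text keyed on the image's object key (the attribute names are illustrative, not from the gist):

import urllib.parse
import boto3

rekognition = boto3.client('rekognition')
table = boto3.resource('dynamodb').Table('Image_Text')  # table name taken from the model above

def lambda_handler(event, context):
    # S3 put events carry the bucket name and a URL-encoded object key
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = urllib.parse.unquote_plus(record['s3']['object']['key'])

        # ask Rekognition for any text it detects in the uploaded image
        detected = rekognition.detect_text(
            Image={'S3Object': {'Bucket': bucket, 'Name': key}})
        lines = [d['DetectedText'] for d in detected['TextDetections']
                 if d['Type'] == 'LINE']

        # store one flat item per image
        table.put_item(Item={'image_key': key, 'text': ' '.join(lines)})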
tonythor / appconfig_with_nested_secrets.py
Created January 31, 2022 21:48
A function that fetches an AWS AppConfig configuration, extracts nested AWS Secrets Manager locations from it, then looks those up as well.
# An example of how to use boto3 to fetch an AppConfig configuration,
# extract the locations of nested secrets from it, and then look those
# up in AWS Secrets Manager.
# Should work fine in Lambda if moved into a handler function.
import boto3
import json
from botocore.exceptions import ClientError
class SecretsWrapper:
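The preview cuts off at the class declaration, so the body of SecretsWrapper isn't shown. A standalone sketch of the flow the description outlines, assuming the configuration is JSON and stores its secret ARNs under a top-level "secrets" key (the key name and function names are illustrative):

import json
import boto3
from botocore.exceptions import ClientError

appconfig = boto3.client('appconfigdata')
secretsmanager = boto3.client('secretsmanager')

def get_config(app, env, profile):
    # one-shot fetch: open a session, then pull the latest configuration
    session = appconfig.start_configuration_session(
        ApplicationIdentifier=app,
        EnvironmentIdentifier=env,
        ConfigurationProfileIdentifier=profile)
    latest = appconfig.get_latest_configuration(
        ConfigurationToken=session['InitialConfigurationToken'])
    return json.loads(latest['Configuration'].read())

def resolve_secrets(config):
    # swap each secret location in the config for its actual value
    resolved = {}
    for name, secret_id in config.get('secrets', {}).items():
        try:
            secret = secretsmanager.get_secret_value(SecretId=secret_id)
        except ClientError as err:
            raise RuntimeError(f"could not resolve secret '{name}'") from err
        resolved[name] = secret['SecretString']
    return resolved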
tonythor / simple_airflow_xcom_tutorial_dag.py
Created November 10, 2021 14:59
A simple Airflow 1.10 DAG that takes an input JSON object, stores it, and then uses it later.
from airflow.utils.dates import days_ago
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
# use this json when you trigger.
# it's here as a variable only for cut-and-paste formatting and to keep the demo portable
cutAndPasteJson = """
{
"note": "For the passing variables dummy dag",
tonythor / Dockerfile
Created September 29, 2021 19:34
docker-compose and Dockerfile for Airflow 1.10, to be used for local development
FROM python:3.8
ARG AIRFLOW_VERSION=1.10.12
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}
COPY ./requirements.txt /requirements.txt
tonythor / recursivefunctionexec.scala
Last active January 12, 2021 19:48
Recursively execute a function in Scala until it returns true, or the attempts run out
import scala.annotation.tailrec
import scala.concurrent.duration.Duration
import scala.util.Random

// the function we'll run until true
def myFunction(): Boolean = {
  val rand = Random.nextInt()
  if (rand % 10 == 0) {
    print(s"$rand is divisible by 10\n")
    true
  } else false
}

// sketch of a tail-recursive driver: retry until f() returns true or attempts run out
@tailrec
def runUntil(f: () => Boolean, attemptsLeft: Int): Boolean =
  if (f()) true
  else if (attemptsLeft <= 1) false
  else runUntil(f, attemptsLeft - 1)
tonythor / zeppelin_test.sh
Created December 18, 2020 06:33
Use curl to trigger the Zeppelin API within a Mesos cluster
#!/bin/bash
# Remember to run "dcos auth login" first!
DCOS_API_TOKEN=$(dcos config show core.dcos_acs_token)
url="http://{marathon-domain}/service/{marathon zeppelin name}"
notebook="2E617JZX1" # $url/#/notebook/2E617JZX1
paragraph="20190916-164803_817623738"
# Note: to get a paragraph ID, download the notebook, open the JSON, and look for paragraphs -> Item[N] -> id.
curl --request GET -s -H "Content-Type: application/json" -H "Authorization: token=$DCOS_API_TOKEN" "$url/api/notebook"
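For comparison, the same call from Python; a sketch assuming Zeppelin's synchronous run endpoint (POST /api/notebook/run/{noteId}/{paragraphId}) and the token header format used in the script above:

import requests

DCOS_API_TOKEN = "..."  # output of: dcos config show core.dcos_acs_token
url = "http://{marathon-domain}/service/{marathon zeppelin name}"
notebook = "2E617JZX1"
paragraph = "20190916-164803_817623738"

headers = {
    "Content-Type": "application/json",
    "Authorization": f"token={DCOS_API_TOKEN}",
}

# run a single paragraph synchronously and report Zeppelin's status field
r = requests.post(f"{url}/api/notebook/run/{notebook}/{paragraph}", headers=headers)
r.raise_for_status()
print(r.json().get("status"))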
tonythor / ZeppelinService.scala
Created December 18, 2020 06:32
A Scala infrastructure program that calls out to a Zeppelin paragraph
package com.gimmesome.zeppelin
import com.softwaremill.sttp._
import scala.util.parsing.json.JSON
// case class ZeppelinConfig (instance: String, baseUrl: String, authLoginUrl: String, authUid: String, authPass: String)
// Usage:
// import something.ZeppelinService
// val notebook = "2E6T7JZX1"
tonythor / emptyToNullUdf.scala
Created December 18, 2020 06:28
Spark/Scala: convert all empty-string records in a DataFrame to null.
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf

// Usage: df.select(df.columns.map(c => emptyToNullUdf(col(c)).alias(c)): _*)
def emptyToNull(_str: String): Option[String] = {
  _str match {
    case _ if _str == null || _str.trim.isEmpty => None
    case _ => Some(_str)
  }
}
val emptyToNullUdf: UserDefinedFunction = udf(emptyToNull(_: String))
tonythor / UseDariaToMakeExcelSafeCSV.scala
Created December 18, 2020 06:28
A Scala example that leverages spark-daria's multiRegexpReplace and bulkRegexpReplace to transform DataFrame string columns into something that doesn't break Excel
import com.github.mrpowers.spark.daria.sql.transformations
import scala.annotation.tailrec
// import other stuff related to spark
val DefaultReplacements = Map(
  "'" -> "\\'",
  "\"" -> "\\'",
  "," -> "\\,")
// if you wanted to pass in a list of columns, say all columns in a DF, you could replace like so.