Skip to content

Instantly share code, notes, and snippets.

@stkbailey
stkbailey / main.py
Last active June 8, 2022 13:17
Create a Simplified Dagster Config
#%%
#
from collections import defaultdict
from dagster import op, graph, Field, config_mapping, GraphDefinition
@op(config_schema={"message": str, "other_stuff": Field(int, 9)})
def print_something(context):
@stkbailey
stkbailey / dynamic_job.py
Created March 29, 2022 11:20
Dagster Dynamic Output
# https://docs.dagster.io/_apidocs/dynamic
from dagster import op, job, DynamicOut, DynamicOutput, Field, repository
from random import randrange
@op(config_schema={"directory": Field(str, default_value="./sample")}, out=DynamicOut(str))
def return_list_of_files(context) -> dict:
    """Yield a random number of dynamic outputs derived from the configured directory.

    Reads the ``directory`` value from the op config and, for each index in
    ``range(randrange(10))``, yields a ``DynamicOutput`` whose value is the
    directory string with the index appended and whose ``mapping_key`` is
    the index rendered as a string.
    """
    # NOTE(review): the body is a generator, so the ``-> dict`` annotation
    # does not describe what is actually produced; left unchanged here --
    # confirm how the framework treats it before altering.
    base = context.op_config["directory"]
    count = randrange(10)
    for index in range(count):
        suffix = str(index)
        # Plain string concatenation: no path separator is inserted.
        yield DynamicOutput(base + suffix, mapping_key=suffix)
@stkbailey
stkbailey / dbt-execute-template.yml
Created August 4, 2021 02:37
dbt Workflow Template for Argo
apiVersion: argoproj.io/v1alpha1
kind: WorkflowTemplate
metadata:
name: dbt-exec-template
spec:
entrypoint: dbt-execute
arguments:
parameters:
- name: exec_command
@stkbailey
stkbailey / meltano-elt-template.yml
Last active September 21, 2023 06:07
Deploying Meltano on Argo Workflows
apiVersion: argoproj.io/v1alpha1
kind: WorkflowTemplate
metadata:
name: meltano-elt-template
spec:
entrypoint: run-elt
arguments:
parameters:
- name: extractor_name
- name: loader_name
@stkbailey
stkbailey / json_schema_to_sdk_properties.py
Last active August 14, 2021 19:39
Convert Singer Stream Catalog to Meltano SDK PropertiesList
import json
import pathlib
import sys
from typing import List
INDENTATION = " " * 4
TYPE_DICT = {
"string": "th.StringType",
"integer": "th.IntegerType",
@stkbailey
stkbailey / app.py
Created December 30, 2020 04:23
dbt_ Graph Analysis App
import json
import matplotlib.pyplot as plt
import networkx as nx
import pandas
import seaborn as sns
import streamlit
# Shapes; https://graphviz.org/doc/info/shapes.html
node_fmt = {
@stkbailey
stkbailey / looker_tests.yaml
Created March 24, 2020 10:37
GitHub Action for Looker Data Testing w/ Spectacles
# Place this file into `.github/workflows` in your Looker repository
# Create a "secret" in the repository settings with your CI user's
# Looker Client ID and Secret Token. Then update the environment variables below.
name: Looker Data Tests w/ Spectacles
on:
push:
branches:
- master
## This is a simple script to download comments from /r/utahjazz,
## look for bigrams that start with "G" then "H" (or vice versa),
## filter out likely non-names, and print them.
# Imports
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@stkbailey
stkbailey / glassdoor.py
Created May 6, 2018 17:22 — forked from scrapehero/glassdoor.py
Python 3 code to extract job listings from Glassdoor.com
from lxml import html, etree
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
def parse(keyword, place):