This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from typing import Any | |
from dagster import IOManager, OutputContext, TableColumn, MetadataEntry, MetadataValue, TableSchema, InputContext, \ | |
AssetKey | |
from dagster._check import str_param, opt_dict_param, opt_bool_param, opt_str_param | |
import pandas as pd | |
from delta import DeltaTable | |
from hail.utils import FatalError | |
from pyspark.sql import DataFrame, SparkSession |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
from dataclasses import dataclass, asdict | |
import os | |
from time import sleep | |
from typing import List | |
import boto3 | |
from dagster import ( | |
op, | |
Field, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Defines a SSMSource configuration type that can retrieve a value from AWS Systems Manager Parameter Store. | |
Can be used like so: | |
@op( | |
config_schema = {'param': Field(SSMSource)} | |
) | |
def do_something(context): | |
ssm_param_value = context.op_config['param'] | |
... | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From RealPython blog. | |
from functools import lru_cache, wraps | |
from datetime import datetime, timedelta | |
# Note: this clears the whole cache each time the expiration is reached. It should be adapted so that items expire independently. | |
def timed_lru_cache(seconds: int, maxsize: int = 128): | |
def wrapper_cache(func): | |
func = lru_cache(maxsize=maxsize)(func) | |
func.lifetime = timedelta(seconds=seconds) | |
func.expiration = datetime.utcnow() + func.lifetime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import sleep | |
from zappa.asynchronous import task | |
from zappa import asynchronous | |
# For some reason zappa isn't picking up the table name from the zappa settings automatically, so I'm just setting | |
# it manually for now. | |
asynchronous.ASYNC_RESPONSE_TABLE = 'phils_done_tasks' | |
# This wrapper provided by the zappa package enables this function to be run on a remote lambda invocation. |