Skip to content

Instantly share code, notes, and snippets.

View anna-geller's full-sized avatar

Anna Geller anna-geller

View GitHub Profile
import numpy as np
import pandas as pd
class TimeseriesGenerator:
def __init__(
self,
start_date: str,
end_date: str,
frequency: str = "H",
import json
import boto3
import logging
import pandas as pd
from great_expectations import from_pandas
from great_expectations.dataset.pandas_dataset import PandasDataset
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
import json
import boto3
import logging
import pandas as pd
from great_expectations import from_pandas
from great_expectations.dataset.pandas_dataset import PandasDataset
from great_expectations.core.expectation_validation_result import (
ExpectationValidationResult,
)
@anna-geller
anna-geller / main.py
Created June 27, 2021 14:48
Script to run data tests locally
import logging
import pandas as pd
from src.timeseries_data_generator import TimeseriesGenerator
from src.timeseries_data_test_runner import TimeseriesDataTestRunner
DATASET = "local_test_without_s3"
DEFAULT_START_DATE = "2021-07-01"
DEFAULT_END_DATE = "2021-07-31 23:59"
SKEWED_END_DATE = "2021-07-25 23:59"
import json
import logging
import awswrangler as wr
from timeseries_data_test_runner import TimeseriesDataTestRunner
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def handler(event, context):
import json
import logging
import awswrangler as wr
from timeseries_data_test_runner import TimeseriesDataTestRunner
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def handler(event, context):
awswrangler==2.9.0
great_expectations==0.13.19
# set a base image that includes Lambda Runtime API:
# Source: https://hub.docker.com/r/amazon/aws-lambda-python
FROM amazon/aws-lambda-python:3.8
# optional: ensure that pip is up to date
RUN /var/lang/bin/python3.8 -m pip install --upgrade pip
# first we COPY only requirements.txt to ensure that later builds
# with changes to your src code will be faster due to caching of this layer
COPY requirements.txt .
RUN pip install -r requirements.txt
docker build -t data-tests .
aws ecr create-repository --repository-name data-tests --image-scanning-configuration scanOnPush=true
docker tag data-tests:latest 123456789.dkr.ecr.eu-central-1.amazonaws.com/data-tests:latest
aws ecr get-login-password | docker login --username AWS --password-stdin 123456789.dkr.ecr.eu-central-1.amazonaws.com
docker push 123456789.dkr.ecr.eu-central-1.amazonaws.com/data-tests:latest
import boto3
# Create an SNS client; the region is pinned explicitly in this call.
sns = boto3.client("sns", region_name="eu-central-1")

# CREATE TOPIC
topic_name = "ge_timeseries_data_test"
create_response = sns.create_topic(Name=topic_name)
# .get() yields None instead of raising if "TopicArn" is missing from the response.
topic_arn = create_response.get("TopicArn")
# print() does not interpolate %-style placeholders the way logging calls do;
# format the message explicitly so the response replaces the placeholder
# instead of a literal "%s" being printed next to it.
print(f"Create topic response: {create_response}")