Skip to content

Instantly share code, notes, and snippets.

View ian-whitestone's full-sized avatar
🐍
Exit code 143

Ian Whitestone ian-whitestone

🐍
Exit code 143
View GitHub Profile
@ian-whitestone
ian-whitestone / parse_unixtime.py
Last active August 5, 2020 22:27
Parse a unix timestamp with proper handling for milliseconds
from __future__ import unicode_literals
import pytest
from starscream.pipeline.stage import TransformStage
from pyspark.sql import functions as F, types as T
from starscream.contract import Contract
from starscream.utils.dataframe import as_dicts, from_dicts
@ian-whitestone
ian-whitestone / reusable_stage.py
Last active July 29, 2020 16:06
Test starscream stages locally with `dev console --local`
from __future__ import unicode_literals
import pytest
import starscream.reusable_stages as RS
from starscream.contract import Contract
from starscream.utils.dataframe import as_dicts, from_dicts
import pyspark.sql.types as T
@ian-whitestone
ian-whitestone / sample_code.py
Created July 29, 2020 16:04
Test a generic starscream stage
from __future__ import unicode_literals
import pytest
from starscream.pipeline.stage import TransformStage
from pyspark.sql import functions as F, types as T
@ian-whitestone
ian-whitestone / stack_trace.txt
Last active July 2, 2020 17:27
Dask + pydantic pickling error
---------------------------------------------------------------------------
PicklingError Traceback (most recent call last)
<ipython-input-67-59601d7f9a60> in <module>
15
16 bag = db.from_sequence(data)
---> 17 bag.map(validate_data).compute()
~/Library/Caches/pypoetry/virtualenvs/domi-IWOYYLRr-py3.7/lib/python3.7/site-packages/dask/base.py in compute(self, **kwargs)
164 dask.base.compute
165 """
@ian-whitestone
ian-whitestone / progress_bar.py
Created June 28, 2020 20:23
A simple progress bar that just shows the total number of "things" processed and the time elapsed
"""
A simple progress bar that just shows the total number of "things" processed
and the time elapsed
"""
import time
from datetime import timedelta
from rich.progress import Progress, ProgressColumn, Task, TaskID, Text, TextColumn
@ian-whitestone
ian-whitestone / App.js
Created May 30, 2020 18:52
Learning React useState hooks by doing...
// https://github.com/mui-org/material-ui/tree/master/examples/create-react-app
import React from 'react';
import Container from '@material-ui/core/Container';
import Typography from '@material-ui/core/Typography';
import Box from '@material-ui/core/Box';
import { makeStyles } from '@material-ui/core/styles';
import Link from '@material-ui/core/Link';
import SvgIcon from '@material-ui/core/SvgIcon';
import Button from '@material-ui/core/Button';
import pickle
PROXY_URL = 'https://z1h4spb3u7.execute-api.us-west-1.amazonaws.com/proxy_us_west_1'
def proxy_request(url):
proxy_response = requests.post(
PROXY_URL,
data={'url': url}
)
if not proxy_response.ok:
@ian-whitestone
ian-whitestone / run_docker.sh
Created March 16, 2020 02:17
Script for executing Python jobs in Docker
# User defined constants
HOME="/home/ianwhitestone/"
LOGS_DIR="logs"
MASTER_LOG_FILENAME="master.txt"
SECRETS_FILE="$HOME/.secrets"
# Input Arguments
MODULE=$1
FUNCTION=$2
@ian-whitestone
ian-whitestone / notify.py
Last active March 17, 2020 01:01
Script for sending failure notifications in Slack
"""
Trigger slack notifications
"""
import argparse
import logging
import os
from slack.web.client import WebClient
LOGGER = logging.getLogger(__name__)
@ian-whitestone
ian-whitestone / great_expecations_examples.py
Created January 12, 2020 22:32
Quickstart examples for getting up and running with great expectations
## Pandas
import great_expectations as ge
# Build up expectations on a sample dataset and save them
train = ge.read_csv("data/npi.csv")
train.expect_column_values_to_not_be_null("NPI")
train.save_expectation_suite("npi_csv_expectations.json")
# Load in a new dataset and test them
test = ge.read_csv("data/npi_new.csv")