from io import StringIO
from typing import Optional, Dict, Any
import yaml
from kedro.framework.hooks import hook_impl
from import DataCatalog
from kedro.versioning import Journal
def _read_github_repo_file(access_token, repo_name, filepath, branch='master'):
tamsanh / example_shared_catalog.yml
Created Sep 19, 2020
Example Shared Kedro Catalog
type: pandas.CSVDataSet
filepath: data/01_raw/iris.csv
tamsanh /
Last active Jul 5, 2020
Consistent Dictionary Hash
import hashlib
import json
from typing import Dict, Any
def hash_dict(data: Dict[str, Any]) -> str:
    """Return a consistent MD5 hex digest for the contents of a dictionary.

    The dictionary is serialised to JSON with sorted keys, so dictionaries
    with equal contents produce the same digest regardless of key insertion
    order. Values that JSON cannot encode natively are converted with
    ``str`` (via ``default=str``).

    MD5 is used here as a fingerprint only; it is not suitable for
    security-sensitive purposes.

    Args:
        data: The dictionary to hash.

    Returns:
        The 32-character hexadecimal MD5 digest of the serialised dictionary.
    """
    data_str = json.dumps(data, sort_keys=True, default=str)
    # The serialised text must actually be fed into the hash; otherwise the
    # digest is that of empty input and is identical for every dictionary.
    md5 = hashlib.md5(data_str.encode("utf-8"))
    return md5.hexdigest()
tamsanh /
Last active Sep 24, 2018
virtualenv venv Quick Setup MacOSX - Python 3
# Instructions for setting up a virtualenv venv
## Make sure you have python3
python3 --version
tamsanh / .ssh_config
Created Jun 4, 2018
Settings file to specify a specific host for a different ssh key.
User git
IdentityFile /home/ec2-user/.ssh/target-repo-name
IdentitiesOnly yes
tamsanh /
Created Apr 30, 2018
Python __rshift__ __lshift__ __rrshift__ __rlshift__ Examples and Order of Operations
# Setup Classes for the Example
class Shifter:
    """Labelled object that reports when its ``>>`` operator is invoked."""

    def __init__(self, label):
        """Store ``label``; it is what ``str()`` of the instance shows."""
        self.label = label

    def __str__(self):
        """Return the label converted to a string."""
        return str(self.label)

    def __rshift__(self, other):
        """Handle ``self >> other`` by printing which method was called.

        Prints a line naming this instance and the right-hand operand.
        """
        print("#%s.__rshift__(%s)" % (self, other))
tamsanh /
Created Apr 12, 2018
Gets the name of the calling file.
def get_calling_file_name(stack_offset=1):
    """Return the file name of the code that called this function.

    Args:
        stack_offset: How many frames above this function to look. The
            default of 1 selects the direct caller. If this function is
            embedded in another function, pass a larger offset to reach
            that function's caller instead.

    Returns:
        The file name recorded for the selected stack frame.

    Raises:
        IndexError: If ``stack_offset`` is larger than the call stack depth.
    """
    import inspect

    # Index 0 is this function's own frame; each further entry is one
    # caller higher up the stack.
    stack = inspect.stack()
    calling_frame = stack[stack_offset]
    return calling_frame.filename
tamsanh /
Created Apr 6, 2018
Generate and play a sound in Jupyter Notebook when the cell is run. Useful as a notification when a long-running cell is done.
from IPython.lib.display import Audio
import numpy as np
# Sample rate (samples per second) and length of the sound in seconds.
framerate = 4410
play_time_seconds = 3

# One time stamp per sample. endpoint=False keeps the spacing at exactly
# 1/framerate seconds, so the tones below are generated at their nominal
# frequencies for the playback rate passed to Audio.
t = np.linspace(0, play_time_seconds, framerate*play_time_seconds, endpoint=False)

# Sum of two sine tones, at 300 Hz and 240 Hz.
audio_data = np.sin(2*np.pi*300*t) + np.sin(2*np.pi*240*t)

# Build the audio widget with autoplay enabled; as the last expression of a
# notebook cell it is displayed when the cell finishes running.
Audio(audio_data, rate=framerate, autoplay=True)
tamsanh /
Created Mar 25, 2018
Count words in all files in a given directory, either recursively or non-recursively.
import re
import io
import os
import csv
from glob import glob
# File name for the word-count CSV output.
# NOTE(review): presumably the fallback when no output name is supplied —
# confirm against the rest of the script, which is not visible here.
DEFAULT_OUTPUT_NAME = 'word-counts.csv'
tamsanh /
Last active Mar 20, 2018
Get a human readable filesize of the output
from __future__ import print
# Adopted from
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
