Skip to content

Instantly share code, notes, and snippets.


Tam Nguyen tamsanh

View GitHub Profile
from io import StringIO
from typing import Optional, Dict, Any
import yaml
from kedro.framework.hooks import hook_impl
from import DataCatalog
from kedro.versioning import Journal
def _read_github_repo_file(access_token, repo_name, filepath, branch='master'):
tamsanh / example_shared_catalog.yml
Created Sep 19, 2020
Example Shared Kedro Catalog
View example_shared_catalog.yml
type: pandas.CSVDataSet
filepath: data/01_raw/iris.csv
tamsanh /
Last active Jul 5, 2020
Consistent Dictionary Hash
import hashlib
import json
from typing import Dict, Any
def hash_dict(data: Dict[str, Any]):
    """Return a consistent MD5 hex digest for the contents of ``data``.

    The dictionary is serialized to JSON with sorted keys, so two
    dictionaries holding the same items hash identically regardless of
    insertion order. Values that JSON cannot encode natively are converted
    with ``str`` (``default=str``).

    Args:
        data: Dictionary to hash.

    Returns:
        The 32-character hexadecimal MD5 digest of the serialized dictionary.
    """
    data_str = json.dumps(data, sort_keys=True, default=str)
    # The serialized text must actually be fed to the hash; an MD5 object
    # that is never updated yields the same digest for every input.
    md5 = hashlib.md5(data_str.encode("utf-8"))
    return md5.hexdigest()
tamsanh /
Last active Sep 24, 2018
virtualenv venv Quick Setup MacOSX - Python 3
# Instructions for setting up a virtualenv venv
## Make sure you have python3
python3 --version
tamsanh / .ssh_config
Created Jun 4, 2018
Settings file to specify a specific host for a different ssh key.
View .ssh_config
User git
IdentityFile /home/ec2-user/.ssh/target-repo-name
IdentitiesOnly yes
tamsanh /
Created Apr 30, 2018
Python __rshift__ __lshift__ __rrshift__ __rlshift__ Examples and Order of Operations
# Setup Classes for the Example
class Shifter:
    """Labelled object that traces calls to its ``>>`` operator."""

    def __init__(self, label):
        """Store ``label``, which identifies this instance in trace output."""
        self.label = label

    def __str__(self):
        """Return the label converted to a string."""
        return str(self.label)

    def __rshift__(self, other):
        """Print a trace line for the expression ``self >> other``.

        NOTE(review): only the trace print is visible in this excerpt, so
        the method returns None; confirm whether a return value is intended.
        """
        print("#%s.__rshift__(%s)" % (self, other))
tamsanh /
Created Apr 12, 2018
Gets the name of the calling file.
def get_calling_file_name(stack_offset=1):
    """Return the file name of the code that called this function.

    Args:
        stack_offset: Number of frames to walk up from this function's own
            frame. The default of 1 selects the direct caller. Pass a larger
            value when this function is wrapped inside another helper, and
            0 to get the file that defines this function.

    Returns:
        The file name recorded for the selected stack frame.

    Raises:
        IndexError: If ``stack_offset`` is deeper than the current stack.
    """
    import inspect

    # context=0 skips loading source lines for every frame; only the file
    # name is needed here.
    stack = inspect.stack(0)
    try:
        calling_file = stack[stack_offset][1]
    finally:
        # Drop the frame records promptly so the frame objects they hold do
        # not linger in reference cycles.
        del stack
    return calling_file
tamsanh /
Created Apr 6, 2018
Generate and play a sound in a Jupyter Notebook when the cell is run. Useful as a notification when a long-running cell is done.
from IPython.lib.display import Audio
import numpy as np
# Notification tone settings: clip length in seconds and the rate handed to
# the audio player.
play_time_seconds = 3
framerate = 4410

# Sample instants across the clip (np.linspace includes both endpoints).
t = np.linspace(start=0, stop=play_time_seconds, num=framerate * play_time_seconds)

# Two sine tones, 240 and 300 cycles per unit of t, summed into one signal.
audio_data = np.sin(2 * np.pi * 240 * t) + np.sin(2 * np.pi * 300 * t)

# Create the player with autoplay enabled.
Audio(data=audio_data, rate=framerate, autoplay=True)
tamsanh /
Created Mar 25, 2018
Count words in all files in a given directory, either recursively or non-recursively.
import re
import io
import os
import csv
from glob import glob
# CSV file name for the word counts; presumably the fallback output path
# when none is supplied. The code that uses it is not visible here; confirm.
DEFAULT_OUTPUT_NAME = 'word-counts.csv'
tamsanh /
Last active Mar 20, 2018
Get a human readable filesize of the output
from __future__ import print
# Adopted from
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable size string.

    The value is divided by 1024 until its magnitude drops below 1024,
    and the matching unit prefix is attached.

    Args:
        num: Size to format (int or float; negative values are allowed).
        suffix: Text appended after the unit prefix, ``'B'`` by default.

    Returns:
        A string such as ``'1.5KB'`` or ``'1023.0B'``.
    """
    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        # Scale down for the next, larger unit; without this step every
        # iteration would re-test the same unscaled value.
        num /= 1024.0
    # Anything still >= 1024 after the 'Z' step is reported in 'Y' units
    # instead of falling through and returning None.
    return "%.1f%s%s" % (num, 'Y', suffix)
You can’t perform that action at this time.