Skip to content

Instantly share code, notes, and snippets.

Avatar

Tam Nguyen tamsanh

View GitHub Profile
View github_catalog_hook.py
from io import StringIO
from typing import Optional, Dict, Any
import yaml
from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.versioning import Journal
def _read_github_repo_file(access_token, repo_name, filepath, branch='master'):
@tamsanh
tamsanh / example_shared_catalog.yml
Created Sep 19, 2020
Example Shared Kedro Catalog
View example_shared_catalog.yml
# Shared catalog entry: the iris dataset, stored as a CSV file.
iris_data:
  type: pandas.CSVDataSet
  filepath: data/01_raw/iris.csv
@tamsanh
tamsanh / dict_hash.py
Last active Jul 5, 2020
Consistent Dictionary Hash
View dict_hash.py
import hashlib
import json
from typing import Dict, Any
def hash_dict(data: Dict[str, Any]):
    """Return a consistent MD5 hex digest for a dictionary.

    The dictionary is serialized to JSON with sorted keys, so two dicts with
    equal contents produce the same digest regardless of insertion order.
    Values that JSON cannot encode natively are converted with ``str``.

    Args:
        data: The dictionary to hash.

    Returns:
        The hexadecimal MD5 digest of the UTF-8 encoded JSON serialization.
    """
    # sort_keys=True makes the serialization independent of key order.
    data_str = json.dumps(data, sort_keys=True, default=str)
    md5 = hashlib.md5()
    md5.update(data_str.encode("utf-8"))
    hash_value = md5.hexdigest()
    return hash_value
@tamsanh
tamsanh / virtualenv_venv_quick_setup_macosx.sh
Last active Sep 24, 2018
virtualenv venv Quick Setup MacOSX - Python 3
View virtualenv_venv_quick_setup_macosx.sh
# Instructions for setting up a virtualenv venv
#########
# SETUP #
#########
## Make sure you have python3
python3 --version
@tamsanh
tamsanh / .ssh_config
Created Jun 4, 2018
Settings file to specify a specific host for a different ssh key.
View .ssh_config
Host target-repo-name.github.com
HostName github.com
User git
IdentityFile /home/ec2-user/.ssh/target-repo-name
IdentitiesOnly yes
@tamsanh
tamsanh / shifty.py
Created Apr 30, 2018
Python __rshift__ __lshift__ __rrshift__ __rlshift__ Examples and Order of Operations
View shifty.py
# Setup Classes for the Example
class Shifter:
def __init__(self, label):
self.label = label
def __str__(self):
return str(self.label)
def __rshift__(self, other):
print("#%s.__rshift__(%s)" % (self, other))
@tamsanh
tamsanh / get_calling_file_name.py
Created Apr 12, 2018
Gets the name of the calling file.
View get_calling_file_name.py
def get_calling_file_name(stack_offset=1):
    """Return the file name of the code that called this function.

    Args:
        stack_offset: How many frames above this function to look. The
            default of 1 selects the direct caller; pass a larger value when
            this function is called from inside another helper function.

    Returns:
        The file name recorded for the selected stack frame.

    Raises:
        IndexError: If ``stack_offset`` exceeds the depth of the call stack.
    """
    import inspect

    # Index 0 is this function's own frame; element 1 of each frame record
    # is that frame's file name.
    stack = inspect.stack()
    calling_file = stack[stack_offset][1]
    return calling_file
@tamsanh
tamsanh / jupyter_cell_notify.py
Created Apr 6, 2018
Generate and play a sound in Jupyter Notebook when the cell is run. Useful as a notification when a long-running cell is done.
View jupyter_cell_notify.py
from IPython.lib.display import Audio
import numpy as np
# Tone parameters: sample rate (samples per second) and duration in seconds.
framerate = 4410
play_time_seconds = 3

# Sample times spanning the whole clip, one entry per audio sample.
t = np.linspace(0, play_time_seconds, num=framerate * play_time_seconds)

# Mix two sine waves (300 and 240 cycles per unit of t) into one signal.
audio_data = np.add(
    np.sin(2 * np.pi * 300 * t),
    np.sin(2 * np.pi * 240 * t),
)

# Left as the final expression so the notebook renders (and autoplays) it.
Audio(audio_data, rate=framerate, autoplay=True)
@tamsanh
tamsanh / directory_word_counter.py
Created Mar 25, 2018
Count words in all files in a given directory, either recursively or non-recursively.
View directory_word_counter.py
import re
import io
import os
import csv
from glob import glob
DEFAULT_OUTPUT_NAME = 'word-counts.csv'
@tamsanh
tamsanh / human_file_size.py
Last active Mar 20, 2018
Get a human readable filesize of the output
View human_file_size.py
# Enable the print() function on Python 2; "print" alone is not a valid
# __future__ feature name and raises SyntaxError.
from __future__ import print_function
# Adapted from
# https://web.archive.org/web/20111010015624/http://blogmag.net/blog/read/38/Print_human_readable_file_size
# https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
You can’t perform that action at this time.