Skip to content

Instantly share code, notes, and snippets.

@raven4752
raven4752 / util.py
Created June 13, 2021 16:08
Expose a Python function to the command line in one line of code
"""
a lazy wrapper to export python function directly as command line interface with one line of code
"""
import click
from functools import wraps, partial
import inspect
def get_prefix(key: str, prefix_set: set):
@raven4752
raven4752 / cache_decorator.py
Last active December 18, 2019 11:09
Python decorator to cache a function's return value to disk
def cache_result(cache_file, param_in_suffix=None, root_dir=None):
"""
decorator function to cache function's return value (assume returned value can be pickled)
the cached file with be located to root_dir/[_param-key_param-value,]/cache_file.pkl
usage:
@cache_result(cache_file_name,param_in_suffix=[param_key],)
def function_to_create_cache():
....
:param root_dir: dir to store the cache if the wrapped function has root_dir as a param, the value will be used instead
@raven4752
raven4752 / Dockerfile
Last active November 14, 2018 15:10
Dockerfile and requirements.txt for an abstractive summarization task with PyTorch, using Chinese mirrors
# Base image: NVIDIA CUDA 9.0 with cuDNN 7 (devel variant) on Ubuntu 16.04.
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
# Build-time argument, defaulting to 3.6; override with `--build-arg PYTHON_VERSION=...`.
ARG PYTHON_VERSION=3.6
# Workaround for the GPG error: remove the CUDA apt source list so `apt-get update` no longer reads it.
# See https://github.com/NVIDIA/nvidia-docker/issues/619
RUN rm /etc/apt/sources.list.d/cuda.list
# Use the TUNA (mirrors.tuna.tsinghua.edu.cn) mirror in place of both archive.ubuntu.com and security.ubuntu.com.
RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g;s/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list
# Switch the apt sources from http to https.
# NOTE(review): this rewrites every "http" substring, so any entry already using "https" would become "httpss" - confirm sources.list contains only http URLs at this point.
RUN sed -i 's/http/https/g' /etc/apt/sources.list
#for mongodb
@raven4752
raven4752 / hash_file.py
Created November 7, 2018 05:38
hash a file in python
import hashlib
def md5(fname, chunk_size=4096):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is opened in binary mode and fed to the hash ``chunk_size``
    bytes at a time, so the whole file never has to fit in memory.

    MD5 is fine for integrity checks and de-duplication, but it is not
    collision resistant and must not be used for security purposes.

    :param fname: path of the file to hash
    :param chunk_size: number of bytes read per iteration (default 4096)
    :return: the digest as a hexadecimal string
    :raises ValueError: if ``chunk_size`` is smaller than 1
    :raises OSError: if the file cannot be opened or read
    """
    # read(0) returns b"" immediately, which would silently yield the digest
    # of an empty file; reject such sizes instead of returning a wrong hash.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
@raven4752
raven4752 / valid_model.py
Created November 7, 2018 04:36
machine learning routine code
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt
@raven4752
raven4752 / install_docker.sh
Last active May 6, 2019 17:57
script to install docker and nvidia-docker
#!/bin/bash
# Refresh the apt package index before installing anything.
sudo apt-get update
# Install apt-transport-https, ca-certificates, curl and
# software-properties-common without prompting (-y).
sudo apt-get install -y \
apt-transport-https \
ca-certificates \
curl \
software-properties-common
# Fetch the GPG key published at download.docker.com and hand it to apt-key
# (-fsSL: fail on HTTP errors, stay silent except for errors, follow redirects).
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
@raven4752
raven4752 / bibex_to_apa.py
Last active September 12, 2018 06:33
Turn a BibTeX string into an APA-formatted citation string. The authors' names are joined with commas; first and middle names are abbreviated.
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode
def parse_author(auth_str):
auth_str_authors = auth_str.split(" and ")
authors = []
for author in auth_str_authors:
if "," in author:
@raven4752
raven4752 / gist:3669ac1cf4aa7f9faf63d3328cd507f7
Created July 11, 2018 09:56
Callback to save the best model and perform early stopping for multi-input/multi-output models using custom score functions
import numpy as np
import pandas as pd
from keras.callbacks import Callback
class ScoreMetric(Callback):
def __init__(self, score_func, num_input=1, num_target=1):
super(ScoreMetric, self).__init__()
self.num_input = num_input
self.num_target = num_target
self.score_func = score_func
@raven4752
raven4752 / expr.py
Last active June 11, 2018 11:50
Cross-validating a model with multiple inputs/outputs
def multi_array_shuffle(*arrays, random_state=1):
    """Shuffle several arrays along their first axis with one shared permutation.

    A single permutation of ``range(arrays[0].shape[0])`` is drawn and used to
    index every array, so rows that were aligned before the call stay aligned
    afterwards. The inputs are not modified; new arrays are returned.

    :param arrays: arrays indexable along their first axis; the first one
        determines the permutation length
    :param random_state: seed passed to ``np.random.seed`` (default 1)
    :return: list with one shuffled array per input, in the same order;
        an empty list when no arrays are given
    :raises IndexError: if a later array has fewer rows than the first one
    """
    # Nothing to shuffle: avoid the IndexError that arrays[0] would raise.
    if not arrays:
        return []
    array_length = arrays[0].shape[0]
    # NOTE: this seeds NumPy's *global* random generator, so it also affects
    # any later np.random call made by the caller.
    np.random.seed(random_state)
    permutation = np.random.permutation(array_length)
    # The Ellipsis keeps every trailing axis untouched.
    return [array[permutation, ...] for array in arrays]
def cv_model_func(model_func, inputs, targets, scores_func, label=None, seed=1, fold=5, **kwargs):
@raven4752
raven4752 / train_test_valid_split.py
Created April 4, 2018 08:20
A function to split data into train, validation, and test sets
#from https://stackoverflow.com/questions/38250710/how-to-split-data-into-3-sets-train-validation-and-test
def train_validate_test_split(df, train_percent=.6, validate_percent=.2, seed=None):
np.random.seed(seed)
perm = np.random.permutation(df.index)
m = len(df.index)
train_end = int(train_percent * m)
validate_end = int(validate_percent * m) + train_end
train = df.ix[perm[:train_end]]
validate = df.ix[perm[train_end:validate_end]]
test = df.ix[perm[validate_end:]]