raven4752 raven4752

## util.py
"""
A lazy wrapper that exports a Python function directly as a command-line interface with one line of code.
"""

import click
from functools import wraps, partial
import inspect


def get_prefix(key: str, prefix_set: set):

## cache_decorator.py

def cache_result(cache_file, param_in_suffix=None, root_dir=None):
    """
    decorator function to cache function's return value (assume returned value can be pickled)
    the cached file will be located at root_dir/[_param-key_param-value,]/cache_file.pkl
    usage:
    @cache_result(cache_file_name,param_in_suffix=[param_key],)
    def function_to_create_cache():
        ....
    :param root_dir: directory in which to store the cache; if the wrapped function has root_dir as a parameter, that value will be used instead

## Dockerfile
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
ARG PYTHON_VERSION=3.6
# Work around the apt GPG error by removing the CUDA apt source list.
# See https://github.com/NVIDIA/nvidia-docker/issues/619
RUN rm /etc/apt/sources.list.d/cuda.list

# Use the TUNA (mirrors.tuna.tsinghua.edu.cn) mirror instead of the default Ubuntu archives.
RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g;s/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list
# Switch the apt sources to HTTPS. Only the "http://" scheme is matched so that
# entries which are already "https://" are not rewritten into "httpss://".
RUN sed -i 's|http://|https://|g' /etc/apt/sources.list

#for mongodb

## hash_file.py
import hashlib
def md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


## valid_model.py
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import pandas as pd
import numpy as np
import os
from  matplotlib import pyplot as plt


## install_docker.sh
#!/bin/bash
sudo apt-get update
sudo apt-get install -y \
    apt-transport-https \
    ca-certificates \
    curl \
    software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
sudo add-apt-repository \
   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \

## bibex_to_apa.py
import bibtexparser
from bibtexparser.bparser import BibTexParser
from bibtexparser.customization import convert_to_unicode


def parse_author(auth_str):
    auth_str_authors = auth_str.split(" and ")
    authors = []
    for author in auth_str_authors:
        if "," in author:

## gist:3669ac1cf4aa7f9faf63d3328cd507f7
import numpy as np
import pandas as pd
from keras.callbacks import Callback

class ScoreMetric(Callback):
    """Keras ``Callback`` subclass that records a score function and two counts.

    The constructor only stores its arguments on the instance; how they are
    used afterwards is not visible in this part of the file.

    :param score_func: object kept as ``self.score_func``
        (presumably a scoring callable -- confirm against its use)
    :param num_input: value kept as ``self.num_input`` (default 1)
    :param num_target: value kept as ``self.num_target`` (default 1)
    """

    def __init__(self, score_func, num_input=1, num_target=1):
        super().__init__()
        self.score_func = score_func
        self.num_target = num_target
        self.num_input = num_input

## expr.py
def multi_array_shuffle(*arrays, random_state=1):
    """Shuffle several arrays in unison along their first axis.

    A single permutation of the row indices is drawn and applied to every
    array, so rows that were aligned across the inputs stay aligned in the
    outputs.

    Note: this reseeds NumPy's global random number generator with
    ``random_state`` each time a permutation is drawn.

    :param arrays: arrays exposing ``.shape`` and accepting an index array
        on their first axis (e.g. ``numpy.ndarray``); all must have the same
        length along axis 0
    :param random_state: seed passed to ``np.random.seed``
    :return: list with one shuffled array per input, in input order
        (an empty list when no arrays are given)
    :raises ValueError: if the arrays differ in length along axis 0
    """
    if not arrays:
        return []
    array_length = arrays[0].shape[0]
    lengths = [array.shape[0] for array in arrays]
    if any(length != array_length for length in lengths):
        # Without this check a longer array would be silently truncated to
        # the first array's length and a shorter one would fail with an
        # opaque IndexError.
        raise ValueError(
            "all arrays must have the same length along axis 0, got %s" % lengths
        )
    np.random.seed(random_state)
    permutation = np.random.permutation(array_length)
    return [array[permutation, ...] for array in arrays]

def cv_model_func(model_func, inputs, targets, scores_func, label=None, seed=1, fold=5, **kwargs):

## train_test_valid_split.py
#from https://stackoverflow.com/questions/38250710/how-to-split-data-into-3-sets-train-validation-and-test
def train_validate_test_split(df, train_percent=.6, validate_percent=.2, seed=None):
    """Randomly split a DataFrame into train, validation and test frames.

    The index labels are shuffled once; the first ``train_percent`` of them
    form the training set, the next ``validate_percent`` the validation set,
    and the remaining labels the test set.

    Note: this reseeds NumPy's global random number generator with ``seed``.

    :param df: DataFrame to split; rows are selected by index label, so
        index labels are assumed to be unique
    :param train_percent: fraction of the rows assigned to the training set
    :param validate_percent: fraction of the rows assigned to the validation set
    :param seed: seed passed to ``np.random.seed``
    :return: tuple ``(train, validate, test)`` of DataFrames
    """
    np.random.seed(seed)
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end
    # ``perm`` holds index labels, so use the label-based ``.loc`` indexer;
    # the ``.ix`` indexer used previously was removed in pandas 1.0.
    train = df.loc[perm[:train_end]]
    validate = df.loc[perm[train_end:validate_end]]
    test = df.loc[perm[validate_end:]]
    # The visible body computed the splits without handing them back.
    return train, validate, test
	"""
	A lazy wrapper that exports a Python function directly as a command-line interface with one line of code.
	"""

	import click
	from functools import wraps, partial
	import inspect


	def get_prefix(key: str, prefix_set: set):

	def cache_result(cache_file, param_in_suffix=None, root_dir=None):
	"""
	decorator function to cache function's return value (assume returned value can be pickled)
	the cached file will be located at root_dir/[_param-key_param-value,]/cache_file.pkl
	usage:
	@cache_result(cache_file_name,param_in_suffix=[param_key],)
	def function_to_create_cache():
	....
	:param root_dir: directory in which to store the cache; if the wrapped function has root_dir as a parameter, that value will be used instead
	FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
	ARG PYTHON_VERSION=3.6
	# Work around the apt GPG error by removing the CUDA apt source list.
	# See https://github.com/NVIDIA/nvidia-docker/issues/619
	RUN rm /etc/apt/sources.list.d/cuda.list

	# Use the TUNA (mirrors.tuna.tsinghua.edu.cn) mirror instead of the default Ubuntu archives.
	RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g;s/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list
	# Switch the apt sources to HTTPS. Only the "http://" scheme is matched so that
	# entries which are already "https://" are not rewritten into "httpss://".
	RUN sed -i 's|http://|https://|g' /etc/apt/sources.list

	#for mongodb
	import hashlib
	def md5(fname):
	hash_md5 = hashlib.md5()
	with open(fname, "rb") as f:
	for chunk in iter(lambda: f.read(4096), b""):
	hash_md5.update(chunk)
	return hash_md5.hexdigest()
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.ensemble import AdaBoostClassifier
	from sklearn.model_selection import KFold
	from sklearn.metrics import roc_auc_score
	import pandas as pd
	import numpy as np
	import os
	from matplotlib import pyplot as plt
	#!/bin/bash
	sudo apt-get update
	sudo apt-get install -y \
	apt-transport-https \
	ca-certificates \
	curl \
	software-properties-common
	curl -fsSL https://download.docker.com/linux/ubuntu/gpg \| sudo apt-key add -
	sudo add-apt-repository \
	"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
	import bibtexparser
	from bibtexparser.bparser import BibTexParser
	from bibtexparser.customization import convert_to_unicode


	def parse_author(auth_str):
	auth_str_authors = auth_str.split(" and ")
	authors = []
	for author in auth_str_authors:
	if "," in author:
	import numpy as np
	import pandas as pd
	from keras.callbacks import Callback

	class ScoreMetric(Callback):
	    """Keras ``Callback`` subclass that records a score function and two counts.

	    The constructor only stores its arguments on the instance; how they are
	    used afterwards is not visible in this part of the file.

	    :param score_func: object kept as ``self.score_func``
	        (presumably a scoring callable -- confirm against its use)
	    :param num_input: value kept as ``self.num_input`` (default 1)
	    :param num_target: value kept as ``self.num_target`` (default 1)
	    """

	    def __init__(self, score_func, num_input=1, num_target=1):
	        super(ScoreMetric, self).__init__()
	        self.num_input = num_input
	        self.num_target = num_target
	        self.score_func = score_func
	def multi_array_shuffle(*arrays, random_state=1):
	    """Shuffle several arrays in unison along their first axis.

	    A single permutation of the row indices is drawn and applied to every
	    array, so rows that were aligned across the inputs stay aligned in the
	    outputs.

	    Note: this reseeds NumPy's global random number generator with
	    ``random_state`` each time a permutation is drawn.

	    :param arrays: arrays exposing ``.shape`` and accepting an index array
	        on their first axis (e.g. ``numpy.ndarray``); all must have the same
	        length along axis 0
	    :param random_state: seed passed to ``np.random.seed``
	    :return: list with one shuffled array per input, in input order
	        (an empty list when no arrays are given)
	    :raises ValueError: if the arrays differ in length along axis 0
	    """
	    if not arrays:
	        return []
	    array_length = arrays[0].shape[0]
	    lengths = [array.shape[0] for array in arrays]
	    if any(length != array_length for length in lengths):
	        # Without this check a longer array would be silently truncated to
	        # the first array's length and a shorter one would fail with an
	        # opaque IndexError.
	        raise ValueError(
	            "all arrays must have the same length along axis 0, got %s" % lengths
	        )
	    np.random.seed(random_state)
	    permutation = np.random.permutation(array_length)
	    return [array[permutation, ...] for array in arrays]

	def cv_model_func(model_func, inputs, targets, scores_func, label=None, seed=1, fold=5, **kwargs):
	#from https://stackoverflow.com/questions/38250710/how-to-split-data-into-3-sets-train-validation-and-test
	def train_validate_test_split(df, train_percent=.6, validate_percent=.2, seed=None):
	    """Randomly split a DataFrame into train, validation and test frames.

	    The index labels are shuffled once; the first ``train_percent`` of them
	    form the training set, the next ``validate_percent`` the validation set,
	    and the remaining labels the test set.

	    Note: this reseeds NumPy's global random number generator with ``seed``.

	    :param df: DataFrame to split; rows are selected by index label, so
	        index labels are assumed to be unique
	    :param train_percent: fraction of the rows assigned to the training set
	    :param validate_percent: fraction of the rows assigned to the validation set
	    :param seed: seed passed to ``np.random.seed``
	    :return: tuple ``(train, validate, test)`` of DataFrames
	    """
	    np.random.seed(seed)
	    perm = np.random.permutation(df.index)
	    m = len(df.index)
	    train_end = int(train_percent * m)
	    validate_end = int(validate_percent * m) + train_end
	    # ``perm`` holds index labels, so use the label-based ``.loc`` indexer;
	    # the ``.ix`` indexer used previously was removed in pandas 1.0.
	    train = df.loc[perm[:train_end]]
	    validate = df.loc[perm[train_end:validate_end]]
	    test = df.loc[perm[validate_end:]]
	    # The visible body computed the splits without handing them back.
	    return train, validate, test