
@binshengliu
binshengliu / random_choice_noreplace.py
Created June 8, 2020 05:44
Random choice without replacement along an axis
# https://stackoverflow.com/a/45438143/955952
import numpy as np

def random_choice_noreplace(m, n, axis=-1):
    # m, n are the number of rows, cols of output; argsort of random values
    # gives a permutation along `axis`, i.e. sampling without replacement.
    return np.random.rand(m, n).argsort(axis=axis)
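A quick usage sketch continuing from the snippet above (the printed values are illustrative, since they are random):

idx = random_choice_noreplace(3, 5)
print(idx.shape)  # (3, 5)
print(idx)  # e.g. [[2 0 4 1 3], [1 3 0 4 2], [4 2 1 0 3]]; each row is a permutation of 0..4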
@binshengliu
binshengliu / conda-perl-for-rouge.sh
Created April 6, 2020 08:55
Solve `Can't locate DB_File.pm in @INC` when running ROUGE-1.5.5.pl by installing the Perl modules with conda
conda config --append channels bioconda
conda install perl perl-db-file perl-xml-parser perl-libwww-perl  # installs a large number of dependency packages
# Rewrite the shebang to use `env perl` so the conda-installed Perl is picked up.
sed -i '1 s/perl.*/env perl/' ~/.files2rouge/ROUGE-1.5.5.pl
@binshengliu
binshengliu / product_ndarray.py
Created March 25, 2020 08:42
All combinations of elements from two ndarrays
# Cartesian product of 1-D arrays arr0 and arr1, one (arr0[i], arr1[j]) pair per row
arr = np.array(np.meshgrid(arr0, arr1)).T.reshape(-1, 2)
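A self-contained sketch with illustrative inputs (assuming arr0 and arr1 are 1-D arrays):

import numpy as np

arr0 = np.array([1, 2, 3])
arr1 = np.array([10, 20])
arr = np.array(np.meshgrid(arr0, arr1)).T.reshape(-1, 2)
print(arr)
# [[ 1 10]
#  [ 1 20]
#  [ 2 10]
#  [ 2 20]
#  [ 3 10]
#  [ 3 20]]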
@binshengliu
binshengliu / dedup-trec-dup.sh
Created March 22, 2020 05:25
Work around duplicate documents in a TREC run
# Keep only the first line for each key (fields 1-2) before evaluating.
sort -u -k 1,2 run.monobert.dev.small.tsv | trec_eval -m recip_rank_cut.10 qrels.dev.small.tsv -
sort -u -k 1,2 run.duobert.dev.small.tsv | trec_eval -m recip_rank_cut.10 qrels.dev.small.tsv -

# Separate snippet: sample all CPUs at 99 Hz with call graphs, then render a flame graph.
perf record -F 99 -g -a <command>
perf script | ~/src/FlameGraph/stackcollapse-perf.pl | ~/src/FlameGraph/flamegraph.pl > perf.svg
google-chrome perf.svg
@binshengliu
binshengliu / extra_legend.py
Created February 27, 2020 00:29
Avoid the legend being cut off when saving a matplotlib figure
# https://stackoverflow.com/a/10154763/955952
# Pass artists that sit outside the axes (e.g. an external legend) so that
# bbox_inches='tight' accounts for them when computing the bounding box.
fig.savefig('samplefigure', bbox_extra_artists=(lgd, text), bbox_inches='tight')
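A minimal sketch of the typical setup, assuming a legend anchored outside the axes (the data and file name are illustrative):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1, 2], [0, 1, 4], label='quadratic')
# Anchor the legend outside the axes; without bbox_extra_artists it would be clipped.
lgd = ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1.0))
fig.savefig('samplefigure.png', bbox_extra_artists=(lgd,), bbox_inches='tight')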
@binshengliu
binshengliu / fit_figure.tex
Last active February 27, 2020 00:28
Fit a figure to slide size.
% https://tex.stackexchange.com/q/32886/124998
\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{myfig}
@binshengliu
binshengliu / groupby_mp.py
Last active January 9, 2020 06:41
Apply a function to pandas groups with multiprocessing
from more_itertools import always_iterable
from typing import Callable, Union
import multiprocessing
import pandas as pd

def groupby_mp(groupby_df: pd.core.groupby.DataFrameGroupBy,
               func: Callable[[pd.DataFrame], Union[pd.DataFrame, pd.Series]],
               num_cpus: int = multiprocessing.cpu_count() // 2,
               chunksize: int = 1) -> pd.DataFrame:
    # The gist preview is truncated here; the body below is an assumed sketch:
    # apply `func` (which must be picklable, i.e. defined at module level) to
    # each group in a process pool and concatenate the results.
    with multiprocessing.Pool(num_cpus) as pool:
        results = pool.map(func, (g for _, g in groupby_df), chunksize=chunksize)
    return pd.concat(results)
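A usage sketch under the same assumption (total_score is an illustrative helper; it must live at module level so it can be pickled):

import pandas as pd

def total_score(group: pd.DataFrame) -> pd.DataFrame:
    return group.assign(total=group['score'].sum())

df = pd.DataFrame({'qid': [1, 1, 2], 'score': [0.3, 0.7, 0.5]})
out = groupby_mp(df.groupby('qid'), total_score, num_cpus=2)
print(out)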
@binshengliu
binshengliu / rank_metrics.py
Created December 24, 2019 07:54 — forked from bwhite/rank_metrics.py
Ranking Metrics
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np
@binshengliu
binshengliu / tqdmf.py
Created December 10, 2019 04:28
Iterate over a huge file line by line with a tqdm progress bar
import os
from tqdm import tqdm

def tqdmf(path, *args, **kwargs):
    # Yield lines from `path` while showing a byte-based progress bar.
    with tqdm(
            total=os.stat(path).st_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
            *args,
            **kwargs) as bar:
        with open(path, 'r') as f:
            for line in f:
                # Assumed completion (the gist preview is truncated here):
                # advance the bar by the line length and yield the line.
                bar.update(len(line))
                yield line
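A usage sketch (the file name is illustrative):

n_lines = 0
for line in tqdmf('huge_corpus.tsv', desc='reading'):
    n_lines += 1
print(n_lines)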