Last active
January 29, 2020 07:33
-
-
Save wendazhou/0f32f9945abb1f72c23f407bc7732f39 to your computer and use it in GitHub Desktop.
Tutorial job
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# SLURM array job for the tutorial simulation.
# Each array task runs tutorial.py on one shard; the array range 0-8 provides
# nine tasks, matching the 3 sample sizes x 3 distributions enumerated by the
# simulation script's --shard option.
#SBATCH -c 1
#SBATCH --mem 1GB
#SBATCH --time 10:00
#SBATCH -A stats
#SBATCH --array 0-8

module load anaconda

# `conda load` is not a conda subcommand; environments are entered with
# `conda activate`. Batch shells are non-interactive, so conda's shell
# functions must be initialised first.
# NOTE(review): assumes "pytorch" is the name of a conda environment - confirm.
eval "$(conda shell.bash hook)"
conda activate pytorch

python tutorial.py --shard "$SLURM_ARRAY_TASK_ID"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def read_all(files):
    """Combine result CSV files and summarise the p-values per configuration.

    Every entry of ``files`` is read with ``pandas.read_csv`` and the rows are
    stacked. The rows are then grouped by the ``n`` and ``distribution``
    columns, and two fractions are computed from each group's ``pvalue``
    column.

    Returns a DataFrame indexed by ``(n, distribution)`` with columns:
      * ``level_50`` - fraction of p-values strictly below 0.5
      * ``level_95`` - fraction of p-values strictly below 0.95
    """
    def level_50(pvalues):
        # Mean of a boolean mask is the fraction of True entries.
        return np.mean(pvalues < 0.5)

    def level_95(pvalues):
        return np.mean(pvalues < 0.95)

    frames = [pd.read_csv(path) for path in files]
    combined = pd.concat(frames)
    pvalues_by_config = combined.groupby(['n', 'distribution'])['pvalue']
    return pvalues_by_config.agg(level_50=level_50, level_95=level_95)
def main():
    """Command-line entry point: summarise the CSV files given as arguments.

    Takes one or more file paths as positional arguments, aggregates them with
    ``read_all`` and prints the resulting table.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('files', type=str, nargs='+')
    csv_paths = cli.parse_args().files

    print(read_all(csv_paths))


# Run the aggregation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.stats | |
import pandas as pd | |
import itertools | |
def simulate_t_test_pvalue(n, dist='norm', rng=None):
    """Draw one sample of size ``n`` and return its one-sample t-test p-value.

    The sample is tested against a population mean of 0 with
    ``scipy.stats.ttest_1samp``.

    Parameters
    ----------
    n : int
        Number of observations to draw.
    dist : str
        Sampling distribution: ``'normal'`` (``'norm'`` is accepted as an
        alias, so the default value is valid), ``'t'`` for a Student t with
        3 degrees of freedom, or ``'cauchy'`` for a standard Cauchy.
    rng : optional
        Source of randomness exposing ``standard_normal``, ``standard_t`` and
        ``standard_cauchy`` (e.g. ``np.random.RandomState`` or
        ``np.random.Generator``). Defaults to the global ``np.random`` module.

    Returns
    -------
    The p-value reported by the t-test.

    Raises
    ------
    ValueError
        If ``dist`` is not one of the supported names.
    """
    if rng is None:
        rng = np.random

    if dist in ('norm', 'normal'):
        # randn is a thin wrapper around standard_normal, so the draws are
        # unchanged; standard_normal also exists on np.random.Generator.
        x = rng.standard_normal(n)
    elif dist == 't':
        x = rng.standard_t(3, n)
    elif dist == 'cauchy':
        x = rng.standard_cauchy(n)
    else:
        raise ValueError('invalid distribution')

    test_result = scipy.stats.ttest_1samp(x, 0)
    return test_result.pvalue
def run_simulation(n, dist, num_replications, rng=None):
    """Repeat the simulated t-test and collect the p-values in a DataFrame.

    Calls ``simulate_t_test_pvalue(n, dist, rng)`` ``num_replications`` times.

    Returns a DataFrame with one row per replication and three columns:
    ``pvalue`` (the simulated p-value), ``n`` and ``distribution`` (the
    arguments ``n`` and ``dist``, repeated on every row).
    """
    pvalues = []
    for _ in range(num_replications):
        pvalues.append(simulate_t_test_pvalue(n, dist, rng))

    # Scalars are broadcast by pandas to the length of the p-value list.
    columns = {
        'pvalue': pvalues,
        'n': n,
        'distribution': dist,
    }
    return pd.DataFrame(columns)
def main():
    """Command-line entry point: run the simulation for one shard.

    The shard index selects one (sample size, distribution) pair from the
    Cartesian product of the sample sizes and distribution names defined
    below. The same index seeds the random number generator, and the result
    is written to ``output_<shard>.csv``.
    """
    import argparse

    ns = [10, 20, 40]
    dists = ['normal', 't', 'cauchy']
    configs = list(itertools.product(ns, dists))

    parser = argparse.ArgumentParser()
    # Required: without it the shard would be None and indexing would fail
    # with an unhelpful TypeError.
    parser.add_argument(
        '--shard', type=int, required=True,
        help='index of the configuration to run (0 to {0})'.format(len(configs) - 1))
    args = parser.parse_args()

    shard = args.shard
    # Reject indices outside the configuration list up front; negative values
    # are also invalid as a RandomState seed.
    if not 0 <= shard < len(configs):
        parser.error('--shard must be between 0 and {0}, got {1}'.format(len(configs) - 1, shard))

    n, dist = configs[shard]
    result = run_simulation(n, dist, num_replications=2000, rng=np.random.RandomState(shard))
    result.to_csv('output_{0}.csv'.format(shard), index=False)


# Run the simulation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment