Skip to content

Instantly share code, notes, and snippets.

@sdia-zz
Created October 18, 2018 06:55
Show Gist options
  • Save sdia-zz/6f9190593abbc06a8397f28dadc16be8 to your computer and use it in GitHub Desktop.
Save sdia-zz/6f9190593abbc06a8397f28dadc16be8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#-*- coding:utf-8 -*-
import os
import numpy as np
# Analysis parameters.
MDE = 0.01        # relative shift used to build the *_LO / *_HI series below
                  # (presumably "minimum detectable effect" -- confirm)
POWER = 0.8       # NOTE(review): not referenced anywhere in the visible code
BOOT_SIZE = 1000  # default resample size for get_ci
BOOT_RUNS = 256   # default number of bootstrap resamples

# The data files are expected to sit next to this script.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Each metric is loaded once, then shifted down / up by the relative MDE.
REPLIES_PER_DAU = np.loadtxt(os.path.join(CURRENT_DIR, 'replies_per_dau.tsv'))
REPLIES_PER_DAU_LO = REPLIES_PER_DAU - MDE * REPLIES_PER_DAU
REPLIES_PER_DAU_HI = REPLIES_PER_DAU + MDE * REPLIES_PER_DAU

LISTINGS_PER_DAU = np.loadtxt(os.path.join(CURRENT_DIR, 'listings_per_dau.tsv'))
LISTINGS_PER_DAU_LO = LISTINGS_PER_DAU - MDE * LISTINGS_PER_DAU
LISTINGS_PER_DAU_HI = LISTINGS_PER_DAU + MDE * LISTINGS_PER_DAU
def get_ci(a, bsize=BOOT_SIZE, bruns=BOOT_RUNS):
    """Return a bootstrap percentile interval for the mean of ``a``.

    Draws ``bruns`` resamples of ``bsize`` elements from ``a`` (with
    replacement), takes the mean of each resample, and reports the 2.5th
    and 97.5th percentiles of those means.

    Args:
        a: 1-D sequence/array of observations to resample from.
        bsize: number of elements drawn per resample.
        bruns: number of resamples.

    Returns:
        The string ``'<lo>, <hi>'`` with both bounds rendered to two
        decimal places.
    """
    resampled_means = [
        np.mean(np.random.choice(a, size=bsize, replace=True))
        for _ in range(bruns)
    ]
    lower, upper = np.percentile(resampled_means, [2.5, 97.5])
    return '{:2.2f}, {:2.2f}'.format(lower, upper)
def get_power(a, sample_size, ci_lo, ci_hi, bruns=BOOT_RUNS):
    """Return the percentage of resample means falling outside an interval.

    Draws ``bruns`` resamples of ``sample_size`` elements from ``a`` (with
    replacement) and counts how many resample means are at or beyond the
    interval bounds.

    Args:
        a: 1-D sequence/array of observations to resample from.
        sample_size: number of elements drawn per resample.
        ci_lo: lower interval bound; a mean ``<= ci_lo`` counts as outside.
        ci_hi: upper interval bound; a mean ``>= ci_hi`` counts as outside.
        bruns: number of resamples.

    Returns:
        A float between 0 and 100: the share of resample means outside
        the open interval ``(ci_lo, ci_hi)``, expressed in percent.
    """
    resampled_means = [
        np.mean(np.random.choice(a, size=sample_size, replace=True))
        for _ in range(bruns)
    ]
    outside = sum(
        1 for mean in resampled_means if mean >= ci_hi or mean <= ci_lo
    )
    return 100.0 * outside / len(resampled_means)
def generate_ci(a):
    """Print a bootstrap interval for ``a`` at doubling resample sizes.

    Emits one line per size, for the 16 powers of two from 128 (2**7) up
    to 4194304 (2**22); each line holds the size followed by the string
    returned by ``get_ci`` for that size.

    Args:
        a: 1-D sequence/array of observations, forwarded to ``get_ci``.
    """
    for exponent in range(7, 23):
        n = 2 ** exponent
        print('{}, '.format(n), get_ci(a, bsize=n))
def power_analysis(data, ci):
    """Print the power obtained at each sample size listed in ``ci``.

    For every entry of ``ci`` the key is converted to an integer sample
    size and the value supplies the interval bounds passed to
    ``get_power``. One tab-separated line (sample size, power) is printed
    per entry, both rendered with two decimals.

    Args:
        data: 1-D sequence/array of observations, forwarded to
            ``get_power``.
        ci: mapping of sample size (anything ``int()`` accepts) to a
            ``(lower, upper)`` pair of interval bounds.
    """
    for size_key, bounds in ci.items():
        sample_size = int(size_key)
        power = get_power(
            data,
            sample_size=sample_size,
            ci_lo=bounds[0],
            ci_hi=bounds[1],
        )
        print('{:2.2f}\t{:2.2f}'.format(sample_size, power))
if __name__ == '__main__':
    # Interval tables: sample size -> (lower, upper) bound of the mean.
    # (The values presumably come from earlier generate_ci runs -- confirm.)

    # Power-of-ten table for replies per DAU; it is overwritten by the
    # power-of-two table right below, so it has no effect on the run.
    CI_REPLIES_PER_DAU = {
        '100': (1.80, 3.04),
        '1000': (2.07, 2.58),
        '10000': (2.19, 2.32),
        '100000': (2.23, 2.28),
        '1000000': (2.25, 2.26),
    }
    # Power-of-two table for replies per DAU (the one actually used).
    CI_REPLIES_PER_DAU = {
        '128': (1.77, 2.82),
        '256': (1.92, 2.83),
        '512': (2.05, 2.68),
        '1024': (2.07, 2.50),
        '2048': (2.13, 2.41),
        '4096': (2.16, 2.42),
        '8192': (2.18, 2.35),
        '16384': (2.21, 2.32),
        '32768': (2.22, 2.29),
        '65536': (2.22, 2.28),
        '131072': (2.23, 2.27),
        '262144': (2.24, 2.27),
        '524288': (2.24, 2.26),
        '1048576': (2.24, 2.26),
    }

    # Same pair of tables for listings per DAU; again the second assignment
    # replaces the first. Neither is passed to power_analysis below.
    CI_LISTINGS_PER_DAU = {
        '100': (1.36, 2.15),
        '1000': (1.56, 1.81),
        '10000': (1.64, 1.72),
        '100000': (1.66, 1.69),
        '1000000': (1.67, 1.68),
    }
    CI_LISTINGS_PER_DAU = {
        '128': (1.41, 2.14),
        '256': (1.47, 1.94),
        '512': (1.54, 1.89),
        '1024': (1.57, 1.81),
        '2048': (1.60, 1.76),
        '4096': (1.62, 1.74),
        '8192': (1.64, 1.72),
        '16384': (1.64, 1.71),
        '32768': (1.66, 1.70),
        '65536': (1.66, 1.69),
        '131072': (1.67, 1.69),
        '262144': (1.67, 1.68),
        '524288': (1.67, 1.68),
        '1048576': (1.67, 1.68),
    }

    # Report how often means of the upward-shifted replies series land
    # outside each interval of the replies table.
    power_analysis(REPLIES_PER_DAU_HI, CI_REPLIES_PER_DAU)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment