Skip to content

Instantly share code, notes, and snippets.

@endrebak
Last active December 12, 2019 11:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save endrebak/acabb3a01b90d0795b832d10f5fe97ba to your computer and use it in GitHub Desktop.
Save endrebak/acabb3a01b90d0795b832d10f5fe97ba to your computer and use it in GitHub Desktop.
from scipy.stats import gaussian_kde
import numpy as np
import pandas as pd
# Fit a Gaussian KDE to the CorrelationSum column and, for several probability
# cutoffs, report the grid value at which the estimated mass accumulated from 0
# first reaches the cutoff, plus how many observed values lie below it.
#
# NOTE(review): `f` (input table path) and `o` (output path) are not defined in
# this snippet; they must be provided by the surrounding context.
df = pd.read_table(f, sep="\t")
values = df.CorrelationSum.sort_values()

gk = gaussian_kde(values)

# Evenly spaced grid spanning the observed range; res[i] is the KDE mass on
# the interval [0, vals[i]].
# NOTE(review): the lower integration bound is 0, not -inf, so any density the
# KDE places below 0 is excluded — confirm this is intended for this statistic.
vals = np.linspace(values.min(), values.max(), 1000)
res = np.array([gk.integrate_box_1d(0, v) for v in vals])

result = []
for cutoff in [0.001, 0.01, 0.05, 0.1]:
    # res is non-decreasing along the grid (the density is non-negative), so
    # the count of entries below the cutoff is the index of the first grid
    # point whose accumulated mass reaches it.
    cutoff_idx = len(res[res < cutoff])
    # If the accumulated mass never reaches the cutoff, the count equals
    # len(vals); clamp to the last grid point instead of raising IndexError.
    cutoff_idx = min(cutoff_idx, len(vals) - 1)
    cutoff_value = vals[cutoff_idx]

    # Observed values strictly below the cutoff value, as a count and a percentage.
    number = (values < cutoff_value).sum()
    percentage = 100 * (number / len(values))

    result.append(
        {
            "Cutoff": cutoff,
            "CutoffValue": cutoff_value,
            "Number": number,
            "Percentage": percentage,
        }
    )

# One row per cutoff; columns follow the dict key order above.
result = pd.DataFrame(result)
print(result)
result.to_csv(o, sep="\t", index=False, float_format="%.3f")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment