Skip to content

Instantly share code, notes, and snippets.

@tleonardi
Created May 15, 2020 10:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tleonardi/0bb31e6a380e5766f04f4e197d36b38e to your computer and use it in GitHub Desktop.
Save tleonardi/0bb31e6a380e5766f04f4e197d36b38e to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from collections import OrderedDict
test = "GMM_logit_pvalue"
df = pd.read_csv("out_nanocompore_results.tsv", sep="\t")
df["Peak"] = 0
df = df[["pos", "chr", "genomicPos", "ref_id", "strand", "ref_kmer", "Peak", test]]
transcripts = set(df["ref_id"])
p_val_lim = 0.01
sig_lim = -np.log10(p_val_lim)
i=1
for tx in transcripts:
if(not i%50): print(i)
i+=1
res = df[df.ref_id==tx]
x = -np.log10(res[test])
x = x.fillna(0)
threshold = sig_lim
peaks, extra = find_peaks(x, height=threshold, distance=9)
peaks_indexes = res.iloc[peaks].index
df.loc[peaks_indexes, "Peak"] = extra["peak_heights"]
df.to_csv("out_nanocompore_results_peaks.txt", index=False, sep="\t")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment