Last active
January 27, 2016 16:04
-
-
Save Jamesits/23f2bc877c9e6738b7eb to your computer and use it in GitHub Desktop.
This script has been used to analyze gene test results in tab-separated value (TSV) format from a lab at ZJU. It saves people tons of tedious labor: pasting two rows from two different xlsx files into GraphPad, clicking an analysis button, and copying the result back into a third xlsx file. However, the script has one drawback: it does not run in parallel.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# James' Gene T-tester | |
# Written by James Swineson <jamesswineson@gmail.com>, 2016-01-27 | |
# All rights reserved. | |
# | |
# Install dependencies: | |
# pip3 install numpy scipy | |
# | |
# Tested to work under Python 3.5.1 on OS X 10.11.3 | |
# import openpyxl | |
import csv
import sys

import numpy as np
from scipy.stats import ttest_1samp, ttest_ind
# organized data format | |
class Data:
    """Plain record holding one gene's identifier, name and two sample groups.

    All fields default to ``None`` and are stored exactly as passed in;
    no validation or conversion is performed.
    """

    # The parameter name ``id`` shadows the builtin, but it is part of the
    # keyword interface, so it is kept for backward compatibility.
    def __init__(self, id=None, name=None, normal_data=None, tumor_data=None):
        self.id = id
        self.name = name
        self.normal_data = normal_data
        self.tumor_data = tumor_data
def p_summary(p):
    """Return the GraphPad-style significance symbol for a P value.

    Symbol  Meaning
    ns      P > 0.05
    *       P <= 0.05
    **      P <= 0.01
    ***     P <= 0.001
    ****    P <= 0.0001

    A NaN P value is reported as "ns", because it cannot support a claim
    of significance.

    See: http://graphpad.com/support/faq/what-is-the-meaning-of--or--or--in-reports-of-statistical-significance-from-prism-or-instat/
    """
    # Written as ``not p <= 0.05`` instead of ``p > 0.05`` so that NaN, for
    # which every comparison is False, lands here rather than falling through
    # to the final "*" branch.
    if not p <= 0.05:
        return "ns"
    if p <= 0.0001:
        return "****"
    if p <= 0.001:
        return "***"
    if p <= 0.01:
        return "**"
    return "*"
def paired_t_test(data1, data2):
    """Run a paired t-test on two equally long sequences of numbers.

    The test is computed as a one-sample t-test of the element-wise
    differences ``data1 - data2`` against a mean of 0.

    Returns a tuple ``(p_value, summary, significant)`` where ``summary`` is
    the symbol from ``p_summary`` and ``significant`` is "Yes" when
    ``p_value < 0.05`` and "No" otherwise.
    """
    differences = np.array(data1) - np.array(data2)
    # Only the P value is reported; the t statistic is discarded.
    _, p_value = ttest_1samp(differences, 0)
    significant = "Yes" if p_value < 0.05 else "No"
    return p_value, p_summary(p_value), significant
def unpaired_t_test(data1, data2):
    """Run an unpaired (independent two-sample) t-test on two sequences.

    Uses ``scipy.stats.ttest_ind`` with its default settings.

    Returns a tuple ``(p_value, summary, significant)`` where ``summary`` is
    the symbol from ``p_summary`` and ``significant`` is "Yes" when
    ``p_value < 0.05`` and "No" otherwise.
    """
    # Only the P value is reported; the t statistic is discarded.
    _, p_value = ttest_ind(np.array(data1), np.array(data2))
    significant = "Yes" if p_value < 0.05 else "No"
    return p_value, p_summary(p_value), significant
def _read_rows(path):
    """Return the tab-split data rows of *path*, skipping the header and blank lines."""
    with open(path) as f:
        next(f, None)  # the first line is the column header
        # Blank lines (typically a trailing one) carry no data and would
        # otherwise crash the column indexing further down.
        return [line.strip().split("\t") for line in f if line.strip()]


def main():
    """Run an unpaired t-test per gene and write the results as CSV.

    Reads ``data/STAD_gene_normal.txt`` and ``data/STAD_gene_tumor.txt``,
    pairs their rows by position, and writes one result row per pair to
    ``data/output_gene.csv``.
    """
    print("James' Gene Data Collector")
    print("Reading data...")

    normal_list = _read_rows(r"data/STAD_gene_normal.txt")
    tumor_list = _read_rows(r"data/STAD_gene_tumor.txt")

    # zip() stops at the shorter input, so make dropped rows visible.
    if len(normal_list) != len(tumor_list):
        print(
            "Warning: row count mismatch (normal: {}, tumor: {}); "
            "extra rows are ignored.".format(len(normal_list), len(tumor_list)),
            file=sys.stderr,
        )

    with open(r"data/output_gene.csv", "w") as out:
        # csv.writer quotes fields that contain commas or quotes;
        # "\n" keeps the same line ending the script always wrote.
        writer = csv.writer(out, lineterminator="\n")
        writer.writerow([
            "",
            "",
            "P value",
            "P value summary",
            "Significantly different? (P < 0.05)",
        ])

        for n, t in zip(normal_list, tumor_list):
            # Normal rows: name in column 0, values from column 1.
            # Tumor rows: id in column 1, values from column 2.
            gene_id = t[1]
            name = n[0]
            print("Collecting #{}: {}".format(gene_id, name))

            normal_data = [float(x) for x in n[1:]]
            tumor_data = [float(x) for x in t[2:]]
            p, summary, is_significant = unpaired_t_test(normal_data, tumor_data)

            # Format explicitly so the text does not depend on how the csv
            # module stringifies numpy floats.
            p_text = "<0.0001" if p < 0.0001 else "{}".format(round(p, 4))
            writer.writerow([gene_id, name, p_text, summary, is_significant])


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment