Skip to content

Instantly share code, notes, and snippets.

@Jamesits
Last active January 27, 2016 16:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jamesits/23f2bc877c9e6738b7eb to your computer and use it in GitHub Desktop.
Save Jamesits/23f2bc877c9e6738b7eb to your computer and use it in GitHub Desktop.
This script was used to analyze gene test results in tab-separated value (TSV) format from a lab at ZJU. It saves a great deal of tedious manual labor: pasting two rows from two different xlsx files into GraphPad, clicking an analysis button, and copying the result back into a third xlsx file. The script has one drawback, however: it does not run in parallel.
#!/usr/bin/env python3
# James' Gene T-tester
# Written by James Swineson <jamesswineson@gmail.com>, 2016-01-27
# All rights reserved.
#
# Install dependencies:
# pip3 install numpy scipy
#
# Tested to work under Python 3.5.1 on OS X 10.11.3
# import openpyxl
import numpy as np
from scipy.stats import ttest_1samp, ttest_ind
# organized data format
class Data:
    """Plain record bundling an id, a name, and the normal and tumor data."""

    def __init__(self, id=None, name=None, normal_data=None, tumor_data=None):
        # Every argument is stored verbatim under the attribute of the same
        # name; all of them default to None.
        self.id, self.name = id, name
        self.normal_data, self.tumor_data = normal_data, tumor_data
def p_summary(p):
    """Return the GraphPad-style significance symbol for the P value *p*.

    Convention (see
    http://graphpad.com/support/faq/what-is-the-meaning-of--or--or--in-reports-of-statistical-significance-from-prism-or-instat/):

        Symbol  Meaning
        ns      P > 0.05
        *       P ≤ 0.05
        **      P ≤ 0.01
        ***     P ≤ 0.001
        ****    P ≤ 0.0001

    A NaN P value is reported as "ns": every ordered comparison with NaN is
    false, so it must never fall through to a "significant" symbol.
    """
    # Walk the thresholds from strictest to loosest and return the first one
    # that p satisfies. Anything that satisfies none of them (p > 0.05, or
    # NaN) is not significant.
    thresholds = (
        (0.0001, "****"),
        (0.001, "***"),
        (0.01, "**"),
        (0.05, "*"),
    )
    for limit, symbol in thresholds:
        if p <= limit:
            return symbol
    return "ns"
def paired_t_test(data1, data2):
    """Run a paired t-test on two samples.

    Implemented as a one-sample t-test of the element-wise differences
    ``data1 - data2`` against a mean of 0.

    Returns a 3-tuple: the P value, its symbol from ``p_summary``, and
    "Yes" or "No" for whether the P value is below 0.05.
    """
    differences = np.array(data1) - np.array(data2)
    # Only the P value is used; the t statistic is discarded.
    _, p_value = ttest_1samp(differences, 0)
    if p_value < 0.05:
        verdict = "Yes"
    else:
        verdict = "No"
    return p_value, p_summary(p_value), verdict
def unpaired_t_test(data1, data2):
    """Run an unpaired (independent two-sample) t-test via ``ttest_ind``.

    Returns a 3-tuple: the P value, its symbol from ``p_summary``, and
    "Yes" or "No" for whether the P value is below 0.05.
    """
    first_sample = np.array(data1)
    second_sample = np.array(data2)
    # Index 1 of the result pair is the P value; the statistic is unused.
    _, p_value = ttest_ind(first_sample, second_sample)
    symbol = p_summary(p_value)
    verdict = "Yes" if p_value < 0.05 else "No"
    return p_value, symbol, verdict
def _read_data_rows(path):
    """Return the tab-split data rows of *path*, skipping the header and blank lines."""
    with open(path) as f:
        # Discard the first (header) line; harmless if the file is empty.
        next(f, None)
        stripped_lines = (line.strip() for line in f)
        # A blank line would otherwise become [''] and break the column
        # indexing done by the caller.
        return [line.split("\t") for line in stripped_lines if line]


if __name__ == "__main__":
    # Local import: only the script entry point writes CSV.
    import csv

    print("James' Gene Data Collector")
    print("Reading data...")

    # Read data (tab-separated text version).
    normal_list = _read_data_rows(r"data/STAD_gene_normal.txt")
    tumor_list = _read_data_rows(r"data/STAD_gene_tumor.txt")

    # Rows are paired purely by position and zip() stops at the shorter
    # list, so say so instead of silently dropping the unmatched rows.
    if len(normal_list) != len(tumor_list):
        print("Warning: {} normal rows vs {} tumor rows; extra rows are ignored".format(
            len(normal_list), len(tumor_list)))

    # The file stays in default text mode and the writer emits "\n", so line
    # endings match what the previous hand-written out.write() calls
    # produced. csv.writer adds quoting for fields containing commas or
    # quotes.
    with open(r"data/output_gene.csv", "w") as out:
        writer = csv.writer(out, lineterminator="\n")
        writer.writerow(["", "", "P value", "P value summary",
                         "Significantly different? (P < 0.05)"])
        for n, t in zip(normal_list, tumor_list):
            # Normal rows: column 0 is the name, the rest are values.
            # Tumor rows: column 1 is the id, values start at column 2.
            gene_id = t[1]
            name = n[0]
            print("Collecting #{}: {}".format(gene_id, name))
            data = Data(gene_id, name,
                        [float(x) for x in n[1:]],
                        [float(x) for x in t[2:]])
            p, summary, is_significant = unpaired_t_test(data.normal_data, data.tumor_data)
            # Very small P values are reported as a bound, the rest rounded
            # to 4 decimal places.
            p_text = "<0.0001" if p < 0.0001 else str(round(p, 4))
            writer.writerow([gene_id, name, p_text, summary, is_significant])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment