Krewn/Comps.py

## Comps.py
#########################################################
##   Author : Kevin Nelson                             ##
#					                #
##	Mapping MicroArrayData back to AS              ##
#########################################################
import sys
import csv
import pylab as pl
import matplotlib.pyplot as plt
import numpy
import os
import threading
#print "ImportsComplete"
				####Dependancies
#AnaylizeData.py   Data.tsv   GbyAs.csv	test
#GbyAs.tsv

present=1
absent=-1

def is_number(s):
	try:
		float(s)
		return True
	except ValueError:
		return False

def clip(D):
	#print '\n.\n..\n...\n....\n...\n..\n.\n'
	#prDict2(D)
	BM96={}
	n=0
	b=12
	while(is_number(D[b][2])):
		b+=1
	#print b
	for k in range(11,b):
		row=[]
		for p in range (1,98,1):
			if(is_number(D[k][p])):
				if(float(D[k][p])>0 and float(D[k][p])!=999):
					row.append(float(D[k][p]))
				else:
					row.append(0)
			else:
				row.append(D[k][p])
		BM96[k-11]=row
	#print '\#'
	#print BM96
	return BM96

def Boolienize(data):
	global present
	global absent
	BData={}
	n=0
	try:
		for k in range(0,len(data),1):					#boolienize Data
			row=[]
			for p in data[k]:
				if(is_number(p)):
					if(float(p)>0 and float(p)!=999):
						row.append(present)
					else:
						row.append(absent)
				else:
					row.append(p)
			BData[k]=row
			#print BData
		return BData
	except(KeyError):
		for k in data:					#boolienize Data
			row=[]
			for p in data[k]:
				if(is_number(p)):
					if(float(p)>0 and float(p)!=999):
						row.append(present)
					else:
						row.append(absent)
				else:
					row.append(p)
			BData[k]=row
			#print BData
		return BData
#data={}

def readFileT(n):
	fileIN=n
	Data={}
	with open(fileIN) as csvfile:
		tableMain=csv.reader(csvfile,delimiter='\t')
		k=0
		for row in tableMain:
			Data[k]=row
			k+=1							#Read the Data into dict Data
	return(Data)

def readFileC(n):
	fileIN=n
	Data={}
	with open(fileIN) as csvfile:
		tableMain=csv.reader(csvfile,delimiter=',')
		k=0
		for row in tableMain:
			Data[k]=row
			k+=1							#Read the Data into dict Data
	return(Data)

def printDic2d(nm_at):
	for k in nm_at:
		for k2 in nm_at[k]:
			print str(k)+'\t'+str(k2)+'\t'+str(nm_at[k][k2])
def prDict(Dists):
	once = True
	Head="\t"
	tail=""
	n=0
	for k in Dists:
		if once == True and n>0:
			for k2 in Dists[k]:
				Head+=str(k2)+'\t'
			once =False
		#Head+=str(k)+"\t"
		tail+=str(k)+"\t"
		for k2 in Dists[k]:
			try:
				tail+=str(Dists[k][k2])+"\t"
			except(TypeError):
				tail+=str(k2)+"\t"
		tail+="\n"
		n+=1
	Head+="\n"
	print Head+tail
	print "\n\n"

def strDict(Dists):
	once = True
	Head="\t"
	tail=""
	n=0
	for k in Dists:
		if once == True and n>0:
			for k2 in Dists[k]:
				Head+=str(k2)+'\t'
			once =False
		#Head+=str(k)+"\t"
		tail+=str(k)+"\t"
		for k2 in Dists[k]:
			try:
				tail+=str(Dists[k][k2])+"\t"
			except(TypeError):
				tail+=str(k2)+"\t"
		tail+="\n"
		n+=1
	Head+="\n"
	return( Head+tail )

def getBetterTables3d(data):
	Data={}
	for k in data:
		Data[k]={}
		for k2 in data[k]:
			if data[k][k2][0]=="Name":
				for k3 in range(1,len(data[k][k2])):
					Data[k][data[k][k2][k3]]={}
					for k4 in range(1,len(data[k][k2])):
						Data[k][data[k][k2][k3]][data[k][k2][k4]]=data[k][k3][k4]
	#print Data
	for k in Data:
		for k2 in Data[k]:
			for k3 in Data[k][k2]:
				try:
					a=float(Data[k][k2][k3])
				except(ValueError):
					print Data[k][k2][k3]
	return Data

def getBetterTables2D(data):
	Data={}
	first=True
	for k in data:
		if first:
			b = k
			first=False
		else:
			First=True
			Data[data[k][0]]={}
			c=0
			for k2 in data[k]:
				if First:
					First=False
				else:
					c+=1
					Data[data[k][0]][data[b][c]]=k2
#		else:
#			Data[data[k][0]]={}
#			n=0
#			First=True
#			for k2 in data[k]:
#				if First == True:
#					First = False
#				else:
#					Data[data[k][0]][data[0][n]]=k2
#					n+=1
	return Data

#def checkSys():
#	SysArgvs={}
#	length=0
#	def noInput():
#		print '...BetterLuckNextTime...'
#		k=0
#		while k<15:
#			try:
#				SysArgvs[k]=sys.agrv[k]
#				k+=1
#			except(AttributeError):
#				print SysArgvs
#				return(SysArgvs,length)
#		pass
#	try:
#		time=10.0
#		t = threading.Timer(time,noInput)
#		t.start()
#		mmk = str(raw_input("Type the word Arrdvark and press enter:\nYou have "+str(time)+" seconds."))
#		k=0
#		while(k<3):
#			try:
#				x = int(raw_input("1:Enter M value\n2:Enter M and BOOL for absents\n3:A_M_P"))
#				if(x>3 or x<1):
#					ValueError()
#				break
#			except ValueError:
#				print "Enter 1,2 or 3 ... Try again:"
#				k+=1
#		if(k==1):
#			SysArgvs[0]=float(raw_input("Enter Moderates weight value:"))
#			length=1
#			return(SysArgvs,length)
#		if(k==2):
#			for k2 in range(0,2):
#				if(k2==0):
#					 SysArgvs[0]=float(raw_input("Enter Moderates weight value & (True/False -Absent Values from GXD-):"))
#		if(k == 3):
#			print 'Enter Calls'
#			while k<3:
#				try:
#					if(k==0):
#						x = float(raw_input("Absent values Will be Calculated As:"))
#					if(k==1):
#						x = float(raw_input("Moderate values Will be Calculated As:"))
#					if(k==2):
#						x = float(raw_input("Calls values Will be Calculated As:"))
#					k+=1
#				except ValueError:
#					print "Oops!  That was no valid number.  Try again..."
#			k=1
#	except(AttributeError):
#		print 'WoooBuddy'
#		pass


#a = checkSys()
#print a


def getGbyAs(GbyAS , Data):
	global present
	global absent
	GbyAs={}
	genes=[]
	for k in Data:
		genes=Data[k].keys()
		break
	#print(genes)
	for k in GbyAS:
		#print(GbyAS[k][0])
		GbyAs[GbyAS[k][0]]={}
		for k2 in genes:
			GbyAs[GbyAS[k][0]][k2]=0.
	#print(len(GbyAs))
	#print(len(genes))
	#prDict(GbyAs)

	for k in GbyAS:
		First = True
		for k2 in GbyAS[k]:
			if First:First=False
			elif(k2[:len(k2)-1] in genes):
				if(k2[-1:]=='-'):
					GbyAs[GbyAS[k][0]][k2[0:len(k2)-1]]=absent
				else:
					GbyAs[GbyAS[k][0]][k2[0:len(k2)-1]]=present
	#printDic2d(GbyAs)
	return GbyAs

def kos(a):
	cos = 0
	magA= 0
	magB= 0
	for k in a:
		cos+=k[0]*k[1]
		magA+=numpy.power(k[0],2)
		magB+=numpy.power(k[1],2)
	cos = cos / (numpy.power(magA,0.5)*numpy.power(magB,0.5))
	return(cos)

def corro(a):
	n=0.
	avgA=0.
	avgB=0.
	for k in a:
		n+=1.
		avgA+=k[0]
		avgB+=k[1]
	if(n==0):return(None)
	else:
		avgA/=n
		avgB/=n
		coro=0.
		stdA=0.
		stdB=0.
		for k in a:
			coro+=(k[0]-avgA)*(k[1]-avgB)
			stdA+=numpy.power(k[0]-avgA,2)
			stdB+=numpy.power(k[1]-avgB,2)
		coro /=numpy.power(stdA*stdB,0.5)
		return(coro)

def euc(a):
	mag= 0.
	for k in a:
		mag+=numpy.power(k[0]-k[1],2)
	mag = numpy.power(mag,0.5)
	return(mag)				#while these computations are O(n) data prep is O(n^2) as we search n keys n times, once for each key.

print '-init -ImportsComplete--'

def runComp(data1,data2,compType):

	Data= getBetterTables2D(Boolienize(clip(readFileC(data1))))#'Data.tsv'
	GbyAS=readFileT(data2)#'GbyAs.csv'

	#print 'Data -As Read-'
	#print Data
	#print Data
	#print GbyAS
	#Data=getBetterTables2D(Data)
	#prDict(Data)
	#GbyAS=getBetterTables2D(GbyAS)
	#prDict(Data)
	#prDict(GbyAS)

	GbyAS = getGbyAs(GbyAS,Data)
	#print'GbyAS'
	#prDict(GbyAS)

#	CosAB={}								#microArraySmaples x AS
#	Expression = {}
#	for k in Data:
#		for k3 in Data[k]:
#			if k3!="ProbeID#" and k3!= "Accession#" and k3 != "Gene" and k3 != "":
#				CosAB[k3]={}
#				for k2 in GbyAS:
#					CosAB[k3][k2]=0.
#		break


	#print 'Data'
	#prDict(Data)
#	for k in Data:
#		Expression[k]={}
#		for k3 in Data[k]:
#			if k3!="ProbeID#" and k3!= "Accession#" and k3 != "Gene":
#				if Data[k][k3]=='P':
#					Expression[k][k3]=1.0#sys.argv[1]
#				if Data[k][k3]=='M':
#					Expression[k][k3]=0.5#sys.argv[2]
#				if Data[k][k3]=='A':
#					Expression[k][k3]=-1.0#sys.argv[3]


	Comp = {}
	#printDic2d(GbyAS)
	for k in GbyAS:					#Cos(A,B) = A.B/(|A|*|B|)
		Comp[k]={}
		for k3 in Data:
			Comp[k][k3]=[]
		for k2 in GbyAS[k]:			#Where A_n and B_n are Known
			if(GbyAS[k][k2] != 0):
				for k3 in Data:
					#print('#'*25)
					#print(GbyAS[k][k2])
					#print(Data[k3])
					a= 0 if GbyAS[k][k2] == -1 else 1
					Comp[k][k3].append([a,Data[k3][k2]])
	CosAB={}
	for k in Comp:
		CosAB[k]={}
		for k2 in Comp[k]:
			#print k
			#print k2
			#print Comp[k][k2]
			#print Comp[k][k2]
			if(compType == "cos"):CosAB[k][k2]=kos(Comp[k][k2])
			elif(compType == "eucdist"):CosAB[k][k2]=euc(Comp[k][k2])
			elif(compType == "corolate"):CosAB[k][k2]=corro(Comp[k][k2])
			#print CosAB[k2][k]
	#print 'CosAB'
	#prDict(CosAB)
	#print 'Expression'
	#prDict(Expression)
	return(CosAB)

temp=open('Cos.tsv','w')
temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"cos")))
temp.close()

temp=open('EucDist.tsv','w')
temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"eucdist")))
temp.close()

temp = open('Corolate.tsv','w')
temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"corolate")))
temp.close()

temp=open('LocalData.tsv','w')
temp.write(strDict(getBetterTables2D(Boolienize(clip(readFileC('_mousePlate1.csv'))))))
temp.close()

temp = open('gxdData.tsv','w')
temp.write(strDict(getGbyAs(readFileT('GbyAs.csv'),getBetterTables2D(Boolienize(clip(readFileC('_mousePlate1.csv')))))))
temp.close()
#for k in Data:
#	for k2 in GbyAS:
#		dot=0.0
#		maga=0.0
#		magb=0.0
#		for k3 in GbyAS:
#			dot+=
#
	#########################################################
	## Author : Kevin Nelson ##
	# #
	## Mapping MicroArrayData back to AS ##
	#########################################################
	import sys
	import csv
	import pylab as pl
	import matplotlib.pyplot as plt
	import numpy
	import os
	import threading
	#print "ImportsComplete"
	####Dependancies
	#AnaylizeData.py Data.tsv GbyAs.csv test
	#GbyAs.tsv

	present=1
	absent=-1

	def is_number(s):
	try:
	float(s)
	return True
	except ValueError:
	return False

	def clip(D):
	#print '\n.\n..\n...\n....\n...\n..\n.\n'
	#prDict2(D)
	BM96={}
	n=0
	b=12
	while(is_number(D[b][2])):
	b+=1
	#print b
	for k in range(11,b):
	row=[]
	for p in range (1,98,1):
	if(is_number(D[k][p])):
	if(float(D[k][p])>0 and float(D[k][p])!=999):
	row.append(float(D[k][p]))
	else:
	row.append(0)
	else:
	row.append(D[k][p])
	BM96[k-11]=row
	#print '\#'
	#print BM96
	return BM96

	def Boolienize(data):
	global present
	global absent
	BData={}
	n=0
	try:
	for k in range(0,len(data),1): #boolienize Data
	row=[]
	for p in data[k]:
	if(is_number(p)):
	if(float(p)>0 and float(p)!=999):
	row.append(present)
	else:
	row.append(absent)
	else:
	row.append(p)
	BData[k]=row
	#print BData
	return BData
	except(KeyError):
	for k in data: #boolienize Data
	row=[]
	for p in data[k]:
	if(is_number(p)):
	if(float(p)>0 and float(p)!=999):
	row.append(present)
	else:
	row.append(absent)
	else:
	row.append(p)
	BData[k]=row
	#print BData
	return BData
	#data={}

	def readFileT(n):
	fileIN=n
	Data={}
	with open(fileIN) as csvfile:
	tableMain=csv.reader(csvfile,delimiter='\t')
	k=0
	for row in tableMain:
	Data[k]=row
	k+=1 #Read the Data into dict Data
	return(Data)

	def readFileC(n):
	fileIN=n
	Data={}
	with open(fileIN) as csvfile:
	tableMain=csv.reader(csvfile,delimiter=',')
	k=0
	for row in tableMain:
	Data[k]=row
	k+=1 #Read the Data into dict Data
	return(Data)

	def printDic2d(nm_at):
	for k in nm_at:
	for k2 in nm_at[k]:
	print str(k)+'\t'+str(k2)+'\t'+str(nm_at[k][k2])
	def prDict(Dists):
	once = True
	Head="\t"
	tail=""
	n=0
	for k in Dists:
	if once == True and n>0:
	for k2 in Dists[k]:
	Head+=str(k2)+'\t'
	once =False
	#Head+=str(k)+"\t"
	tail+=str(k)+"\t"
	for k2 in Dists[k]:
	try:
	tail+=str(Dists[k][k2])+"\t"
	except(TypeError):
	tail+=str(k2)+"\t"
	tail+="\n"
	n+=1
	Head+="\n"
	print Head+tail
	print "\n\n"

	def strDict(Dists):
	once = True
	Head="\t"
	tail=""
	n=0
	for k in Dists:
	if once == True and n>0:
	for k2 in Dists[k]:
	Head+=str(k2)+'\t'
	once =False
	#Head+=str(k)+"\t"
	tail+=str(k)+"\t"
	for k2 in Dists[k]:
	try:
	tail+=str(Dists[k][k2])+"\t"
	except(TypeError):
	tail+=str(k2)+"\t"
	tail+="\n"
	n+=1
	Head+="\n"
	return( Head+tail )

	def getBetterTables3d(data):
	Data={}
	for k in data:
	Data[k]={}
	for k2 in data[k]:
	if data[k][k2][0]=="Name":
	for k3 in range(1,len(data[k][k2])):
	Data[k][data[k][k2][k3]]={}
	for k4 in range(1,len(data[k][k2])):
	Data[k][data[k][k2][k3]][data[k][k2][k4]]=data[k][k3][k4]
	#print Data
	for k in Data:
	for k2 in Data[k]:
	for k3 in Data[k][k2]:
	try:
	a=float(Data[k][k2][k3])
	except(ValueError):
	print Data[k][k2][k3]
	return Data

	def getBetterTables2D(data):
	Data={}
	first=True
	for k in data:
	if first:
	b = k
	first=False
	else:
	First=True
	Data[data[k][0]]={}
	c=0
	for k2 in data[k]:
	if First:
	First=False
	else:
	c+=1
	Data[data[k][0]][data[b][c]]=k2
	# else:
	# Data[data[k][0]]={}
	# n=0
	# First=True
	# for k2 in data[k]:
	# if First == True:
	# First = False
	# else:
	# Data[data[k][0]][data[0][n]]=k2
	# n+=1
	return Data

	#def checkSys():
	# SysArgvs={}
	# length=0
	# def noInput():
	# print '...BetterLuckNextTime...'
	# k=0
	# while k<15:
	# try:
	# SysArgvs[k]=sys.agrv[k]
	# k+=1
	# except(AttributeError):
	# print SysArgvs
	# return(SysArgvs,length)
	# pass
	# try:
	# time=10.0
	# t = threading.Timer(time,noInput)
	# t.start()
	# mmk = str(raw_input("Type the word Arrdvark and press enter:\nYou have "+str(time)+" seconds."))
	# k=0
	# while(k<3):
	# try:
	# x = int(raw_input("1:Enter M value\n2:Enter M and BOOL for absents\n3:A_M_P"))
	# if(x>3 or x<1):
	# ValueError()
	# break
	# except ValueError:
	# print "Enter 1,2 or 3 ... Try again:"
	# k+=1
	# if(k==1):
	# SysArgvs[0]=float(raw_input("Enter Moderates weight value:"))
	# length=1
	# return(SysArgvs,length)
	# if(k==2):
	# for k2 in range(0,2):
	# if(k2==0):
	# SysArgvs[0]=float(raw_input("Enter Moderates weight value & (True/False -Absent Values from GXD-):"))
	# if(k == 3):
	# print 'Enter Calls'
	# while k<3:
	# try:
	# if(k==0):
	# x = float(raw_input("Absent values Will be Calculated As:"))
	# if(k==1):
	# x = float(raw_input("Moderate values Will be Calculated As:"))
	# if(k==2):
	# x = float(raw_input("Calls values Will be Calculated As:"))
	# k+=1
	# except ValueError:
	# print "Oops! That was no valid number. Try again..."
	# k=1
	# except(AttributeError):
	# print 'WoooBuddy'
	# pass


	#a = checkSys()
	#print a



	def getGbyAs(GbyAS , Data):
	global present
	global absent
	GbyAs={}
	genes=[]
	for k in Data:
	genes=Data[k].keys()
	break
	#print(genes)
	for k in GbyAS:
	#print(GbyAS[k][0])
	GbyAs[GbyAS[k][0]]={}
	for k2 in genes:
	GbyAs[GbyAS[k][0]][k2]=0.
	#print(len(GbyAs))
	#print(len(genes))
	#prDict(GbyAs)

	for k in GbyAS:
	First = True
	for k2 in GbyAS[k]:
	if First:First=False
	elif(k2[:len(k2)-1] in genes):
	if(k2[-1:]=='-'):
	GbyAs[GbyAS[k][0]][k2[0:len(k2)-1]]=absent
	else:
	GbyAs[GbyAS[k][0]][k2[0:len(k2)-1]]=present
	#printDic2d(GbyAs)
	return GbyAs

	def kos(a):
	cos = 0
	magA= 0
	magB= 0
	for k in a:
	cos+=k[0]*k[1]
	magA+=numpy.power(k[0],2)
	magB+=numpy.power(k[1],2)
	cos = cos / (numpy.power(magA,0.5)*numpy.power(magB,0.5))
	return(cos)

	def corro(a):
	n=0.
	avgA=0.
	avgB=0.
	for k in a:
	n+=1.
	avgA+=k[0]
	avgB+=k[1]
	if(n==0):return(None)
	else:
	avgA/=n
	avgB/=n
	coro=0.
	stdA=0.
	stdB=0.
	for k in a:
	coro+=(k[0]-avgA)*(k[1]-avgB)
	stdA+=numpy.power(k[0]-avgA,2)
	stdB+=numpy.power(k[1]-avgB,2)
	coro /=numpy.power(stdA*stdB,0.5)
	return(coro)

	def euc(a):
	mag= 0.
	for k in a:
	mag+=numpy.power(k[0]-k[1],2)
	mag = numpy.power(mag,0.5)
	return(mag) #while these computations are O(n) data prep is O(n^2) as we search n keys n times, once for each key.

	print '-init -ImportsComplete--'

	def runComp(data1,data2,compType):

	Data= getBetterTables2D(Boolienize(clip(readFileC(data1))))#'Data.tsv'
	GbyAS=readFileT(data2)#'GbyAs.csv'

	#print 'Data -As Read-'
	#print Data
	#print Data
	#print GbyAS
	#Data=getBetterTables2D(Data)
	#prDict(Data)
	#GbyAS=getBetterTables2D(GbyAS)
	#prDict(Data)
	#prDict(GbyAS)

	GbyAS = getGbyAs(GbyAS,Data)
	#print'GbyAS'
	#prDict(GbyAS)

	# CosAB={} #microArraySmaples x AS
	# Expression = {}
	# for k in Data:
	# for k3 in Data[k]:
	# if k3!="ProbeID#" and k3!= "Accession#" and k3 != "Gene" and k3 != "":
	# CosAB[k3]={}
	# for k2 in GbyAS:
	# CosAB[k3][k2]=0.
	# break


	#print 'Data'
	#prDict(Data)
	# for k in Data:
	# Expression[k]={}
	# for k3 in Data[k]:
	# if k3!="ProbeID#" and k3!= "Accession#" and k3 != "Gene":
	# if Data[k][k3]=='P':
	# Expression[k][k3]=1.0#sys.argv[1]
	# if Data[k][k3]=='M':
	# Expression[k][k3]=0.5#sys.argv[2]
	# if Data[k][k3]=='A':
	# Expression[k][k3]=-1.0#sys.argv[3]


	Comp = {}
	#printDic2d(GbyAS)
	for k in GbyAS: #Cos(A,B) = A.B/(\|A\|*\|B\|)
	Comp[k]={}
	for k3 in Data:
	Comp[k][k3]=[]
	for k2 in GbyAS[k]: #Where A_n and B_n are Known
	if(GbyAS[k][k2] != 0):
	for k3 in Data:
	#print('#'*25)
	#print(GbyAS[k][k2])
	#print(Data[k3])
	a= 0 if GbyAS[k][k2] == -1 else 1
	Comp[k][k3].append([a,Data[k3][k2]])
	CosAB={}
	for k in Comp:
	CosAB[k]={}
	for k2 in Comp[k]:
	#print k
	#print k2
	#print Comp[k][k2]
	#print Comp[k][k2]
	if(compType == "cos"):CosAB[k][k2]=kos(Comp[k][k2])
	elif(compType == "eucdist"):CosAB[k][k2]=euc(Comp[k][k2])
	elif(compType == "corolate"):CosAB[k][k2]=corro(Comp[k][k2])
	#print CosAB[k2][k]
	#print 'CosAB'
	#prDict(CosAB)
	#print 'Expression'
	#prDict(Expression)
	return(CosAB)

	temp=open('Cos.tsv','w')
	temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"cos")))
	temp.close()

	temp=open('EucDist.tsv','w')
	temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"eucdist")))
	temp.close()

	temp = open('Corolate.tsv','w')
	temp.write(strDict(runComp('_mousePlate1.csv','GbyAs.csv',"corolate")))
	temp.close()

	temp=open('LocalData.tsv','w')
	temp.write(strDict(getBetterTables2D(Boolienize(clip(readFileC('_mousePlate1.csv'))))))
	temp.close()

	temp = open('gxdData.tsv','w')
	temp.write(strDict(getGbyAs(readFileT('GbyAs.csv'),getBetterTables2D(Boolienize(clip(readFileC('_mousePlate1.csv')))))))
	temp.close()
	#for k in Data:
	# for k2 in GbyAS:
	# dot=0.0
	# maga=0.0
	# magb=0.0
	# for k3 in GbyAS:
	# dot+=
	#