Created
November 9, 2018 14:38
-
-
Save almasaeed2010/b1fa99a68c8dbcd171384a9f5611e063 to your computer and use it in GitHub Desktop.
Convert HTSeq output files (one per biomaterial) into a single expression matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Merge per-biomaterial count files into one tab-separated expression matrix.
#
# Usage: python <script> FILE [FILE ...]
#
# Each input file is read as tab-separated text: the first column is the gene
# identifier and the second column is the expression value (any further
# columns are ignored). The matrix is written to standard output with one
# column per input file and one row per gene.
import os
import sys

# Placeholder written when a gene has no value in one of the input files.
MISSING_VALUE = "NA"


def _biomaterial_name(path):
    """Return the column label for *path*: its base name up to the first dot.

    The directory part is dropped first so that paths such as
    ``./runs/sample1.txt`` yield ``sample1`` rather than an empty string.
    """
    return os.path.basename(path).split(".")[0]


def _load_counts(paths):
    """Read every file in *paths* and collect the values per gene.

    Returns a ``(biomaterials, matrix)`` pair: ``biomaterials`` is the list of
    unique column labels in first-seen order, and ``matrix`` maps each gene
    (in first-seen order) to a ``{biomaterial: expression}`` dict. Values are
    kept as the strings read from the file. If two files produce the same
    label, values from the later file overwrite the earlier ones.

    Raises:
        ValueError: if a non-blank line has fewer than two tab-separated
            fields.
        OSError: if a file cannot be opened or read.
    """
    biomaterials = {}  # dict used as an insertion-ordered set
    matrix = {}
    for path in paths:
        biomaterial = _biomaterial_name(path)
        biomaterials[biomaterial] = None
        # The context manager closes the file even if parsing fails.
        with open(path, "r") as handle:
            for line_number, raw_line in enumerate(handle, start=1):
                # Strip "\r" as well so files with Windows line endings do
                # not leak a carriage return into the value column.
                line = raw_line.rstrip("\r\n")
                if not line.strip():
                    continue  # tolerate blank lines (e.g. at end of file)
                fields = line.split("\t")
                if len(fields) < 2:
                    raise ValueError(
                        "%s:%d: expected at least two tab-separated fields"
                        % (path, line_number)
                    )
                gene, expression = fields[0], fields[1]
                matrix.setdefault(gene, {})[biomaterial] = expression
    return list(biomaterials), matrix


def main(argv=None):
    """Build the matrix from the given files and print it to stdout.

    Args:
        argv: list of input file paths; defaults to ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 when no file was given.
    """
    paths = sys.argv[1:] if argv is None else list(argv)
    if not paths:
        # Usage errors go to stderr so they never end up in a redirected
        # matrix file.
        print('Please provide a file name', file=sys.stderr)
        return 1

    biomaterials, matrix = _load_counts(paths)

    # Header row: empty first cell (the gene column), then one label per
    # biomaterial. Concatenate instead of passing two arguments to print(),
    # which would insert a space before the first label.
    print("\t" + "\t".join(biomaterials))
    for gene, values in matrix.items():
        # A gene may be absent from some files; emit a placeholder instead
        # of failing with KeyError.
        row = [gene]
        row.extend(values.get(name, MISSING_VALUE) for name in biomaterials)
        print("\t".join(row))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment