Last active
December 15, 2015 18:19
-
-
Save zero323/5302895 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import glob | |
from collections import Counter | |
# Label prepended to every input file's base name to form the gene
# identifier used in the output rows.
prefix = 'gene_'
def get_files_names(glob_pattern='*.txt'):
    """Return the paths matching ``glob_pattern``, sorted by name.

    ``glob.glob`` yields matches in arbitrary, platform-dependent order.
    Sorting makes the order of the results derived from this list
    reproducible between runs and machines.

    :param glob_pattern: shell-style pattern passed to ``glob.glob``.
    :returns: a (possibly empty) sorted list of matching paths.
    """
    return sorted(glob.glob(glob_pattern))
def get_jaspar_bits(files_names):
    """Collect the first whitespace-separated token of every line per file.

    Each file is opened with a context manager so it is closed even when
    reading fails. Blank (whitespace-only) lines are skipped instead of
    raising ``IndexError``.

    :param files_names: iterable of paths to read.
    :returns: a list with one inner list per file, in the order of
        ``files_names``; each inner list holds the first token of every
        non-blank line, in file order.
    """
    jaspar_bits = []
    for file_name in files_names:
        with open(file_name) as fr:
            jaspar_bits.append(
                [line.split()[0] for line in fr if line.strip()])
    return jaspar_bits
def get_tfbs_list(tfbs_list_file='tfbs_list'):
    """Read one identifier per line from ``tfbs_list_file``.

    A real list is returned (not a lazy iterator) so the result can be
    iterated repeatedly. Surrounding whitespace is stripped and lines that
    are empty after stripping are dropped, since an empty identifier can
    never match a token.

    :param tfbs_list_file: path of the text file to read.
    :returns: list of stripped, non-empty lines in file order.
    """
    with open(tfbs_list_file) as fr:
        stripped = (line.strip() for line in fr)
        return [name for name in stripped if name]
def jaspar_tfbs_count(jaspar_bits, tfbs_list, files_names):
    """Count, per file, how often each identifier of ``tfbs_list`` occurs.

    :param jaspar_bits: one sequence of tokens per file.
    :param tfbs_list: identifiers to count; fixes the column order.
    :param files_names: file names paired positionally with
        ``jaspar_bits`` (extra items on either side are ignored, as with
        ``zip``).
    :returns: list of rows ``[label, count_1, ..., count_n]`` where
        ``label`` is the module-level ``prefix`` plus the file name with a
        trailing ``.txt`` removed, and ``count_i`` is the number of
        occurrences of ``tfbs_list[i]`` (0 when absent).
    """
    # Materialize once: the identifiers are walked again for every file.
    tfbs_list = list(tfbs_list)
    suffix = '.txt'
    rows = []
    for file_name, bits in zip(files_names, jaspar_bits):
        # Strip only the extension, not every '.txt' inside the name.
        if file_name.endswith(suffix):
            file_name = file_name[:-len(suffix)]
        occurrences = Counter(bits)
        # Counter returns 0 for identifiers that never occurred.
        rows.append(
            [prefix + file_name] + [occurrences[tfbs] for tfbs in tfbs_list])
    return rows
def format_count_output(tfbs_count):
    """Render count rows as text: one row per line, values joined by ', '.

    Uses ``str.join`` rather than the ``reduce`` builtin, which does not
    exist on Python 3 and fails on empty input.

    :param tfbs_count: iterable of rows, each an iterable of values.
    :returns: the formatted text; an empty string for empty input.
    """
    return '\n'.join(
        ', '.join('{0}'.format(value) for value in row)
        for row in tfbs_count)
def count_to_fingerprint(count, width=10):
    """Encode each count as a fixed-width unary segment of '1's and '0's.

    A count ``c`` becomes ``min(c, width)`` ones followed by zeros up to
    ``width`` characters. Counts are clamped to ``[0, width]`` so every
    segment has exactly ``width`` characters, including for negative
    values.

    :param count: iterable of values convertible with ``int()``.
    :param width: number of bits per count (default 10).
    :returns: the concatenated segments; an empty string for no counts.
    """
    segments = []
    for value in count:
        ones = max(0, min(int(value), width))
        segments.append('1' * ones + '0' * (width - ones))
    return ''.join(segments)
def get_fingerprint(tfbs_count):
    """Turn count rows into ``[label, fingerprint]`` pairs.

    Returns a real list so the result can be iterated more than once.

    :param tfbs_count: iterable of rows ``[label, count_1, ..., count_n]``.
    :returns: list of ``[label, fingerprint]`` where ``fingerprint`` is
        ``count_to_fingerprint`` applied to the row's counts.
    """
    return [[row[0], count_to_fingerprint(row[1:])] for row in tfbs_count]
def compute_tanimoto(first, second):
    """Return the Tanimoto coefficient of two bit sequences, rounded to 2 dp.

    The coefficient is ``|first AND second| / |first OR second|``. Both
    totals are accumulated in one pass, so the paired bits are never
    consumed twice.

    :param first: iterable of bits convertible with ``int()`` (for
        example a string of '0'/'1' characters).
    :param second: same as ``first``; positions beyond the shorter input
        are ignored, as with ``zip``.
    :returns: similarity as a float rounded to two decimals. When neither
        input has any set bit the ratio is undefined; the inputs are then
        treated as identical and 1.0 is returned, so a fingerprint
        compared with itself always scores 1.0.
    """
    common = 0
    union = 0
    for left, right in zip(first, second):
        left, right = int(left), int(right)
        common += left & right
        union += left | right
    if union == 0:
        # 0/0: no set bits on either side.
        return 1.0
    return round(float(common) / union, 2)
def get_tanimoto(fingerprint, fingerprint_list):
    """Score ``fingerprint`` against every entry of ``fingerprint_list``.

    Returns a real list so callers can concatenate it with other lists.

    :param fingerprint: the reference fingerprint.
    :param fingerprint_list: iterable of fingerprints to compare against.
    :returns: list of ``compute_tanimoto`` results, in input order.
    """
    return [compute_tanimoto(fingerprint, other) for other in fingerprint_list]
def fingerprints_to_tanimoto(genes_with_fingerprints):
    """Build the all-against-all similarity rows.

    :param genes_with_fingerprints: iterable of ``[label, fingerprint]``.
    :returns: list of rows ``[label, s_1, ..., s_n]`` where ``s_j`` is the
        ``get_tanimoto`` score of the row's fingerprint against the j-th
        input fingerprint.
    """
    # The input is walked twice (columns, then rows), so pin it to a list.
    entries = list(genes_with_fingerprints)
    fingerprints = [entry[1] for entry in entries]
    return [
        [entry[0]] + list(get_tanimoto(entry[1], fingerprints))
        for entry in entries
    ]
def format_tanimoto_output(genes_with_tanimoto):
    """Render similarity rows as comma-separated text with a header line.

    The first line lists the first element (label) of every row; each
    following line is one row with all its values joined by ','. Note that
    the header therefore has one field fewer than the data lines.

    :param genes_with_tanimoto: iterable of rows ``[label, s_1, ..., s_n]``.
    :returns: the formatted text; an empty string for empty input.
    """
    rows = [list(row) for row in genes_with_tanimoto]
    header = ','.join('{0}'.format(row[0]) for row in rows)
    lines = [','.join('{0}'.format(value) for value in row) for row in rows]
    return '\n'.join([header] + lines)
def main():
    """Print the pairwise Tanimoto matrix for the default input files.

    Reads the files matched by ``get_files_names()`` and the identifier
    list from ``get_tfbs_list()``, converts the per-file counts into
    fingerprints and prints the formatted similarity table.
    """
    files_names = get_files_names()
    if not files_names:
        # Without inputs there is nothing to compare; exit with a clear
        # message instead of failing later while formatting empty results.
        raise SystemExit('no input files matched the default pattern')
    jaspar_bits = get_jaspar_bits(files_names)
    tfbs_list = get_tfbs_list()
    tfbs_count = jaspar_tfbs_count(
        jaspar_bits=jaspar_bits,
        tfbs_list=tfbs_list,
        files_names=files_names)
    genes_fingerprints = get_fingerprint(tfbs_count)
    genes_with_tanimoto = fingerprints_to_tanimoto(genes_fingerprints)
    print(format_tanimoto_output(genes_with_tanimoto))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment