Skip to content

Instantly share code, notes, and snippets.

@zero323
Last active December 15, 2015 18:19
Show Gist options
  • Save zero323/5302895 to your computer and use it in GitHub Desktop.
Save zero323/5302895 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
import glob
from collections import Counter
# Label prepended to each input file's name (with '.txt' removed) to build the
# gene identifier used as the first cell of every count row.
prefix = 'gene_'
def get_files_names(glob_pattern='*.txt'):
    """Return the paths matching *glob_pattern*, sorted by name.

    ``glob.glob`` yields matches in an arbitrary, platform-dependent order.
    Sorting makes the order of the returned files (and of everything derived
    from it) reproducible between runs.

    Args:
        glob_pattern: Shell-style pattern passed to ``glob.glob``.

    Returns:
        A sorted list of matching path strings (empty if nothing matches).
    """
    return sorted(glob.glob(glob_pattern))
def get_jaspar_bits(files_names):
    """Read the first whitespace-separated token of every line in each file.

    Each file is opened with a context manager so its handle is closed even
    if reading fails. Blank (whitespace-only) lines are skipped because they
    have no first token to extract.

    Args:
        files_names: Iterable of paths to text files.

    Returns:
        A list with one entry per file, in input order; each entry is the
        list of first tokens found in that file.
        NOTE(review): presumably these tokens are TFBS identifiers -- confirm
        against the input file format.
    """
    jaspar_bits = []
    for file_name in files_names:
        with open(file_name) as handle:
            jaspar_bits.append(
                [line.split()[0] for line in handle if line.strip()])
    return jaspar_bits
def get_tfbs_list(tfbs_list_file='tfbs_list'):
    """Load identifiers from a file holding one identifier per line.

    Surrounding whitespace is stripped from every line and blank lines are
    dropped. A real list is returned (not a one-shot iterator) so callers can
    iterate over it as many times as they need.

    Args:
        tfbs_list_file: Path of the file to read.

    Returns:
        A list of non-empty, stripped lines in file order.
    """
    with open(tfbs_list_file) as fr:
        stripped = (line.strip() for line in fr)
        return [entry for entry in stripped if entry]
def jaspar_tfbs_count(jaspar_bits, tfbs_list, files_names, gene_prefix=None):
    """Count, per file, how often each identifier of *tfbs_list* occurs.

    Args:
        jaspar_bits: One iterable of tokens per file, parallel to
            *files_names*.
        tfbs_list: Identifiers to count; their order defines the order of the
            count columns.
        files_names: File names used to build the row labels.
        gene_prefix: String prepended to each label. Defaults to the
            module-level ``prefix`` when left as ``None``.

    Returns:
        A list of rows ``[label, count_1, ..., count_n]``, one per file, where
        ``label`` is ``gene_prefix`` plus the file name without a trailing
        ``.txt`` and each count is 0 when the identifier never occurs.
    """
    if gene_prefix is None:
        gene_prefix = prefix
    # Materialise once: the identifiers are walked again for every file, so a
    # one-shot iterator would only produce counts for the first row.
    tfbs_list = list(tfbs_list)
    extension = '.txt'

    rows = []
    # zip stops at the shorter of the two inputs, as the original pairing did.
    for file_name, bits in zip(files_names, jaspar_bits):
        occurrences = Counter(bits)
        # Strip only a trailing extension; a '.txt' elsewhere in the name is
        # part of the name and must be kept.
        if file_name.endswith(extension):
            label = file_name[:-len(extension)]
        else:
            label = file_name
        # Counter returns 0 for identifiers it has never seen.
        rows.append(
            [gene_prefix + label] + [occurrences[tfbs] for tfbs in tfbs_list])
    return rows
def format_count_output(tfbs_count):
    """Render count rows as text: cells joined by ', ', rows by newlines.

    Uses ``str.join`` rather than a ``reduce`` fold, so it does not depend on
    ``reduce`` being a builtin and an empty input yields an empty string
    instead of raising.

    Args:
        tfbs_count: Iterable of rows, each an iterable of cells (any values
            convertible with ``str``).

    Returns:
        The formatted text, without a trailing newline.
    """
    return '\n'.join(
        ', '.join(str(cell) for cell in row) for row in tfbs_count)
def count_to_fingerprint(count, width=10):
    """Encode counts as a bit string with a fixed-width segment per count.

    Each count becomes *width* characters: as many '1's as the count, clamped
    to the range 0..width, followed by '0's to fill the segment. Clamping at
    both ends keeps every segment exactly *width* long, so fingerprints built
    from the same number of counts always align position by position.

    Args:
        count: Iterable of counts (ints or values accepted by ``int``).
        width: Number of characters per count; defaults to 10.

    Returns:
        The concatenated segments as a string ('' for an empty input).
    """
    segments = []
    for value in count:
        ones = min(max(int(value), 0), width)
        segments.append('1' * ones + '0' * (width - ones))
    return ''.join(segments)
def get_fingerprint(tfbs_count):
    """Replace the counts of every row by their fingerprint string.

    A list is returned (not a lazy ``map``) because the result is iterated
    more than once when the similarity matrix is built.

    Args:
        tfbs_count: Iterable of rows ``[label, count_1, ..., count_n]``.

    Returns:
        A list of ``[label, fingerprint]`` pairs in input order, where the
        fingerprint comes from ``count_to_fingerprint``.
    """
    return [[row[0], count_to_fingerprint(row[1:])] for row in tfbs_count]
def compute_tanimoto(first, second):
    """Return the Tanimoto coefficient of two bit strings, rounded to 2 places.

    The coefficient is the number of positions set in both inputs divided by
    the number of positions set in at least one of them.

    Args:
        first: Sequence of '0'/'1' characters (or values accepted by ``int``).
        second: Sequence of the same kind; positions beyond the shorter input
            are ignored because the inputs are paired with ``zip``.

    Returns:
        A float in [0, 1]. When neither input has a set bit the ratio is
        undefined; 1.0 is returned on the grounds that the two fingerprints
        are identical.
        NOTE(review): confirm 1.0 (rather than 0.0) is the desired convention.
    """
    # Materialise the pairs: they are traversed twice, and a bare zip()
    # iterator would be empty on the second pass.
    pairs = [(int(a), int(b)) for a, b in zip(first, second)]
    shared = sum(a & b for a, b in pairs)
    either = sum(a | b for a, b in pairs)
    if either == 0:
        return 1.0
    return round(float(shared) / float(either), 2)
def get_tanimoto(fingerprint, fingerprint_list):
    """Compare one fingerprint against every fingerprint in a collection.

    Args:
        fingerprint: The reference fingerprint.
        fingerprint_list: Iterable of fingerprints to compare it with.

    Returns:
        A list (so callers can concatenate it with other lists) holding the
        ``compute_tanimoto`` result for each entry, in input order.
    """
    return [compute_tanimoto(fingerprint, other) for other in fingerprint_list]
def fingerprints_to_tanimoto(genes_with_fingerprints):
    """Build the all-against-all similarity rows for the given fingerprints.

    Args:
        genes_with_fingerprints: Iterable of ``[label, fingerprint]`` pairs.

    Returns:
        A list of rows ``[label, s_1, ..., s_n]`` where ``s_j`` is the
        ``get_tanimoto`` score of this row's fingerprint against the j-th
        fingerprint of the input.
    """
    # Both the input and the fingerprint column are walked repeatedly, so
    # turn them into lists up front; one-shot iterators would run dry after
    # the first row.
    entries = list(genes_with_fingerprints)
    fingerprints = [entry[1] for entry in entries]
    return [
        [entry[0]] + list(get_tanimoto(entry[1], fingerprints))
        for entry in entries
    ]
def format_tanimoto_output(genes_with_tanimoto):
    """Render similarity rows as comma-separated text with a header line.

    The first line lists the labels (first cell of every row) joined by ','.
    Each following line is one row with all of its cells joined by ','. The
    header therefore has one field fewer than the data lines, exactly as in
    the original output format.

    Args:
        genes_with_tanimoto: Iterable of rows ``[label, s_1, ..., s_n]``.

    Returns:
        The formatted text without a trailing newline ('' for empty input).
    """
    rows = [list(row) for row in genes_with_tanimoto]
    header = ','.join(str(row[0]) for row in rows)
    lines = [','.join(str(cell) for cell in row) for row in rows]
    return '\n'.join([header] + lines)
if __name__ == '__main__':
    # Stage 1: collect the input files and count identifier occurrences in
    # each of them (keyword arguments are evaluated left to right, so the
    # files are read before the identifier list, as before).
    files_names = get_files_names()
    tfbs_count = jaspar_tfbs_count(
        jaspar_bits=get_jaspar_bits(files_names),
        tfbs_list=get_tfbs_list(),
        files_names=files_names)

    # Stage 2: turn the counts into fingerprints and compare every
    # fingerprint with every other one.
    similarity_rows = fingerprints_to_tanimoto(get_fingerprint(tfbs_count))

    # Stage 3: write the resulting matrix to standard output.
    print(format_tanimoto_output(similarity_rows))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment