vestalisvirginis/random_algorithm.py

## random_algorithm.py
import numpy as np
from functional import seq
from collections import deque, defaultdict
import time

def RandomNumberGenerator(dna, k):
  #return an array (0,t) of random integers, than can be used as starting point for selecting random k-mer along dna strings
  return np.random.randint(0, DnaToMatrix(dna).shape[1]+1-k, size = (DnaToMatrix(dna).shape[0], 1))

 def DnaToMatrix(dna):
  #return a matrix of the list of dna strand
  dna_matrix = deque()
  for i in dna:
    dna_matrix.append(list(i))
  return np.array(dna_matrix)

def RandomMotif(dna, k):
  #return a matrix of randomely selected k-mer per dna strings
  a = deque()
  for pos, l in zip( range(RandomNumberGenerator(dna, k).shape[0]), RandomNumberGenerator(dna, k)):
    a.append(list(DnaToMatrix(dna)[pos,int(l):int(l)+k]))
  return np.array(a)

def A_count(l, matrix_init):
  matrix_init[0,l] +=1
  return matrix_init

def C_count(l, matrix_init):
  matrix_init[1,l] +=1
  return matrix_init

def G_count(l, matrix_init):
  matrix_init[2,l] +=1
  return matrix_init

def T_count(l, matrix_init):
  matrix_init[3,l] +=1
  return matrix_init


def NTP_count(n, l, matrix_init):
  switcher = {
    'A': A_count,
    'C': C_count,
    'G': G_count,
    'T': T_count,
  }
  # Get the function from switcher dictionary
  func = switcher.get(n, lambda: "Invalid nucleotide")
  # Execute the function
  return func(l, matrix_init)

def count_matrix(dna, k):
  # Matrix default setting --> row 0 : 'A', row '1' : 'C', row 2 : 'G', row '3' : 'T'
  NTP = DnaToMatrix('ACGT')
  matrix_count = np.zeros((4,k))
  dna_matrix = DnaToMatrix(dna)
  for l in range(k):
    for t in range(len(dna)):
      matrix_count = NTP_count(dna_matrix[t,l], l, matrix_count)
  return matrix_count

def profile_matrix(dna, k):
  # Matrix default setting --> row 0 : 'A', row '1' : 'C', row 2 : 'G', row '3' : 'T'
  return count_matrix(dna, k)/(len(dna))

def KmerPerDnaStrings(dna, k):
  #return the k-mers along a given dna string, for all strings in dna
  kmers_dict = defaultdict(list)
  for t in range(DnaToMatrix(dna).shape[0]):
    c = deque()
    for l in range(DnaToMatrix(dna).shape[1]+1-k):
      c.append(DnaToMatrix(dna)[t,l:l+k])
    kmers_dict[t] = c
  return kmers_dict

def StringToProbability(substring, k, profile):

  dict_letter = defaultdict(np.ndarray)

  dict_letter['A'] = np.array([1, 0, 0, 0])
  dict_letter['C'] = np.array([0, 1, 0, 0])
  dict_letter['G'] = np.array([0, 0, 1, 0])
  dict_letter['T'] = np.array([0, 0, 0, 1])

  sub_string_matrix = deque()
  for i in substring:
    sub_string_matrix.append(dict_letter[i])

  mask = np.logical_and(np.array([*sub_string_matrix]).transpose(), np.array([*sub_string_matrix]).transpose())

  prob = profile[mask]
  return np.prod(prob)

def MostProbableKmerMotif(dna,k, profile):
    #return dictionary containing {motif:probability} for the motif scoring the highest probability for each dna string

  max_probability = defaultdict(float)

  for i in KmerPerDnaStrings(dna, k).values():
    list_kmer = np.array(list(i)).tolist()

    p_motif = defaultdict(float)
    p_motif.default_factory = lambda : 1.0

    for j in list_kmer:
      p_motif[''.join(j)] = StringToProbability(j, k, profile)

    max_probability[max(p_motif,key=p_motif.get)] = max(p_motif.values())
  return max_probability


def score_motifs(dna, k, letter_count):
  score = 0
  for i in range(k):
    max_id_letter = max(letter_count[0][i], letter_count[1][i], letter_count[2][i], letter_count[3][i])
    score += (len(dna)-max_id_letter)
  return score


def score_motifs(dna, k, letter_count):
  score = 0
  for i in range(k):
    max_id_letter = max(letter_count[0][i], letter_count[1][i], letter_count[2][i], letter_count[3][i])
    score += (len(dna)-max_id_letter)
  return score

def output(result):

  _j = lambda x: ''.join(list(x))

  return print(*seq(result.tolist()).map(_j))
	import numpy as np
	from functional import seq
	from collections import deque, defaultdict
	import time

	def RandomNumberGenerator(dna, k):
	#return an array (0,t) of random integers, than can be used as starting point for selecting random k-mer along dna strings
	return np.random.randint(0, DnaToMatrix(dna).shape[1]+1-k, size = (DnaToMatrix(dna).shape[0], 1))

	def DnaToMatrix(dna):
	#return a matrix of the list of dna strand
	dna_matrix = deque()
	for i in dna:
	dna_matrix.append(list(i))
	return np.array(dna_matrix)

	def RandomMotif(dna, k):
	#return a matrix of randomely selected k-mer per dna strings
	a = deque()
	for pos, l in zip( range(RandomNumberGenerator(dna, k).shape[0]), RandomNumberGenerator(dna, k)):
	a.append(list(DnaToMatrix(dna)[pos,int(l):int(l)+k]))
	return np.array(a)

	def A_count(l, matrix_init):
	matrix_init[0,l] +=1
	return matrix_init

	def C_count(l, matrix_init):
	matrix_init[1,l] +=1
	return matrix_init

	def G_count(l, matrix_init):
	matrix_init[2,l] +=1
	return matrix_init

	def T_count(l, matrix_init):
	matrix_init[3,l] +=1
	return matrix_init


	def NTP_count(n, l, matrix_init):
	switcher = {
	'A': A_count,
	'C': C_count,
	'G': G_count,
	'T': T_count,
	}
	# Get the function from switcher dictionary
	func = switcher.get(n, lambda: "Invalid nucleotide")
	# Execute the function
	return func(l, matrix_init)

	def count_matrix(dna, k):
	# Matrix default setting --> row 0 : 'A', row '1' : 'C', row 2 : 'G', row '3' : 'T'
	NTP = DnaToMatrix('ACGT')
	matrix_count = np.zeros((4,k))
	dna_matrix = DnaToMatrix(dna)
	for l in range(k):
	for t in range(len(dna)):
	matrix_count = NTP_count(dna_matrix[t,l], l, matrix_count)
	return matrix_count

	def profile_matrix(dna, k):
	# Matrix default setting --> row 0 : 'A', row '1' : 'C', row 2 : 'G', row '3' : 'T'
	return count_matrix(dna, k)/(len(dna))

	def KmerPerDnaStrings(dna, k):
	#return the k-mers along a given dna string, for all strings in dna
	kmers_dict = defaultdict(list)
	for t in range(DnaToMatrix(dna).shape[0]):
	c = deque()
	for l in range(DnaToMatrix(dna).shape[1]+1-k):
	c.append(DnaToMatrix(dna)[t,l:l+k])
	kmers_dict[t] = c
	return kmers_dict

	def StringToProbability(substring, k, profile):

	dict_letter = defaultdict(np.ndarray)

	dict_letter['A'] = np.array([1, 0, 0, 0])
	dict_letter['C'] = np.array([0, 1, 0, 0])
	dict_letter['G'] = np.array([0, 0, 1, 0])
	dict_letter['T'] = np.array([0, 0, 0, 1])

	sub_string_matrix = deque()
	for i in substring:
	sub_string_matrix.append(dict_letter[i])

	mask = np.logical_and(np.array([sub_string_matrix]).transpose(), np.array([sub_string_matrix]).transpose())

	prob = profile[mask]
	return np.prod(prob)

	def MostProbableKmerMotif(dna,k, profile):
	#return dictionary containing {motif:probability} for the motif scoring the highest probability for each dna string

	max_probability = defaultdict(float)

	for i in KmerPerDnaStrings(dna, k).values():
	list_kmer = np.array(list(i)).tolist()

	p_motif = defaultdict(float)
	p_motif.default_factory = lambda : 1.0

	for j in list_kmer:
	p_motif[''.join(j)] = StringToProbability(j, k, profile)

	max_probability[max(p_motif,key=p_motif.get)] = max(p_motif.values())
	return max_probability


	def score_motifs(dna, k, letter_count):
	score = 0
	for i in range(k):
	max_id_letter = max(letter_count[0][i], letter_count[1][i], letter_count[2][i], letter_count[3][i])
	score += (len(dna)-max_id_letter)
	return score


	def score_motifs(dna, k, letter_count):
	score = 0
	for i in range(k):
	max_id_letter = max(letter_count[0][i], letter_count[1][i], letter_count[2][i], letter_count[3][i])
	score += (len(dna)-max_id_letter)
	return score

	def output(result):

	_j = lambda x: ''.join(list(x))

	return print(*seq(result.tolist()).map(_j))