Mihail Zdravkov mzdravkov

## build_sequences2.py
def build_sequences(time_series, valid_periods, categories, train_size, test_size):
  """
  Creates all possible test sequences with size <test_size> which have
  a training sequence of <train_size> in front.
  """
  X = []
  y = []
  final_categories = []
  for ts, range, category in zip(time_series, valid_periods, categories):
    valid_ts = cut_valid(ts, range)

## fourier_time_series_forecasting.py
def build_sequences(time_series, valid_periods, categories, train_size, test_size):
  """
  Creates all possible test sequences with size <test_size> which have
  a training sequence of <train_size> in front.
  """
  X = []
  y = []
  final_categories = []
  for ts, range, category in zip(time_series, valid_periods, categories):
    valid_ts = cut_valid(ts, range)

## build_sequences.py
def build_sequences(time_series, valid_periods, categories, train_size, test_size):
  """
  Creates all possible test sequences with size <test_size> which have
  a training sequence of <train_size> in front.
  """
  X = []
  y = []
  final_categories = []
  for ts, range, category in zip(time_series, valid_periods, categories):
    valid_ts = cut_valid(ts, range)

## mobilenet_model.py
from keras.constraints import max_norm

def get_res_blocks(definitions, input, l2_factor=0.0001, kernel_constraint_norm=2.0):
    hiddenx = tf.keras.layers.Dense(definitions[0],
                         activation='relu',
                         kernel_regularizer=tf.keras.regularizers.l2(l2_factor),
                         kernel_constraint=max_norm(kernel_constraint_norm),
                         bias_initializer=he_init)(input)
    hiddeny = tf.keras.layers.Dense(definitions[0],
                        #  activation='relu',

## Dataset.csv
ID,GENDER,AGE,RACE/ETHNICITY,Diagnosis,MD,Assignment,EMR,LOS,RAR,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,# Psychotropic Medications,# Administrations,Therapeutic Guidances
1,F,49,W,"MDD, Recurrent, Unspecified",L,G,C,70,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,6,EG
2,F,21,W,"MDD, Recurrent, Unspecified",A,G,C,309,0,0,0,0,1,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,3,27,CT
3,M,28,L,"MDD, Single Episode, Severe With Psychotic Features",I,G,C,376,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,5,0,26,0,0,0,8,0,0,0,15,0,4,0,0,6,64,CT
4,F,63,L,Depressive Disorder NOS,L,G,C,115,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,3,14,CT
5,M,34,L,"MDD, Single Episode, Severe With Psychotic Features",G,S,C,120,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,0,0,0,5,0,0,0,0,0,3,11,EG
6,F,24,L,"MDD, Single Episode,Severe Without Psychotic Features",T,S,C,120,0,0,5,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5,0,0,4,13,EG
7,M,42,W,"MDD, Single Episode, Unspecified",

## biostatistics_project.r
# Load the study data from Dataset.csv; the first row is the header and
# character columns are converted to factors.
Dataset <- read.csv("Dataset.csv", stringsAsFactors=TRUE, header=TRUE)

# Drop the rows whose Assignment field is the empty string.
Dataset <- Dataset[Dataset$Assignment != "",]

# Open the filtered data frame in the interactive data viewer.
View(Dataset)

# Lookup table pairing each of the 30 drug codes (A through AD) with a color
# label (Red / Yellow / Green).
# NOTE(review): the codes appear to correspond to the per-drug columns A..AD
# of Dataset.csv, and the colors presumably encode a guidance category --
# confirm against the study documentation.
drugs <- data.frame(
    Name = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA", "AB", "AC", "AD"),
    Color = c("Red", "Red", "Green", "Yellow", "Red", "Yellow", "Red", "Red", "Red", "Red", "Green", "Red", "Red", "Red", "Green", "Red", "Red", "Green", "Yellow", "Red", "Red", "Yellow", "Yellow", "Red", "Yellow", "Red", "Yellow", "Red", "Red", "Green")
)

## dominant.awk
# Return 1 when `allele` equals the first or the second element of the
# `parent_alleles` array, and 0 otherwise. The second element is only
# consulted when the first one does not match.
# NOTE(review): parent_alleles is presumably a two-element array produced by
# split()-ing a genotype string -- confirm against the callers.
function allele_in_parent(allele, parent_alleles) {
  return (allele == parent_alleles[1]) ? 1 : (allele == parent_alleles[2]);
}

# Lines beginning with '#' are passed through unchanged.
/^#/ { print; }

# Every other non-empty line: take field 12, keep the part before the first
# ':' and split it on '/'. The line is printed unless both resulting parts
# compare equal to 0.
# NOTE(review): field 12 is presumably the child's sample column and the
# first ':'-separated entry its genotype -- confirm against the input layout.
/^[^#]/ {
  split($12, cfields, ":");
  split(cfields[1], child, "/");
  if (!(child[1] == 0 && child[2] == 0)) print;
}

## recessive.awk
# Tell whether `allele` matches one of the first two entries of the
# `parent_alleles` array: yields 1 on a match and 0 otherwise. The second
# entry is only compared when the first one differs.
function allele_in_parent(allele, parent_alleles) {
	if (allele == parent_alleles[1])
		return 1;
	return (allele == parent_alleles[2]);
}

/^#/ { print $0; }
/^[^#]/ {
  split($10, mfields, ":");
  split(mfields[1], mother, "/");
  split($11, ffields, ":");
  split(ffields[1], father, "/");

## genomics_disease_annotation_pipeline.sh
# recessive.awk can be found here: https://gist.github.com/mzdravkov/443868e111263f8521268434436434e4
# dominant.awk can be found here: https://gist.github.com/mzdravkov/85409f4d0e7d5234f350a9a814a283bf

if [[ $# -lt 2 ]]; then
	echo "./pipeline.sh CASE_NUMBER recessive/dominant"
	exit;
fi

test_case=$1
pattern=$2

## unparsable.psl
psLayout version 3

match	mis- 	rep. 	N's	Q gap	Q gap	T gap	T gap	strand	Q        	Q   	Q    	Q  	T        	T   	T    	T  	block	blockSizes 	qStarts	 tStarts
     	match	match	   	count	bases	count	bases	      	name     	size	start	end	name     	size	start	end	count
---------------------------------------------------------------------------------------------------------------------------------------------------------------
1034	214	0	0	6	-127	23	316	+	XP_049281413.1	1152	31	1152	XP_035914594.1	1593	29	1593	26	47,95,21,95,77,35,3,27,44,76,35,31,45,41,24,42,105,57,58,38,33,71,24,48,22,54,	31,84,181,202,297,302,337,340,375,419,495,530,561,606,647,671,713,818,875,861,899,932,1003,1027,1075,1098,	29,81,176,231,355,379,462,470,497,613,699,739,775,825,876,905,952,1191,1258,1263,1306,1349,1430,1459,1515,1539,
841	148	0	0	5	-93	19	268	+	XP_049281413.1	1152	181	1077	XP_035914594.1	1593	219	1476	23	18,53,74,27,25,76,35,28,24,41,35,31,87,25,23,48,39,46,76,38,52,71,17,	181,199,266,340,375,400,476,511,544,568,609,644,675,6
	def build_sequences(time_series, valid_periods, categories, train_size, test_size):
	"""
	Creates all possible test sequences with size <test_size> which have
	a training sequence of <train_size> in front.
	"""
	X = []
	y = []
	final_categories = []
	for ts, range, category in zip(time_series, valid_periods, categories):
	valid_ts = cut_valid(ts, range)
	from keras.constraints import max_norm

	def get_res_blocks(definitions, input, l2_factor=0.0001, kernel_constraint_norm=2.0):
	hiddenx = tf.keras.layers.Dense(definitions[0],
	activation='relu',
	kernel_regularizer=tf.keras.regularizers.l2(l2_factor),
	kernel_constraint=max_norm(kernel_constraint_norm),
	bias_initializer=he_init)(input)
	hiddeny = tf.keras.layers.Dense(definitions[0],
	# activation='relu',
	ID,GENDER,AGE,RACE/ETHNICITY,Diagnosis,MD,Assignment,EMR,LOS,RAR,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,# Psychotropic Medications,# Administrations,Therapeutic Guidances
	1,F,49,W,"MDD, Recurrent, Unspecified",L,G,C,70,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,6,EG
	2,F,21,W,"MDD, Recurrent, Unspecified",A,G,C,309,0,0,0,0,1,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,3,27,CT
	3,M,28,L,"MDD, Single Episode, Severe With Psychotic Features",I,G,C,376,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,5,0,26,0,0,0,8,0,0,0,15,0,4,0,0,6,64,CT
	4,F,63,L,Depressive Disorder NOS,L,G,C,115,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0,0,3,14,CT
	5,M,34,L,"MDD, Single Episode, Severe With Psychotic Features",G,S,C,120,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,0,0,0,0,5,0,0,0,0,0,3,11,EG
	6,F,24,L,"MDD, Single Episode,Severe Without Psychotic Features",T,S,C,120,0,0,5,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5,0,0,4,13,EG
	7,M,42,W,"MDD, Single Episode, Unspecified",
	Dataset <- read.csv("Dataset.csv", stringsAsFactors=TRUE, header=TRUE)

	Dataset <- Dataset[Dataset$Assignment != "",]

	View(Dataset)

	drugs <- data.frame(
	Name = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA", "AB", "AC", "AD"),
	Color = c("Red", "Red", "Green", "Yellow", "Red", "Yellow", "Red", "Red", "Red", "Red", "Green", "Red", "Red", "Red", "Green", "Red", "Red", "Green", "Yellow", "Red", "Red", "Yellow", "Yellow", "Red", "Yellow", "Red", "Yellow", "Red", "Red", "Green")
	)
	function allele_in_parent(allele, parent_alleles) {
	return allele == parent_alleles[1] \|\| allele == parent_alleles[2];
	}

	/^#/ { print $0; }
	/^[^#]/ {
	split($12, cfields, ":");
	split(cfields[1], child, "/");
	if (child[1] != 0 \|\| child[2] != 0) print $0;
	}
	# recessive.awk can be found here: https://gist.github.com/mzdravkov/443868e111263f8521268434436434e4
	# dominant.awk can be found here: https://gist.github.com/mzdravkov/85409f4d0e7d5234f350a9a814a283bf

	if [[ $# -lt 2 ]]; then
	echo "./pipeline.sh CASE_NUMBER recessive/dominant"
	exit;
	fi

	test_case=$1
	pattern=$2
	psLayout version 3

	match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStarts tStarts
	match match count bases count bases name size start end name size start end count
	---------------------------------------------------------------------------------------------------------------------------------------------------------------
	1034 214 0 0 6 -127 23 316 + XP_049281413.1 1152 31 1152 XP_035914594.1 1593 29 1593 26 47,95,21,95,77,35,3,27,44,76,35,31,45,41,24,42,105,57,58,38,33,71,24,48,22,54, 31,84,181,202,297,302,337,340,375,419,495,530,561,606,647,671,713,818,875,861,899,932,1003,1027,1075,1098, 29,81,176,231,355,379,462,470,497,613,699,739,775,825,876,905,952,1191,1258,1263,1306,1349,1430,1459,1515,1539,
	841 148 0 0 5 -93 19 268 + XP_049281413.1 1152 181 1077 XP_035914594.1 1593 219 1476 23 18,53,74,27,25,76,35,28,24,41,35,31,87,25,23,48,39,46,76,38,52,71,17, 181,199,266,340,375,400,476,511,544,568,609,644,675,6