Alex Rosenberg Alex-Rosenberg

## read_data_GSE110823.py
import scipy.io as sio
import pandas as pd

# Load the MATLAB .mat file. Replace '<your path>' with the directory that
# holds the downloaded file. scipy.io.loadmat returns a dict keyed by the
# variable names stored in the file.
data = sio.loadmat('<your path>/GSM3017261_150000_CNS_nuclei.mat')

# Digital expression matrix, read from the variable stored under the 'DGE' key.
# NOTE(review): shape/orientation and dense-vs-sparse storage are not visible
# here -- confirm against the dataset description before indexing.
DGE = data['DGE']

#Genes

## read_data_GSE110823.py
import scipy.io as sio
import pandas as pd

# Load the MATLAB .mat file. Replace '<your path>' with the directory that
# holds the downloaded file. scipy.io.loadmat returns a dict keyed by the
# variable names stored in the file.
data = sio.loadmat('<your path>/GSM3017261_150000_CNS_nuclei.mat')

# Digital expression matrix, read from the variable stored under the 'DGE' key.
# NOTE(review): shape/orientation and dense-vs-sparse storage are not visible
# here -- confirm against the dataset description before indexing.
DGE = data['DGE']

#Genes

## merge_seqs.py
def hamdist(str1, str2):
    """Return the Hamming distance between two sequences.

    Positions are compared pairwise and every mismatch adds one to the
    result. The inputs are expected to have equal length; if they do not,
    only the positions covered by the shorter one are compared, because
    ``zip`` stops at the end of the shorter input.
    """
    return sum(1 for left, right in zip(str1, str2) if left != right)

def remove_duplicates_round(df,hamm_thres=4,merge_counts=False):
    seqs = list(df.Seq.values)
	import scipy.io as sio
	import pandas as pd

	#Load Data
	data = sio.loadmat('<your path>/GSM3017261_150000_CNS_nuclei.mat')

	#Digital Expression Matrix
	DGE = data['DGE']

	#Genes
	def hamdist(str1, str2):
	"""Count the # of differences between equal length strings str1 and str2"""
	diffs = 0
	for ch1, ch2 in zip(str1, str2):
	if ch1 != ch2:
	diffs += 1
	return diffs

	def remove_duplicates_round(df,hamm_thres=4,merge_counts=False):
	seqs = list(df.Seq.values)