Skip to content

Instantly share code, notes, and snippets.

View Alex-Rosenberg's full-sized avatar

Alex Rosenberg Alex-Rosenberg

  • University of Washington
  • Seattle, WA
View GitHub Profile
import scipy.io as sio
import pandas as pd
# Load the MATLAB .mat file. scipy.io.loadmat returns a dict keyed by the
# variable names stored in the file. Replace '<your path>' with the directory
# that holds the downloaded file.
data = sio.loadmat('<your path>/GSM3017261_150000_CNS_nuclei.mat')
# Digital expression matrix, stored in the file under the 'DGE' variable
DGE = data['DGE']
# Genes
import scipy.io as sio
import pandas as pd
# Load the MATLAB .mat file. scipy.io.loadmat returns a dict keyed by the
# variable names stored in the file. Replace '<your path>' with the directory
# that holds the downloaded file.
data = sio.loadmat('<your path>/GSM3017261_150000_CNS_nuclei.mat')
# Digital expression matrix, stored in the file under the 'DGE' variable
DGE = data['DGE']
# Genes
@Alex-Rosenberg
Alex-Rosenberg / merge_seqs.py
Last active December 15, 2015 14:42
Combines sequences whose Hamming distance is less than N, using the counts of each sequence.
def hamdist(str1, str2):
    """Count the number of positions at which str1 and str2 differ.

    The two strings are expected to have equal length. Characters are
    compared pairwise with ``zip``, so if the lengths differ only the
    overlapping prefix is compared and the extra tail is ignored.

    Args:
        str1: First sequence.
        str2: Second sequence.

    Returns:
        The number of mismatching positions (0 for two empty strings).
    """
    # Each mismatching pair contributes True (== 1) to the sum.
    return sum(ch1 != ch2 for ch1, ch2 in zip(str1, str2))
def remove_duplicates_round(df,hamm_thres=4,merge_counts=False):
seqs = list(df.Seq.values)