Skip to content

Instantly share code, notes, and snippets.

@Phlya
Last active June 26, 2019 16:30
Show Gist options
  • Save Phlya/95383aa153ff402b90d7a09357bd48cc to your computer and use it in GitHub Desktop.
Save Phlya/95383aa153ff402b90d7a09357bd48cc to your computer and use it in GitHub Desktop.
Compare regions in two Hi-C maps
def fetch_random_means(clr1, exp1, clr2, exp2, region_left, region_right=None, n=1000):
assert clr1.binsize==clr2.binsize
if region_right is None:
region_right = region_left
chrom_left, start_left, end_left = cooler.util.parse_region_string(region_left)
chrom_right, start_right, end_right = cooler.util.parse_region_string(region_right)
assert chrom_left == chrom_right
bin1_left = start_left//clr1.binsize
bin2_left = end_left //clr1.binsize
bin1_right = start_right//clr1.binsize
bin2_right = end_right //clr1.binsize
shift1 = bin1_right - bin1_left
shift2 = bin2_right - bin2_left
len_left = bin2_left - bin1_left
len_right = bin2_right - bin1_right
if (bin1_left < bin1_right) & (bin2_left > bin1_right):
raise ValueError('Regions should not overlap')
exp1 = exp1[exp1['chrom']==chrom_left]['balanced.avg']
exp2 = exp2[exp2['chrom']==chrom_left]['balanced.avg']
expected1 = LazyToeplitz(exp1)[bin1_left:bin2_left, bin1_right:bin2_right]
expected2 = LazyToeplitz(exp2)[bin1_left:bin2_left, bin1_right:bin2_right]
data1 = clr1.matrix(sparse=True, balance=True).fetch(chrom_left)
data1 = sparse.triu(data1, 2).tocsr()
data2 = clr2.matrix(sparse=True, balance=True).fetch(chrom_left)
data2 = sparse.triu(data2, 2).tocsr()
l = data1.shape[0]
allidx = np.arange(l)
np.random.shuffle(allidx)
allidx = np.append([bin1_left], allidx)
region1 = data1[bin1_left:bin2_left, bin1_right:bin2_right].toarray()#/expected1
assert expected1.shape==region1.shape
if region_left==region_right:
ind = np.triu_indices_from(region1, 2)
else:
ind = np.s_[:]
means1 = []
means2 = []
i = 0
while len(means1)<n:
idx = allidx[i]
i += 1
if idx+shift1+len_right > l:
continue
try:
mat1 = (data1[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
mean1 = np.nanmean((mat1/expected1)[ind])
mat2 = (data2[idx:idx+len_left, idx+shift1:idx+shift1+len_right].toarray())
mean2 = np.nanmean((mat2/expected2)[ind])
assert mat1.shape==region1.shape and mat2.shape==region1.shape
if np.mean(np.isnan(mat1))>0.1 or mean1==0 or np.mean(np.isnan(mat2))>0.1 or mean2==0:
continue
means1.append(mean1)
means2.append(mean2)
except Exception as e:
print(e)
print(region1.shape, mat1.shape, mat2.shape, expected1.shape, expected2.shape)
print(idx, idx+len_left, idx+shift1, idx+shift1+len_right)
raise e
regmean1 = means1[0]
regmean2 = means2[0]
return regmean1, np.asarray(means1)[1:], regmean2, np.asarray(means2)[1:]
means = fetch_random_means(coolers[sample1], expecteddata[sample1], coolers[sample2], expecteddata[sample2], region, 1000)
ratios = np.log2(means[1]/means[3])
roirat = np.log2(means[0]/means[2])
mn = np.mean(ratios)
std = np.std(ratios)
zscore = (roirat-mn)/std
p = 1- special.ndtr(zscore)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment