Skip to content

Instantly share code, notes, and snippets.

@Phlya
Last active November 11, 2018 17:16
Show Gist options
  • Save Phlya/340af6c45900902310a7bb8d9cc60537 to your computer and use it in GitHub Desktop.
Save Phlya/340af6c45900902310a7bb8d9cc60537 to your computer and use it in GitHub Desktop.
def dedup(dots, hiccups_filter=True):
newdots = []
ress = list(sorted(set(dots['res'])))
for chrom in sorted(set(dots['chrom1'])):
chromdots = dots[dots['chrom1']==chrom].sort_values(['start1', 'start2']).reset_index(drop=True)
for res in ress:
chromdots['Supported_%s' % res] = (chromdots['res']==res)
tree = spatial.cKDTree(chromdots[['start1', 'start2']]) #Not sure what's the best coordinate to use, of the strongest pixel, centroid, or the center of the dot?
drop = []
for i, j in tree.query_pairs(r=50000):
ires = chromdots.at[i, 'res']
jres = chromdots.at[j, 'res']
chromdots.at[j, 'Supported_%s' % ires] = True
chromdots.at[i, 'Supported_%s' % jres] = True
if ress[-1] in (ires, jres) or abs(chromdots.at[j, 'start1']-chromdots.at[i, 'start1'])<=20000:
if ires>jres:
drop.append(i)
else:
drop.append(j)
newdots.append(chromdots.drop(drop))
deduped = pd.concat(newdots).sort_values(['chrom1', 'start1', 'start2'])
if hiccups_filter:
l = len(deduped)
deduped = deduped[~((deduped['start2']-deduped['start1']>100000) & (~np.any(deduped[['Supported_%s'%res for res in ress[1:]]], axis=1)))]
print(l-len(deduped), 'loops filtered out as unreliable %s resolution calls' % ress[0])
return deduped
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment