Last active
March 2, 2018 02:31
-
-
Save yjzhang/229f847dd7bdb8dcae2b492cad1dbd4d to your computer and use it in GitHub Desktop.
Creates a DNA motif logo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# code to draw a DNA sequence logo... | |
import matplotlib.pyplot as plt | |
from matplotlib import transforms | |
import matplotlib.patheffects | |
import numpy as np | |
def entropy(row): | |
return -sum(x*np.log2(x+0.0000001) for x in row) | |
# ATCG | |
def calc_logo(data): | |
""" | |
Data format: table of relative frequencies - first dimension is length, | |
second dimension is character. Each entry is the frequency of that | |
character. | |
The output is the frequency * entropy for each position. | |
""" | |
hmax = 2.0 | |
num_chars = data.shape[1] | |
length = data.shape[0] | |
logo = np.array(data) | |
for i in range(length): | |
ent = hmax - entropy(data[i,:]) | |
logo[i,:] *= ent | |
return logo | |
def draw_logo(data, fixed_scale=True, ax=None): | |
logo_data = calc_logo(data) | |
if ax is None: | |
ax = plt.gca() | |
fig = ax.figure | |
colors = ['r', 'g', 'b', 'y'] | |
bars = [] | |
width = 0.8 | |
ind = np.arange(logo_data.shape[0]) | |
cumulative_heights = np.zeros(logo_data[:,0].shape) | |
for i, c in enumerate(colors): | |
nuc = logo_data[:,i] | |
if i == 0: | |
p = ax.bar(ind, nuc, width, color=c) | |
else: | |
p = ax.bar(ind, nuc, width, color=c, bottom = cumulative_heights) | |
cumulative_heights += nuc | |
bars.append(p) | |
if fixed_scale: | |
ax.set_ylim(0, 2) | |
ax.legend((b[0] for b in bars), ['A', 'T', 'C', 'G']) | |
ax.set_ylabel('Information (bits)') | |
return bars | |
# based on: https://github.com/saketkc/notebooks/blob/master/python/Sequence Logo Python -- Any font.ipynb | |
class Scale(matplotlib.patheffects.RendererBase): | |
def __init__(self, sx, sy=None): | |
self._sx = sx | |
self._sy = sy | |
def draw_path(self, renderer, gc, tpath, affine, rgbFace): | |
affine = affine.identity().scale(self._sx, self._sy)+affine | |
renderer.draw_path(gc, tpath, affine, rgbFace) | |
def draw_logo_2(data, fixed_scale=True, ax=None): | |
""" | |
Draws a logo with letters instead of bars. | |
""" | |
logo_data = calc_logo(data) | |
if ax is None: | |
ax = plt.gca() | |
fig = ax.figure | |
ax.set_xlim(0, logo_data.shape[0]+0.5) | |
ax.set_ylim(0, 2) | |
ax.set_ylabel('Information (bits)') | |
#fig, ax = plt.subplots(figsize=(logo_data.shape[0], 2.5)) | |
colors = ['r', 'g', 'b', 'y'] | |
bases = ['A', 'T', 'C', 'G'] | |
trans_offset = transforms.offset_copy(ax.transData, | |
fig=fig, | |
x=1, | |
y=0, | |
units='dots') | |
scale = 1.5 | |
for i in range(logo_data.shape[0]): | |
scores = logo_data[i,:] | |
for b, c, s in zip(bases, colors, scores): | |
txt = ax.text(i+1, 0, b, transform=trans_offset, fontsize=100, ha='center', color=c) | |
txt.set_path_effects([Scale(0.8, s*scale)]) | |
yshift = s*scale | |
trans_offset = transforms.offset_copy(trans_offset, | |
fig=fig, | |
y=yshift) | |
trans_offset = transforms.offset_copy(ax.transData, | |
fig=fig, | |
y=0, | |
units='points') | |
return fig, ax |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment