Skip to content

Instantly share code, notes, and snippets.

@yjzhang
Last active March 2, 2018 02:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yjzhang/229f847dd7bdb8dcae2b492cad1dbd4d to your computer and use it in GitHub Desktop.
Save yjzhang/229f847dd7bdb8dcae2b492cad1dbd4d to your computer and use it in GitHub Desktop.
Creates a DNA motif logo
# code to draw a DNA sequence logo...
import matplotlib.pyplot as plt
from matplotlib import transforms
import matplotlib.patheffects
import numpy as np
def entropy(row):
return -sum(x*np.log2(x+0.0000001) for x in row)
# ATCG
def calc_logo(data):
"""
Data format: table of relative frequencies - first dimension is length,
second dimension is character. Each entry is the frequency of that
character.
The output is the frequency * entropy for each position.
"""
hmax = 2.0
num_chars = data.shape[1]
length = data.shape[0]
logo = np.array(data)
for i in range(length):
ent = hmax - entropy(data[i,:])
logo[i,:] *= ent
return logo
def draw_logo(data, fixed_scale=True, ax=None):
logo_data = calc_logo(data)
if ax is None:
ax = plt.gca()
fig = ax.figure
colors = ['r', 'g', 'b', 'y']
bars = []
width = 0.8
ind = np.arange(logo_data.shape[0])
cumulative_heights = np.zeros(logo_data[:,0].shape)
for i, c in enumerate(colors):
nuc = logo_data[:,i]
if i == 0:
p = ax.bar(ind, nuc, width, color=c)
else:
p = ax.bar(ind, nuc, width, color=c, bottom = cumulative_heights)
cumulative_heights += nuc
bars.append(p)
if fixed_scale:
ax.set_ylim(0, 2)
ax.legend((b[0] for b in bars), ['A', 'T', 'C', 'G'])
ax.set_ylabel('Information (bits)')
return bars
# based on: https://github.com/saketkc/notebooks/blob/master/python/Sequence Logo Python -- Any font.ipynb
class Scale(matplotlib.patheffects.RendererBase):
def __init__(self, sx, sy=None):
self._sx = sx
self._sy = sy
def draw_path(self, renderer, gc, tpath, affine, rgbFace):
affine = affine.identity().scale(self._sx, self._sy)+affine
renderer.draw_path(gc, tpath, affine, rgbFace)
def draw_logo_2(data, fixed_scale=True, ax=None):
"""
Draws a logo with letters instead of bars.
"""
logo_data = calc_logo(data)
if ax is None:
ax = plt.gca()
fig = ax.figure
ax.set_xlim(0, logo_data.shape[0]+0.5)
ax.set_ylim(0, 2)
ax.set_ylabel('Information (bits)')
#fig, ax = plt.subplots(figsize=(logo_data.shape[0], 2.5))
colors = ['r', 'g', 'b', 'y']
bases = ['A', 'T', 'C', 'G']
trans_offset = transforms.offset_copy(ax.transData,
fig=fig,
x=1,
y=0,
units='dots')
scale = 1.5
for i in range(logo_data.shape[0]):
scores = logo_data[i,:]
for b, c, s in zip(bases, colors, scores):
txt = ax.text(i+1, 0, b, transform=trans_offset, fontsize=100, ha='center', color=c)
txt.set_path_effects([Scale(0.8, s*scale)])
yshift = s*scale
trans_offset = transforms.offset_copy(trans_offset,
fig=fig,
y=yshift)
trans_offset = transforms.offset_copy(ax.transData,
fig=fig,
y=0,
units='points')
return fig, ax
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment