Skip to content

Instantly share code, notes, and snippets.

@yangyushi
Created May 18, 2022 09:17
Show Gist options
  • Save yangyushi/74dc899897918a113f074b206cd0f77a to your computer and use it in GitHub Desktop.
Save yangyushi/74dc899897918a113f074b206cd0f77a to your computer and use it in GitHub Desktop.
toy script to simulate a naïve base calling process
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
def get_transfer_mat(N, p, q):
"""
Calculating the transition matrix P with shape (N, N).
P_ij: the transition probability of two states:
1) location `i` emitting to
2) location `j` emitting
Args:
N (int): the number of cycles
p (float): the probability of lagging
q (float): the probability of leading
Return:
np.ndarray: the transition matrix P
"""
P = np.empty((N, N))
for u in range(N):
for v in range(N):
if u == v:
P[u, v] = p # lagging error, no advance
elif u+1 == v:
P[u, v] = 1 - p - q # normal process
elif u+2 == v:
P[u, v] = q # advance twice, leading error
else:
P[u, v] = 0
return P
def get_phase_mat(trans_mat):
"""
Calculating the phase matrix Q with shape (N, N).
Q_ij: the probability of location `i` emitting light at cycle `j`
Args:
trans_mat (np.ndarray): the transition matrix
Return:
np.ndarray: the phase matrix Q
"""
N = len(trans_mat)
tmp = trans_mat.copy()
Q = np.empty((N, N))
Q[0] = np.zeros(N)
Q[0, 0] = 1
for t in range(N-1):
Q[t+1] = tmp[0]
tmp = trans_mat @ tmp
return Q
def get_fade_mat(N, tau_inv):
"""
Simulate fading matrix D as an exponential decay
Args:
N (int): the number of cycles.
tau_inv (float): the inverse of the relaxation time of the decay,
small value = slow decay
Return:
(np.ndarray): the fading matrix D, shape (N, N)
"""
D = [1]
for _ in range(N - 1):
D.append(np.exp(-tau_inv) * D[-1])
return np.diag(D)
def get_seq_onehot(seq_indices):
"""
Get one-hot encoding from numerical sequences.
"""
N = len(seq_indices)
seq_arr = np.zeros((N, 4))
for i, seq_idx in enumerate(seq_indices):
seq_arr[i][seq_idx] = 1
return seq_arr
if __name__ == "__main__":
N = 50
p, q, tau_inv, eps = 0.02, 0.01, 0.02, 0.005
trans_mat = get_transfer_mat(N, p, q)
phase_mat = get_phase_mat(trans_mat)
fade_mat = get_fade_mat(N, tau_inv)
R = fade_mat @ phase_mat
plt.figure(figsize=(6, 5))
data_str = f'$(p = {p:.2f}, q = {q:.2f})$'
plt.title(r"$ \mathbf{R}_\mathrm{T \times N} $" + data_str)
plt.imshow(R)
plt.ylabel("Cycle Number ($t$)")
plt.xlabel("Terminator Location ($n$)")
cb = plt.colorbar()
cb.set_label("Intensity (a.u.)")
plt.tight_layout()
plt.savefig('R.pdf')
plt.close()
corpus = 'ATCG'
seq_indices = np.random.choice(np.arange(4), size=N)
seq_str = "".join([corpus[i] for i in seq_indices])
S = get_seq_onehot(seq_indices)
I = R @ S + np.random.normal(0, eps, size=(N, 4))
S_hat = np.linalg.pinv(R) @ I
fig = plt.figure(figsize = (10, 7))
ax0, ax1, ax2 = fig.subplots(3, 1, sharex=True)
pos = np.arange(N)
colors = ['olive', 'seagreen', 'crimson', 'royalblue']
marksers = 'sopH'
for i, channel in enumerate(S.T):
ax0.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i])
for i, channel in enumerate(I.T):
ax1.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i])
for i, channel in enumerate(S_hat.T):
ax2.plot(pos, channel, color=colors[i], marker=marksers[i], mfc='w', label=corpus[i])
for ax in (ax0, ax1, ax2):
ax.legend(ncol=4, handlelength=0.8, loc='lower left', columnspacing=1.0, fontsize=14)
ax.set_ylim(-0.25, 1.25)
ax.set_yticks([])
ax.set_ylabel('Intensity (a.u.)')
ax0.set_title("Perfect Signal ($\mathbf{S}$)")
ax1.set_title("Observed Signal ($\mathbf{I} = \mathbf{R} \mathbf{S} + \mathbf{E}$)")
ax2.set_title("Naïvely Deconvolved Signal ($\hat{\mathbf{S}} = \mathbf{R}^{-1} \mathbf{I}$)")
ax2.set_xlabel("Cycle")
plt.tight_layout()
plt.savefig('signal.pdf')
plt.show()
@yangyushi
Copy link
Author

The output

signal

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment