Skip to content

Instantly share code, notes, and snippets.

@alanyiu
Last active December 2, 2017 20:03
Show Gist options
  • Save alanyiu/4658400 to your computer and use it in GitHub Desktop.
Save alanyiu/4658400 to your computer and use it in GitHub Desktop.
Nussinov algorithm

Animation representing the Nussinov algorithm for RNA folding. Note that in this implementation, there is no restriction on pairing of neighboring bases.

You can restart the animation with a new input sequence using the input form below the diagram.

<!DOCTYPE html>
<meta charset="utf-8">
<head>
<link href='http://fonts.googleapis.com/css?family=Roboto+Condensed:400,300' rel='stylesheet' type='text/css'>
<link href='http://fonts.googleapis.com/css?family=Roboto:400,700' rel='stylesheet' type='text/css'>
<link rel="stylesheet" type="text/css" href="styles.css">
</head>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// For a given RNA sequence (array of letters representing nucleotides), returns 1) maximum number
// of base-pairings for each subsequence and 2) a set of base-pairings that could comprise the
// maximally-base-paired folding configuration
function nussinovFold(seq) {
// N[i][j] is the max number of base pairings for a subsequence from base i to base (i+j)
var N = new Array(seq.length);
// s[i][j] is an array of 2-element arrays (e.g., [3, 7]) representing the base-pairings present
// in a maximally-paired folding configuration
var s = new Array(seq.length);
// Initialize N and s arrays
for (var i = 0; i < seq.length; i++) {
N[i] = new Array(seq.length - i);
s[i] = new Array(seq.length - i);
for (var j = 0; j < seq.length - i; j++) {
s[i][j] = new Array(0);
}
}
// i is the position within the sequence; j is the segment length (segment goes from i to i+j)
for (var j = 0; j < seq.length; j++) {
for (var i = 0; i < seq.length - j; i++) {
if (j == 0) { // 1-nucleotide segment:
N[i][j] = 0; // no pairs
} else if (j == 1) { // 2-nucleotide segment:
N[i][j] = pair_table[seq[i]][seq[i+j]]; // determine pairing (e.g., A-U)
if (pair_table[seq[i]][seq[i+j]] == 1) {
s[i][j] = s[i][j].concat([[i, i+j]]);
}
} else { // >2-nucleotide segment:
N[i][j] = N[i+1][j-2] + pair_table[seq[i]][seq[i+j]]; // ends paired, no bifurcation
if (pair_table[seq[i]][seq[i+j]] == 1) {
s[i][j] = s[i+1][j-2].concat([[i, i+j]]);
}
var k = 0, n = 0, h = 0;
while (k < j) { // bifurcation at position k
n = N[i][k] + N[i+k+1][j-1-k];
h = N[i][j] - n; // compare this fold to previous fold
if (h > 0) { // if previous is better
k += h; // increment k by h
} else { // if this fold is better
N[i][j] = n; // update N
s[i][j] = s[i][k].concat(s[i+k+1][j-1-k]); // update s
k += 1; // increment k by just 1
}
}
}
}
}
return([N, s])
}
// For a given sequence, returns an array [0, 1, 2, ... , N-1] where N is the sequence length
function getSeqNums(sequence) {
seq_nums = [];
var seq_len = sequence.length;
for (var i = 0; i < seq_len; i++) {
seq_nums[i] = i;
}
return seq_nums;
}
// For a given sequence string, split it into an array and validates the seqeunce
function getSequence(sequence_str) {
var new_seq = sequence_str.toUpperCase().split("");
var new_seq_len = new_seq.length;
if (new_seq_len < 2) {
document.getElementById("error_txt").innerHTML = "Sequence must be at least 2 nucleotides";
return false;
}
if (new_seq_len > 30) {
document.getElementById("error_txt").innerHTML = "Sequence may not be more than 30 nucleotides";
return false;
}
for (var i = 0; i < new_seq_len; i++) {
if (new_seq[i] != 'A' && new_seq[i] != 'U' && new_seq[i] != 'G' && new_seq[i] != 'C') {
document.getElementById("error_txt").innerHTML = "Sequence may only contain A, U, G, and C";
return false;
}
}
document.getElementById("error_txt").innerHTML = "";
return new_seq;
}
// Collect all base-pairings that will be used in the animation.
// Leave the "final" pairs to the end--these are the ones displayed at the end of the animation for
// the folding configuration of the entire sequence. We want these final pairs at the end of the
// pairs array so that their arcs will show up on top (higher z-index).
function generatePairs(seq, maxBP) {
var final_pairs = maxBP[1][0][seq_len-1];
var final_pairs_len = final_pairs.length;
pairs = [];
for (var i = 0; i < seq_len; i++) {
for (var j = i+1; j < seq_len; j++) {
if (pair_table[seq[i]][seq[j]] == 1) {
for (var k = 0; k < final_pairs_len; k++) {
if ((i == final_pairs[k][0] && j == final_pairs[k][1]) ||
(i == final_pairs[k][1] && j == final_pairs[k][0])) {
break; // exclude final pairs from the pairs array
}
}
pairs = pairs.concat([[i, j]]);
}
}
}
pairs = pairs.concat(final_pairs);
return [pairs, final_pairs];
}
// From the array of pairs, generate one arc object for each pair
function generateArcs() {
arcs = [];
var pairs_len = pairs.length;
for (var i = 0; i < pairs_len; i++) {
var arc_i = {};
arc_i.startAngle = -Math.PI/2;
arc_i.endAngle = Math.PI/2;
arc_i.innerRadius = Math.abs(pairs[i][1]-pairs[i][0])*(x(1)-x(0))/2 - 1;
arc_i.outerRadius = Math.abs(pairs[i][1]-pairs[i][0])*(x(1)-x(0))/2 + 1;
arc_i.source = pairs[i][0];
arc_i.target = pairs[i][1];
arcs.push(arc_i);
}
return(arcs);
}
// Draw initial animation elements: bases, connecting line, arcs
function drawElements() {
svg.selectAll("g").remove(); // clear the canvas
var line_g = svg.append("g") // container for connecting lines
.attr("class", "line_g");
line_g.append("line") // grey "inactive" line
.attr("id", "inactive_line")
.attr("x1", x(0) + x_step/2)
.attr("y1", 290)
.attr("x2", x(seq.length-1) + x_step/2)
.attr("y2", 290)
.style("stroke", "#ccc");
line_g.append("line") // black "active" line
.attr("id", "active_line")
.attr("x1", x(0) + x_step/2)
.attr("y1", 290)
.attr("x2", x(seq.length-1) + x_step/2)
.attr("y2", 290)
.style("stroke", "#000");
var arc_g = svg.append("g") // container for arcs
.attr("class", "arcs");
arc_g.selectAll("path.arc") // arcs -- not adding svg d attribute until animation
.data(arcs)
.enter().append("path")
.attr("id", function(d, i) { return "arc-" + d.source + "-" + d.target; })
.attr("class", "arc")
.attr("transform", function(d) {
return "translate(" + ((x(d.source) + x(d.target))/2 + x_step/2) + ",286)";
})
.style("stroke", "none")
.style("fill", function(d) {
return "AU".indexOf(seq[d.source]) > -1 ? "#f63" : "#36f";
})
.style("opacity", 0);
var bases_g = svg.append("g") // container for bases
.attr("class", "bases");
var base_svgs = bases_g.selectAll("svg.base") // container for each base (circle, text)
.data(seq)
.enter().append("svg")
.attr("class", "base_svg")
.attr("y", 280)
.attr("x", function(d, i) { return x(i); })
.attr("height", 50)
.attr("width", x_step);
base_svgs.append("circle") // circle for base
.attr("class", "base_circle")
.attr("id", function(d, i) { return "circle-" + i; })
.attr("r", "5px")
.attr("cx", x_step/2)
.attr("cy", 10);
base_svgs.append("text") // text label for base
.attr("class", "base")
.attr("id", function(d, i) { return "text-" + i; })
.text(function(d) { return(d); })
.style("fill", function(d, i) { return "AU".indexOf(seq[i]) > -1 ? "#c63" : "#36c"; })
.style("text-anchor", "middle")
.attr("y", 38)
.attr("x", x_step/2);
}
// function updateMarker(marker, active, pos, seg_length) {
// if (active) {
// marker.style("opacity", 1);
// } else {
// marker.style("opacity", 0.5);
// }
// }
// Transition function: update an arc's properties based on if it's active or not.
// 'revert' determines whether it should revert back to the original state after the update.
function updateArc(arc, active, t_duration, revert) {
arc.duration(t_duration);
if (active) {
arc.style("opacity", 1)
.attr("d", arc_fat);
} else {
arc.style("opacity", 0.15)
.attr("d", arc_thin);
}
if (revert) {
arc.each("end", function() {
d3.select(this)
.transition()
.delay(t_animation)
.call(updateArc, !active, t_duration, false);
});
}
}
// Set the final (static) display after the animation is all finished. Show the final pair arcs
// and fade out the non-final arcs.
function endAnimation(seq, N, s, final_pairs) {
svg.select("#active_marker_rect")
.transition()
.duration(500)
.style("opacity", 0);
svg.selectAll("text.base")
.transition()
.duration(500)
.style("opacity", 1);
var final_pairs_len = final_pairs.length;
for (var k = 0; k < final_pairs_len; k++) {
svg.select("#arc-" + final_pairs[k][0] + "-" + final_pairs[k][1])
.transition().call(updateArc, true, 500, false);
}
}
// Update the segment marker (which marks which sub-sequence is active during each animation step)
function updateMarker(marker, i, j, opacity, t_duration) {
marker.duration(t_duration)
.attr("x", x(i) + x_step/2 - 9)
.attr("width", j*(x(1)-x(0))+18)
.style("opacity", opacity);
}
// Animate the marker back to the beginning of the sequenge and increase sub-sequence length by 1.
// When finished, call stepAnimation to continue the animation.
function resetMarker(seq, N, s, final_pairs, i, j) {
svg.select("#active_marker_rect")
.style("opacity", 0.5);
svg.select("#active_line")
.attr("x1", x(0) + x_step/2)
.attr("x2", x(0) + x_step/2);
svg.selectAll("circle.base_circle")
.style("fill", "#ccc");
svg.selectAll("text.base")
.style("opacity", 0.25);
svg.select("#active_marker_rect")
.transition()
.call(updateMarker, 0, j-1, 0.5, t_animation)
.each("end", function() {
d3.select(this)
.transition()
.call(updateMarker, i, j, 0.5, t_animation)
.each("end", function() {
anim_timeout = window.setTimeout(function() {
stepAnimation(seq, N, s, final_pairs, i, j);
}, t_animation)
});
});
}
// Take one step forward in the animation
function stepAnimation(seq, N, s, final_pairs, i, j) {
svg.select("#active_marker_rect") // update the marker position/size
.transition().call(updateMarker, i, j, 1, 0);
svg.select("#active_line") // update active line position/size
.attr("x1", x(i) + x_step/2)
.attr("x2", x(i+j) + x_step/2);
svg.selectAll("circle.base_circle") // color active vs. inactive circles
.style("fill", function(d, k) {
return ((k < i) || (k > (i+j))) ? "#ccc" : "#000";
});
svg.selectAll("text.base") // fill active vs. inactive text
.style("opacity", function(d, k) {
return ((k < i) || (k > (i+j))) ? 0.25 : 1;
});
s_len = s[i][j].length;
for (var k = 0; k < s_len; k++) { // show all arcs for this sub-sequence
svg.select("#arc-" + s[i][j][k][0] + "-" + s[i][j][k][1])
.transition().call(updateArc, true, 0, true);
}
i++;
if (i == seq_len - j) {
j++;
if (j == seq_len) {
anim_timeout = window.setTimeout(function() {
endAnimation(seq, N, s, final_pairs); // end the animation
}, t_animation);
return;
}
i = 0;
anim_timeout = window.setTimeout(function() { // reset the marker and increment sub-
resetMarker(seq, N, s, final_pairs, i, j); // sequence; animation continues
}, t_animation);
return;
}
anim_timeout = window.setTimeout(function() { // increment position in sequence;
stepAnimation(seq, N, s, final_pairs, i, j); // animation continues
}, t_animation);
}
// Initialize elements for animation and kick off the first step
function startAnimation(seq, N, s, final_pairs) {
svg.select("#active_marker_rect")
.style("opacity", 1);
stepAnimation(seq, N, s, final_pairs, 0, 1);
}
// Update the RNA sequence and start off a new animation
function updateSequence(sequence_str) {
var new_seq = getSequence(sequence_str);
if (new_seq != false) {
seq = new_seq, seq_len = seq.length;
seq_nums = getSeqNums(seq);
x.domain(seq_nums);
x_step = x(1) - x(0);
maxBP = nussinovFold(seq);
var pairs_arr = generatePairs(seq, maxBP);
pairs = pairs_arr[0], final_pairs = pairs_arr[1];
arcs = generateArcs(pairs);
window.clearTimeout(anim_timeout);
drawElements();
startAnimation(seq, maxBP[0], maxBP[1], final_pairs);
}
}
var seq = [], seq_nums = [], seq_len = 0;
var maxBP = [];
var pairs = [], final_pairs = [];
var arcs = [];
var x_step = 0; // distance between bases (to be calculated later)
var t_animation = 250; // time (ms) between animation steps
var anim_timeout = {}; // windowTimeout object (to be assigned later)
// Allowed (1) and disallowed (0) base-pairings
var pair_table = {'A': {'A':0, 'C':0, 'G':0, 'U':1},
'C': {'A':0, 'C':0, 'G':1, 'U':0},
'G': {'A':0, 'C':1, 'G':0, 'U':0},
'U': {'A':1, 'C':0, 'G':0, 'U':0}};
var x = d3.scale.ordinal()
.rangeRoundBands([0, 600], 0);
var xAxis = d3.svg.axis()
.scale(x)
.orient("bottom");
var svg = d3.select("body").append("svg")
.attr("width", 600)
.attr("height", 350);
// Thick arc for "active" pairs
var arc_fat = d3.svg.arc()
.startAngle(-Math.PI/2)
.endAngle(Math.PI/2)
.innerRadius(function(d) { return (Math.abs(d.target-d.source)*(x(1)-x(0)))/2 - 3; })
.outerRadius(function(d) { return (Math.abs(d.target-d.source)*(x(1)-x(0)))/2 + 3; });
// Thin arc for "inactive" pairs
var arc_thin = d3.svg.arc()
.startAngle(-Math.PI/2)
.endAngle(Math.PI/2)
.innerRadius(function(d) { return (Math.abs(d.target-d.source)*(x(1)-x(0)))/2 - 1; })
.outerRadius(function(d) { return (Math.abs(d.target-d.source)*(x(1)-x(0)))/2 + 1; });
// Animation position marker (indicates active sub-sequence)
svg.append("rect")
.attr("class", "active_marker")
.attr("id", "active_marker_rect")
.attr("y", 282)
.attr("height", 16)
.attr("rx", 9)
.attr("ry", 9)
.style("fill", "#fd3")
.style("opacity", 1);
</script>
<div id="form_div">
<form onsubmit="updateSequence(this.sequence.value); return false;" id="seq_form">
<input type="text" name="sequence" id="seq_input" placeholder="Enter new sequence"/>
<input type="submit" value="Update" id="submit"/>
</form>
</div>
<div id="error_txt"></div>
<script>
// Initial example sequence
updateSequence("AACGUUCCGU");
</script>
</body>
</html>
body {
text-align: center;
align: center;
}
text.base {
font-family: "Roboto Condensed", sans-serif;
font-size: 21px;
fill: #000;
}
.arc path {
stroke: #000;
}
#form_div {
width: 700px;
margin: 0px auto;
}
#seq_input {
width: 400px;
font-family: "Roboto Condensed", sans-serif;
font-weight: 300;
font-size: 20px;
border: none;
border-bottom: #000 1px dotted;
background-color: transparent;
text-align: center;
}
#seq_input:focus {
border-bottom: #00a 2px solid;
outline: none;
}
#submit {
font-family: "Roboto", sans-serif;
font-size: 20px;
width: 80px;
height: 32px;
background: #acf;
border: none;
}
#error_txt {
font-family: "Roboto", sans-serif;
font-size: 14px;
color: #f00;
position: relative;
top: 10px;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment