Charting word co-occurrences as a graph in D3.js
// Stylesheet for the chart: the SVG frame, node circles and labels, link
// lines, and a distinct fill for nodes that carry the "red" class.
var customCss = 'svg{border:1px solid red;display:block;background:#fcfcff}'
  + '.node circle{fill:#aaa;stroke:#fff;stroke-width:1px}.link{stroke:#335;stroke-width:1px}'
  + '.red circle{fill:#439}.node text{pointer-events:none;font:12px sans-serif;fill:#338}';

// Inject the rules exactly once, through the DOM. document.write() is
// deliberately not used: it would add a duplicate copy of the rules and, when
// the script runs after the page has finished loading, replace the document.
var head = document.getElementsByTagName('head')[0];
var s = document.createElement('style');
s.setAttribute('type', 'text/css');
if (s.styleSheet) {
  // Legacy IE exposes the sheet text through styleSheet.cssText.
  s.styleSheet.cssText = customCss;
} else {
  // Every other browser takes the rules as a text node child.
  s.appendChild(document.createTextNode(customCss));
}
head.appendChild(s);
// English stop words (lower case, contractions keep their apostrophe).
// Tokens found in this list are dropped before the graph is built.
var stops = [
  "a", "about", "above", "after", "again", "against",
  "all", "also", "am", "an", "and", "any", "are", "aren't", "as", "at",
  "be", "because", "been", "before", "being", "below", "between", "both", "but", "by",
  "can't", "cannot", "could", "couldn't",
  "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during",
  "each",
  "few", "for", "from", "further",
  "had", "hadn't", "has", "hasn't", "have", "haven't", "having",
  "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself",
  "him", "himself", "his", "how", "how's",
  "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't",
  "it", "it's", "its", "itself",
  "let's",
  "me", "more", "most", "mustn't", "my", "myself",
  "no", "nor", "not",
  "of", "off", "on", "once", "only", "or", "other", "ought",
  "our", "ours", "ourselves", "out", "over", "own",
  "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't",
  "so", "some", "such",
  "than", "that", "that's", "the", "their", "theirs", "them", "themselves",
  "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've",
  "this", "those", "through", "to", "too",
  "under", "until", "up",
  "very",
  "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't",
  "what", "what's", "when", "when's", "where", "where's", "which", "while",
  "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't",
  "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"
];
/**
 * Pick a pseudo-random integer from the half-open range [ceil(min), floor(max)).
 *
 * @param {number} min - lower bound; rounded up to an integer, inclusive.
 * @param {number} max - upper bound; rounded down to an integer, exclusive.
 * @returns {number} an integer between the rounded bounds.
 */
function getRandomInt(min, max) {
  var lower = Math.ceil(min);
  var upper = Math.floor(max);
  var span = upper - lower;
  return lower + Math.floor(Math.random() * span);
}
/**
 * Return the distinct values of `a`, ordered by the default Array sort().
 * The input array is left untouched.
 *
 * @param {Array} a - values to de-duplicate (word strings in this script).
 * @returns {Array} a new, sorted array with duplicates removed.
 */
function uniq(a) {
  // Sort a private copy: sort() works in place, and the copy must be a real
  // local (an undeclared assignment would leak a global and throw in strict
  // mode).
  var sorted = a.slice().sort();
  // After sorting, equal values are adjacent, so keep an item only when it
  // differs from its predecessor.
  return sorted.filter(function (item, pos) {
    return pos === 0 || item !== sorted[pos - 1];
  });
}
// The chart is drawn into the first <svg> on the page; its width/height
// attributes (coerced to numbers) define the layout area.
var svg = d3.select("svg");
var width = Number(svg.attr("width"));
var height = Number(svg.attr("height"));

// Graph data, filled in by the statements that follow.
var nodes = [],     // one entry per distinct word
    links = [],     // one entry per pair of consecutive words
    wordVecs = [],  // token list of each paragraph
    words = [];     // all tokens, in reading order
// Tokenize every <p> on the page: lower-case the text, strip punctuation,
// split on whitespace and drop stop words. Each paragraph's tokens are kept
// as one entry of wordVecs and also appended to the flat words list.
d3.selectAll("p").each(function () {
  var tokens = d3.select(this).text()
    .toLowerCase()
    .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "")
    .trim()
    // Split on any whitespace run so that words separated by a single
    // newline or tab are not glued together into one token.
    .split(/\s+/)
    .filter(function (token) {
      // An empty paragraph splits into [""]; that empty token is not a word.
      return token !== "" && stops.indexOf(token) < 0;
    });
  wordVecs.push(tokens);
  words = words.concat(tokens);
});
// Distinct words (sorted) and their count; each distinct word becomes a node.
var wordsU = uniq(words);
var numNodes = wordsU.length;

// For every distinct word, the number of paragraphs that contain it at least
// once (repeats inside one paragraph are not counted again).
var wordsFreq = wordsU.map(function (word) {
  return wordVecs.filter(function (paragraphTokens) {
    return paragraphTokens.indexOf(word) > -1;
  }).length;
});
// Create one node per distinct word at a random starting position inside the
// drawing area; r carries the word's paragraph count, name the word itself.
wordsU.forEach(function (word, idx) {
  var startX = getRandomInt(1, width);
  var startY = getRandomInt(1, height);
  nodes.push({
    x: startX,
    y: startY,
    r: wordsFreq[idx],
    name: word
  });
});
// Link every word to the word that follows it in reading order. Endpoints are
// stored as indices into wordsU (and therefore into nodes).
// NOTE(review): words is the flat list across all paragraphs, so the last word
// of one paragraph is also linked to the first word of the next, and repeated
// pairs produce repeated links -- confirm that this is intended.
var wordIndex = Object.create(null); // word -> position in wordsU
for (var k = 0; k < wordsU.length; k++) {
  wordIndex[wordsU[k]] = k;
}
// The lookup table replaces a wordsU.indexOf() scan per endpoint; wordsU has
// no duplicates, so the resulting indices are the same.
for (var i = 0; i < words.length - 1; i++) {
  links.push({
    source: wordIndex[words[i]],
    target: wordIndex[words[i + 1]]
  });
}
// Force-directed layout over the word graph, confined to the SVG's size.
var force = d3.layout.force();
force.size([width, height]);
force.nodes(nodes);
force.links(links);
// Link length is tied to the drawing width.
force.linkDistance(width / 13);
// Build an accessor for one coordinate of one endpoint of a link. At this
// point link endpoints are still numeric indices into nodes.
function initialLinkCoord(end, axis) {
  return function (d) {
    return nodes[d[end]][axis];
  };
}

// One <line class="link"> per link, drawn between the endpoints' starting
// positions.
var link = svg.selectAll('.link')
  .data(links)
  .enter()
  .append('line')
  .attr('class', 'link')
  .attr('x1', initialLinkCoord('source', 'x'))
  .attr('y1', initialLinkCoord('source', 'y'))
  .attr('x2', initialLinkCoord('target', 'x'))
  .attr('y2', initialLinkCoord('target', 'y'));
// One <g class="node"> per word, placed at the node's starting position.
var node = svg.selectAll('.node')
  .data(nodes)
  .enter()
  .append('g')
  .attr("class", "node")
  .attr("transform", function (d) {
    return "translate(" + d.x + "," + d.y + ")";
  });

// Each node is a fixed-size circle with the word as a label to its right.
var circles = node.append('circle').attr('r', 6);
node.append("text")
  .attr("dx", 12)
  .attr("dy", ".35em")
  .text(function (d) {
    return d.name;
  });

// Words found in more than two paragraphs are highlighted via the "red" class.
node.filter(function (d) {
  return d.r > 2;
}).classed("red", true);
// animationStep: duration (ms) of the transition started on every layout tick.
// animating: flag that is set here but not read by the code shown in this file.
var animationStep = 300,
    animating = true;
// Per-node repulsion: the more paragraphs a word appears in, the stronger the
// (negative) charge. The magnitude is randomized inside a band chosen by r:
//   r > 2  -> [300, 500)
//   r < 2  -> [50, 100)
//   else   -> [100, 200)
force.charge(function (d) {
  var low = 100;
  var high = 200;
  if (d.r > 2) {
    low = 300;
    high = 500;
  } else if (d.r < 2) {
    low = 50;
    high = 100;
  }
  return -1 * getRandomInt(low, high);
});
// Per-link rigidity. Links whose endpoints are more than one position apart
// in the sorted word list get a strength equal to their relative index gap;
// all other links get a randomized strength of 3/5.1 or 4/5.1.
force.linkStrength(function (link) {
  // By the time the layout evaluates this accessor, start() has replaced the
  // numeric endpoints with node objects, so arithmetic on link.source /
  // link.target directly yields NaN. Read the node's index instead, while
  // still accepting a plain number.
  var sourceIdx = typeof link.source === "number" ? link.source : link.source.index;
  var targetIdx = typeof link.target === "number" ? link.target : link.target.index;
  var gap = targetIdx - sourceIdx;
  if (gap > 1) {
    // Divide by the index sum (the parentheses matter: without them the
    // expression multiplies). With gap > 1 the sum is positive and the result
    // lies in (0, 1], the range the force layout expects for a strength.
    return gap / (1.0 * (targetIdx + sourceIdx));
  }
  return getRandomInt(3, 5) / 5.1;
});
// On every layout tick, ease the links and nodes toward their newly computed
// positions with a linear transition lasting animationStep milliseconds.
// Inside the handler the link endpoints are node objects, so coordinates are
// read straight from d.source / d.target.
force.on("tick", function () {
  var endpoint = function (end, axis) {
    return function (d) {
      return d[end][axis];
    };
  };
  var translate = function (d) {
    return "translate(" + d.x + "," + d.y + ")";
  };

  link.transition()
    .ease('linear')
    .duration(animationStep)
    .attr("x1", endpoint("source", "x"))
    .attr("y1", endpoint("source", "y"))
    .attr("x2", endpoint("target", "x"))
    .attr("y2", endpoint("target", "y"));

  node.transition()
    .ease('linear')
    .duration(animationStep)
    .attr("transform", translate);

  // A throttle that stopped the layout here and restarted it after
  // animationStep ms via setTimeout is currently disabled.
});

// Kick off the simulation.
force.start();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment