TextArc in D3 -old
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <title>TextArc with D3</title>
  <meta name="description" content="">
  <meta name="author" content="Jim Vallandingham">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- vis.js uses the D3 v3 API (d3.scale.log, d3.nest) and the global queue() helper,
       so D3 v3 and queue.js are loaded here -->
  <script src="https://d3js.org/d3.v3.min.js"></script>
  <script src="https://d3js.org/queue.v1.min.js"></script>
  <link rel="stylesheet" href="style.css">
</head>
<body>
  <div class="container">
    <div id="main" role="main">
      <h1 id="title">Alice's Adventures In Wonderland</h1>
      <div id="word"></div>
      <div id="vis"></div>
      <div id="about">
        <p>This is an attempt at a partial recreation of the amazing <a href="http://www.textarc.org/">TextArc</a> text visualization by W. Bradford Paley.</p>
        <p>It is meant as a tribute to that pioneering work, and as an experiment in how one might implement such an interactive visualization using current open web technologies.</p>
        <p>This version is implemented entirely in the browser using <a href="http://d3js.org/">D3.js</a>. The Alice in Wonderland text is derived from <a href="http://www.gutenberg.org/ebooks/11">Project Gutenberg</a>.</p>
        <p><a href="https://github.com/vlandham/textarc">Source Code</a></p>
      </div>
    </div>
  </div> <!--! end of #container -->
  <script src="stop_words.js"></script>
  <script src="vis.js"></script>
</body>
</html>
stop_words.js
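// Common English stop words. vis.js checks words against this list (via
// stop_words.indexOf) and skips them when drawing the word labels.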
var stop_words = [
  'a', 'about', 'above', 'across', 'after', 'again', 'against', 'all', 'almost', 'alone',
  'along', 'already', 'also', 'although', 'always', 'among', 'an', 'and', 'another', 'any',
  'anybody', 'anyone', 'anything', 'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask',
  'asked', 'asking', 'asks', 'at', 'away',
  'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because', 'become', 'becomes',
  'been', 'before', 'began', 'behind', 'being', 'beings', 'best', 'better', 'between', 'big',
  'both', 'but', 'by',
  'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly', 'clear', 'clearly',
  'come', 'could',
  'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done', 'down', 'downed',
  'downing', 'downs', 'during',
  'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends', 'enough', 'even',
  'evenly', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere',
  'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt', 'few', 'find', 'finds',
  'first', 'for', 'four', 'from', 'full', 'fully', 'further', 'furthered', 'furthering', 'furthers',
  'g', 'gave', 'general', 'generally', 'get', 'gets', 'give', 'given', 'gives', 'go',
  'going', 'good', 'goods', 'got', 'great', 'greater', 'greatest', 'group', 'grouped', 'grouping',
  'groups',
  'h', 'had', 'has', 'have', 'having', 'he', 'her', 'here', 'high', 'higher',
  'highest', 'him', 'his', 'how', 'however',
  'i', 'if', 'important', 'in', 'interest', 'interested', 'interesting', 'interests', 'into', 'is',
  'it', 'its', 'itself',
  'j', 'just',
  'k', 'keep', 'keeps', 'kind', 'knew', 'know', 'known', 'knows',
  'l', 'large', 'largely', 'last', 'later', 'latest', 'least', 'less', 'let', 'lets',
  'like', 'likely', 'long', 'longer', 'longest',
  'm', 'made', 'make', 'making', 'man', 'many', 'may', 'me', 'member', 'members',
  'men', 'might', 'more', 'most', 'mostly', 'mr', 'mrs', 'much', 'must', 'my',
  'myself',
  'n', 'necessary', 'need', 'needed', 'needing', 'needs', 'never', 'new', 'newer', 'newest',
  'next', 'no', 'nobody', 'non', 'noone', 'not', 'nothing', 'now', 'nowhere', 'number',
  'numbers',
  'o', 'of', 'off', 'often', 'old', 'older', 'oldest', 'on', 'once', 'one',
  'only', 'open', 'opened', 'opening', 'opens', 'or', 'order', 'ordered', 'ordering', 'orders',
  'other', 'others', 'our', 'out', 'over',
  'p', 'part', 'parted', 'parting', 'parts', 'per', 'perhaps', 'place', 'places', 'point',
  'pointed', 'pointing', 'points', 'possible', 'present', 'presented', 'presenting', 'presents',
  'problem', 'problems', 'put', 'puts',
  'q', 'quite',
  'r', 'rather', 'really', 'right', 'room', 'rooms',
  's', 'said', 'same', 'saw', 'say', 'says', 'second', 'seconds', 'see', 'seem',
  'seemed', 'seeming', 'seems', 'sees', 'several', 'shall', 'she', 'should', 'show', 'showed',
  'showing', 'shows', 'side', 'sides', 'since', 'small', 'smaller', 'smallest', 'so', 'some',
  'somebody', 'someone', 'something', 'somewhere', 'state', 'states', 'still', 'such', 'sure',
  't', 'take', 'taken', 'than', 'that', 'the', 'their', 'them', 'then', 'there',
  'therefore', 'these', 'they', 'thing', 'things', 'think', 'thinks', 'this', 'those', 'though',
  'thought', 'thoughts', 'three', 'through', 'thus', 'to', 'today', 'together', 'too', 'took',
  'toward', 'turn', 'turned', 'turning', 'turns', 'two',
  'u', 'under', 'until', 'up', 'upon', 'us', 'use', 'used', 'uses',
  'v', 'very',
  'w', 'want', 'wanted', 'wanting', 'wants', 'was', 'way', 'ways', 'we', 'well',
  'wells', 'went', 'were', 'what', 'when', 'where', 'whether', 'which', 'while', 'who',
  'whole', 'whose', 'why', 'will', 'with', 'within', 'without', 'work', 'worked', 'working',
  'works', 'would',
  'x',
  'y', 'year', 'years', 'yet', 'you', 'young', 'younger', 'youngest', 'your', 'yours',
  'z'
];
style.css
body, input, textarea {
  font-family: Georgia, "Times New Roman", serif;
  background-color: black;
  color: white;
}
#main {
  color: white;
}
.container {
  /*width: 940px;*/
  /*margin: auto;*/
  width: 980px;
}
.container {
  padding-right: 15px;
  padding-left: 15px;
  margin-right: auto;
  margin-left: auto;
}
#word {
  font-size: 20px;
  color: white;
  position: absolute;
  top: 20px;
  left: 30px;
}
#title, #about {
  font-family: Georgia, "Times New Roman", serif;
  text-align: center;
  color: #ddd;
  opacity: 0.4;
  font-style: italic;
}
.sentence {
  opacity: 0.4;
  fill: #ddd;
}
.highlight {
  fill: #1FC946;
  opacity: 1.0;
}
.line {
  stroke: #C5A438;
  stroke-width: 1;
  fill: none;
  pointer-events: none;
}
#vis {
  /*font-size: 2px;*/
}
#vis .word {
  /*opacity: 0.4;*/
}
vis.js
// Re-append each selected node to its parent so it is drawn on top of its siblings.
d3.selection.prototype.moveToFront = function() {
  return this.each(function() {
    this.parentNode.appendChild(this);
  });
};
var removePunctuation = function(string) {
  return string.replace(/['!"#$%&\\'()\*+,\-\.\/:;<=>?@\[\\\]\^_`{|}~']/g," ").replace(/\s{2,}/g," ");
};
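// For example, removePunctuation("Oh dear! Oh dear!") yields "Oh dear Oh dear ":
// each punctuation character becomes a space, then runs of whitespace collapse to one space.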
var visWidth = 960;
var visHeight = 500;

// pulls out all the sentences
// TODO: we don't really need the lengths at all - remove
// TODO: whitespace seems to be removed?
//  - more likely - the interesting spacing is removed in the gutenberg version
var sentenceLengths = function(text) {
  // text = text.replace(/['\"\‘\’]/gm,"");
  // tregex = /\n|([^\r\n.!?]+([.!?]+|$))/gim;
  // var sentences = text.match(tregex).map(function(s) { return s.trim(); });
  var sentences = text.split("\n");
  var data = sentences.map(function(s) {
    var d = {};
    d.sentence = s.replace(/ /g, '\u00a0');
    d.lookupSentence = removePunctuation(s).toLowerCase();
    d.length = s.length;
    return d;
  });
  return data;
};
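// Shape of one resulting entry, for a hypothetical input line "Down the Rabbit-Hole":
//   { sentence: "Down\u00a0the\u00a0Rabbit-Hole",  // spaces swapped for non-breaking spaces
//     lookupSentence: "down the rabbit hole",      // lowercased, punctuation stripped
//     length: 20 }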
// TODO: combine with sentences somehow to link sentence data with word data
var getWords = function(text) {
  text = text.replace(/['\"\‘\’]/gm,"");
  // text = text.replace(/[.,-\/#!$%\^&\*;:{}=\-_`~()]/g,"");
  text = removePunctuation(text);
  var allWords = text.split(" ").map(function(w) { return {"word": w}; });
  // allWords = allWords.filter(function(w) { return stop_words.indexOf(w.word.toLowerCase()) == -1; });
  //TODO: magic knowledge of the size of the ellipse here.
  var wordCenters = radialPlacement().width(460).height(280).center({"x":visWidth / 2, "y":visHeight / 2 });
  wordCenters(allWords);
  var wordsLen = allWords.length;
  var words = d3.map();
  for (var i = 0; i < wordsLen; i++) {
    var word = allWords[i];
    var wordList = [];
    var wordKey = word.word.toLowerCase();
    if (words.has(wordKey)) {
      wordList = words.get(wordKey);
    }
    wordList.push({"word":word.word, "index":i, "pos":i / wordsLen, "x":word.x, "y":word.y, "angle":word.angle});
    // if(word.w == "Alice") {
    //   console.log(wordList.length);
    // }
    words.set(wordKey, wordList);
  }
  // get the version of the word used in the most positions
  // this will be the visual representation used
  // TODO: still not quite right. Example - FATHER
  var getMostFrequent = function(positions) {
    // var words = positions.map(function(p) { return p.word; });
    if (positions.length === 1) {
      return positions[0].word;
    }
    var wordCounts = d3.nest()
      .key(function(p) { return p.word; })
      .rollup(function(words) { return words.length; })
      .entries(positions);
    wordCounts.sort(function(a,b) { return b.values - a.values; });
    return wordCounts[0].key;
  };
  var wordMap = [];
  words.forEach(function(word, positions) {
    var w = {"key":positions[0].word};
    w.visual = getMostFrequent(positions);
    w.x = d3.sum(positions.map(function(p) { return p.x; })) / positions.length;
    w.y = d3.sum(positions.map(function(p) { return p.y; })) / positions.length;
    w.positions = positions;
    // if(word == "Alice") {
    //   console.log(positions);
    // }
    w.count = positions.length;
    wordMap.push(w);
  });
  // sort to put more frequent words on top
  return wordMap.sort(function(a,b) { return a.count - b.count; });
};
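// Each entry of the returned wordMap describes one distinct (case-insensitive) word:
//   { key, visual, x, y, positions: [{word, index, pos, x, y, angle}, ...], count }
// Sorting ascending by count means the most frequent words are appended, and drawn, last.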
// sets up the x and y for a radial layout
// TODO: modified to lazily add parameters to the input keys - so everything
// is expected to be an object. Bad for many reasons.
var radialPlacement = function() {
  var values = d3.map();
  var increment = 20;
  var radius = 200;
  var width = 500;
  var height = 300;
  var tapper = -50;
  var center = {"x":0, "y":0};
  var start = -90;
  var current = start;
  var radialLocation = function(center, angle, width, height, tapper) {
    return {"x":(center.x + (width * Math.cos(angle * Math.PI / 180) - tapper)),
            "y":(center.y + (height * Math.sin(angle * Math.PI / 180) + tapper))};
  };
  // var placement = function(key) {
  //   var value = values.get(key);
  //   if (!values.has(key)) {
  //     value = place(key);
  //   }
  //   return value;
  // };
  var place = function(obj) {
    var value = radialLocation(center, current, width, height, tapper);
    // now it just adds attributes to the object. DANGEROUS
    obj.x = value.x;
    obj.y = value.y;
    obj.angle = current;
    // values.set(obj,value);
    current += increment;
    tapper += increment;
    tapper = Math.min(tapper, 0);
    return value;
  };
  var placement = function(keys) {
    values = d3.map();
    increment = 360 / keys.length;
    keys.forEach(function(k) {
      place(k);
    });
  };
  placement.keys = function(_) {
    if (!arguments.length) {
      return d3.keys(values);
    }
    // the setKeys helper this setter once called is not defined in this file;
    // keys are passed straight to placement(keys) instead, so treat this as a no-op
    return placement;
  };
  placement.center = function(_) {
    if (!arguments.length) {
      return center;
    }
    center = _;
    return placement;
  };
  // placement.radius = function(_) {
  //   if (!arguments.length) {
  //     return radius;
  //   }
  //
  //   radius = _;
  //   return placement;
  // };
  placement.width = function(_) {
    if (!arguments.length) {
      return width;
    }
    width = _;
    return placement;
  };
  placement.height = function(_) {
    if (!arguments.length) {
      return height;
    }
    height = _;
    return placement;
  };
  placement.start = function(_) {
    if (!arguments.length) {
      return start;
    }
    start = _;
    return placement;
  };
  return placement;
};
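// Typical use, mirroring the calls made in getWords() above and chart() below:
//   var layout = radialPlacement()
//       .width(460).height(280)
//       .center({"x": visWidth / 2, "y": visHeight / 2});
//   layout(items);  // walks items once around the ellipse, adding x, y, and angle to each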
var chart = function() {
  var width = visWidth;
  var height = visHeight;
  var margin = {top: 20, right: 20, bottom: 20, left: 20};
  var g = null;
  var sentence = null;
  var word = null;
  var sentenceCenters = radialPlacement().width(520).center({"x":width / 2 - 30, "y":height / 2 });
  var chart = function(selection) {
    selection.each(function(rawData) {
      var sentences = rawData.sentences;
      sentenceCenters(sentences);
      var words = rawData.words;
      var svg = d3.select(this).selectAll("svg").data([sentences]);
      var gEnter = svg.enter().append("svg").append("g");
      svg.attr("width", width + margin.left + margin.right);
      svg.attr("height", height + margin.top + margin.bottom);
      g = svg.select("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
      sentence = g.selectAll(".sentence")
        .data(sentences).enter()
        .append("text")
        .attr("class", "sentence")
        .attr("x", function(d) { return d.x; })
        .attr("y", function(d) { return d.y; })
        // .attr("text-anchor", function(d) { return d.angle > 90 ? "end" : "start"; })
        .attr("text-anchor", "start")
        // .attr("fill", "#ddd")
        // .attr("opacity", 0.4)
        .attr("font-size", "2px")
        .text(function(d) { return d.sentence; });
      var maxCount = d3.max(words, function(w) { return w.count; });
      var color = d3.scale.log()
        .domain([1, maxCount / 2])
        .range(["#333", "#fff"]);
      word = g.selectAll(".word")
        // keys keep the casing of a word's first occurrence, so lower-case them
        // before checking against the (all lower-case) stop word list
        .data(words.filter(function(w) { return stop_words.indexOf(w.key.toLowerCase()) == -1; })).enter()
        .append("text")
        .attr("class", "word")
        .attr("x", function(d) { return d.x; })
        .attr("y", function(d) { return d.y; })
        .attr("text-anchor", "middle")
        .attr("text-anchor", function(d) { return d.x > (width / 2) ? "end" : "start"; })
        // .attr("font-size", function(d) { return (Math.min(d.count, 12)) + "px";})
        .attr("font-size", "8px")
        // .attr("fill", "#ddd")
        // .attr("opacity", function(d) { return Math.min(d.count / 20, 0.5); })
        // .attr("opacity", function(d) { return d.count > 30 ? 0.9 : 0.4; })
        // .attr("fill", function(d) { return d.count > 30 ? "#ddd": "#555"; })
        .attr("fill", function(d) { return color(d.count); })
        .text(function(d) { return d.visual; })
        .on("mouseover", mouseover)
        .on("mouseout", mouseout);
    });
  };
  //TODO: this will match sentences with sub-words in them as well.
  // example "mouse" will match "mouse" but also "dormouse".
  // a fix would be to add spaces around the word - but then we need
  // to ensure that the lookupSentence is removing 's and other punctuation properly
  function getSentencesWith(aWord) {
    return sentence.filter(function(s) {
      return s.lookupSentence.indexOf(aWord.toLowerCase()) > -1;
    });
  }
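  // A possible fix for the substring issue noted above (a sketch only, not wired in):
  // match on word boundaries so "mouse" no longer matches "dormouse":
  //   function getSentencesWith(aWord) {
  //     var re = new RegExp("\\b" + aWord.toLowerCase() + "\\b");
  //     return sentence.filter(function(s) { return re.test(s.lookupSentence); });
  //   }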
  function mouseover(d, i) {
    var bbox = this.getBBox();
    var direction = d.x > (width / 2) ? -1 : 1;
    g.selectAll(".line")
      .data(d.positions)
      .enter()
      .append("line")
      .attr("class", "line")
      .attr("x1", d.x + (direction * (bbox.width / 2)))
      .attr("y1", d.y - (bbox.height / 3))
      .attr("x2", function(p) { return p.x; })
      .attr("y2", function(p) { return p.y; });
    d3.select("#word").html(d.visual);
    if (!d.sentences) {
      d.sentences = getSentencesWith(d.key);
    }
    d.sentences.classed("highlight", true).moveToFront();
  }
  function mouseout(d, i) {
    g.selectAll(".line").remove();
    sentence.classed("highlight", false);
  }
  return chart;
};

function plotData(selector, data, plot) {
  d3.select(selector)
    .datum(data)
    .call(plot);
}

var plot = chart();

function display(error, text) {
  var sentences = sentenceLengths(text);
  var words = getWords(text);
  plotData("#vis", {"sentences":sentences, "words": words}, plot);
}

queue()
  .defer(d3.text, "data/alice.txt")
  .await(display);