Skip to content

Instantly share code, notes, and snippets.

@vlandham
Last active January 4, 2022 01:46
Show Gist options
  • Save vlandham/9f22ca7f11789ef90ec66457b12d162f to your computer and use it in GitHub Desktop.
Save vlandham/9f22ca7f11789ef90ec66457b12d162f to your computer and use it in GitHub Desktop.
TextArc in D3 -old
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<title>Textarc with D3</title>
<meta name="description" content="">
<meta name="author" content="Jim Vallandingham">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--
  vis.js is written against the D3 v3 API (d3.scale.log, d3.map().forEach,
  d3.nest rollups exposed as `values`, enter selections merging into the
  update selection), so D3 v3 must be loaded rather than v4.
  vis.js also calls the global `queue()`, which is provided by Queue.js.
-->
<script src="https://d3js.org/d3.v3.min.js"></script>
<script src="https://d3js.org/queue.v1.min.js"></script>
<link rel="stylesheet" href="style.css">
</head>
<body>
<div class="container">
<div id="main" role="main">
<h1 id="title">Alice's Adventures In Wonderland</h1>
<div id="word"></div>
<div id="vis"></div>
<div id="about">
<p>This is an attempt at a partial recreation of the amazing <a href="http://www.textarc.org/">TextArc</a> text visualization by W. Bradford Paley.</p>
<p>This is meant as a tribute to the pioneering work, and as an experiment in how one might implement such interactive visualizations using current open web technologies.</p>
<p>This version is implemented completely in the browser using <a href="http://d3js.org/">D3.js</a>. Alice in Wonderland text is derived from <a href="http://www.gutenberg.org/ebooks/11">Project Gutenberg</a>.</p>
<p><a href="https://github.com/vlandham/textarc">Source Code</a></p>
</div>
</div>
</div> <!-- end of .container -->
<!-- stop_words.js must load before vis.js, which reads the global stop_words list. -->
<script src="stop_words.js"></script>
<script src="vis.js"></script>
</body>
</html>
// Lowercase English stop words (plus every single letter) that the
// visualization leaves out of the word layer. Entries are unique and grouped
// by initial letter; lookups should lowercase the candidate word first.
var stop_words = [
  'a', 'about', 'above', 'across', 'after', 'again', 'against', 'all',
  'almost', 'alone', 'along', 'already', 'also', 'although', 'always',
  'among', 'an', 'and', 'another', 'any', 'anybody', 'anyone', 'anything',
  'anywhere', 'are', 'area', 'areas', 'around', 'as', 'ask', 'asked',
  'asking', 'asks', 'at', 'away',
  'b', 'back', 'backed', 'backing', 'backs', 'be', 'became', 'because',
  'become', 'becomes', 'been', 'before', 'began', 'behind', 'being',
  'beings', 'best', 'better', 'between', 'big', 'both', 'but', 'by',
  'c', 'came', 'can', 'cannot', 'case', 'cases', 'certain', 'certainly',
  'clear', 'clearly', 'come', 'could',
  'd', 'did', 'differ', 'different', 'differently', 'do', 'does', 'done',
  'down', 'downed', 'downing', 'downs', 'during',
  'e', 'each', 'early', 'either', 'end', 'ended', 'ending', 'ends',
  'enough', 'even', 'evenly', 'ever', 'every', 'everybody', 'everyone',
  'everything', 'everywhere',
  'f', 'face', 'faces', 'fact', 'facts', 'far', 'felt', 'few', 'find',
  'finds', 'first', 'for', 'four', 'from', 'full', 'fully', 'further',
  'furthered', 'furthering', 'furthers',
  'g', 'gave', 'general', 'generally', 'get', 'gets', 'give', 'given',
  'gives', 'go', 'going', 'good', 'goods', 'got', 'great', 'greater',
  'greatest', 'group', 'grouped', 'grouping', 'groups',
  'h', 'had', 'has', 'have', 'having', 'he', 'her', 'here', 'high',
  'higher', 'highest', 'him', 'his', 'how', 'however',
  'i', 'if', 'important', 'in', 'interest', 'interested', 'interesting',
  'interests', 'into', 'is', 'it', 'its', 'itself',
  'j', 'just',
  'k', 'keep', 'keeps', 'kind', 'knew', 'know', 'known', 'knows',
  'l', 'large', 'largely', 'last', 'later', 'latest', 'least', 'less',
  'let', 'lets', 'like', 'likely', 'long', 'longer', 'longest',
  'm', 'made', 'make', 'making', 'man', 'many', 'may', 'me', 'member',
  'members', 'men', 'might', 'more', 'most', 'mostly', 'mr', 'mrs', 'much',
  'must', 'my', 'myself',
  'n', 'necessary', 'need', 'needed', 'needing', 'needs', 'never', 'new',
  'newer', 'newest', 'next', 'no', 'nobody', 'non', 'noone', 'not',
  'nothing', 'now', 'nowhere', 'number', 'numbers',
  'o', 'of', 'off', 'often', 'old', 'older', 'oldest', 'on', 'once', 'one',
  'only', 'open', 'opened', 'opening', 'opens', 'or', 'order', 'ordered',
  'ordering', 'orders', 'other', 'others', 'our', 'out', 'over',
  'p', 'part', 'parted', 'parting', 'parts', 'per', 'perhaps', 'place',
  'places', 'point', 'pointed', 'pointing', 'points', 'possible', 'present',
  'presented', 'presenting', 'presents', 'problem', 'problems', 'put', 'puts',
  'q', 'quite',
  'r', 'rather', 'really', 'right', 'room', 'rooms',
  's', 'said', 'same', 'saw', 'say', 'says', 'second', 'seconds', 'see',
  'seem', 'seemed', 'seeming', 'seems', 'sees', 'several', 'shall', 'she',
  'should', 'show', 'showed', 'showing', 'shows', 'side', 'sides', 'since',
  'small', 'smaller', 'smallest', 'so', 'some', 'somebody', 'someone',
  'something', 'somewhere', 'state', 'states', 'still', 'such', 'sure',
  't', 'take', 'taken', 'than', 'that', 'the', 'their', 'them', 'then',
  'there', 'therefore', 'these', 'they', 'thing', 'things', 'think',
  'thinks', 'this', 'those', 'though', 'thought', 'thoughts', 'three',
  'through', 'thus', 'to', 'today', 'together', 'too', 'took', 'toward',
  'turn', 'turned', 'turning', 'turns', 'two',
  'u', 'under', 'until', 'up', 'upon', 'us', 'use', 'used', 'uses',
  'v', 'very',
  'w', 'want', 'wanted', 'wanting', 'wants', 'was', 'way', 'ways', 'we',
  'well', 'wells', 'went', 'were', 'what', 'when', 'where', 'whether',
  'which', 'while', 'who', 'whole', 'whose', 'why', 'will', 'with',
  'within', 'without', 'work', 'worked', 'working', 'works', 'would',
  'x',
  'y', 'year', 'years', 'yet', 'you', 'young', 'younger', 'youngest',
  'your', 'yours',
  'z'
];
/* Page-wide theme: white serif text on a black background. */
body, input, textarea {
  font-family: Georgia, "Times New Roman", Serif;
  background-color: black;
  color: white;
}

#main {
  color: white;
}

/* Fixed-width, horizontally centered page wrapper.
   (Previously split across two rules with the same selector.) */
.container {
  width: 980px;
  padding-right: 15px;
  padding-left: 15px;
  margin-right: auto;
  margin-left: auto;
}

/* Label in the top-left corner showing the currently hovered word. */
#word {
  font-size: 20px;
  /* Colour keywords must not be quoted: `color: "white"` is invalid CSS
     and the whole declaration was being dropped. */
  color: white;
  position: absolute;
  top: 20px;
  left: 30px;
}

/* Heading and credits are deliberately dimmed so the visualization stands out. */
#title, #about {
  font-family: Georgia, "Times New Roman", Serif;
  text-align: center;
  color: #ddd;
  opacity: 0.4;
  font-style: italic;
}

/* Lines of the source text laid out around the ellipse. */
.sentence {
  opacity: 0.4;
  fill: #ddd;
}

/* Applied to the lines that contain the hovered word. */
.highlight {
  fill: #1FC946;
  opacity: 1.0;
}

/* Connector lines drawn on hover; pointer-events is disabled so the lines
   never intercept mouse events meant for the words beneath them. */
.line {
  stroke: #C5A438;
  stroke-width: 1;
  fill: none;
  pointer-events: none;
}

/* Placeholders kept for experimentation; font size and opacity are
   currently set from vis.js / the rules above. */
#vis {
  /*font-size: 2px;*/
}

#vis .word {
  /*opacity: 0.4;*/
}
// Adds selection.moveToFront(): re-appends every selected node to its own
// parent, making it the parent's last child. Returns the selection so the
// call can be chained.
d3.selection.prototype.moveToFront = function() {
  var reappend = function() {
    var parent = this.parentNode;
    parent.appendChild(this);
  };
  return this.each(reappend);
};
// Replaces every ASCII punctuation character in `string` with a space and
// then collapses each run of two or more whitespace characters into a single
// space. A lone whitespace character (e.g. a single newline) is left as is.
var removePunctuation = function(string) {
  var punctuation = /[!"#$%&'()*+,\-.\/:;<=>?@\[\\\]^_`{|}~]/g;
  var whitespaceRun = /\s{2,}/g;
  var spaced = string.replace(punctuation, " ");
  return spaced.replace(whitespaceRun, " ");
};
// Size of the drawing area. getWords centers the word ellipse on
// (visWidth / 2, visHeight / 2) and chart() uses these values as the SVG
// width/height before margins are added.
var visWidth = 960;
var visHeight = 500;
// Splits the text on "\n" and returns one record per line:
//   sentence       - the line with every space turned into a non-breaking space
//   lookupSentence - the line with punctuation removed and lowercased
//   length         - the character count of the raw line
// TODO: the lengths are not really needed - remove
// TODO: whitespace seems to be removed?
// - more likely the interesting spacing is already gone in the gutenberg version
var sentenceLengths = function(text) {
  var describeLine = function(line) {
    return {
      sentence: line.replace(/ /g, '\u00a0'),
      lookupSentence: removePunctuation(line).toLowerCase(),
      length: line.length
    };
  };
  var lines = text.split("\n");
  return lines.map(describeLine);
};
// Tokenizes the text and returns one record per distinct word, compared
// case-insensitively. Each record has:
//   key       - the spelling of the word's first occurrence
//   visual    - the spelling used most often (first seen wins ties)
//   x, y      - the mean of the positions of all occurrences
//   positions - one entry per occurrence: {word, index, pos, x, y, angle},
//               where pos is index / total word count and x, y, angle are
//               assigned by radialPlacement
//   count     - the number of occurrences
// Records are sorted by ascending count so that frequent words are drawn last
// (on top).
// TODO: combine with sentences somehow to link sentence data with word data
var getWords = function(text) {
  // Quote characters are deleted rather than replaced by a space.
  var cleaned = removePunctuation(text.replace(/['"\u2018\u2019]/g, ""));

  // Split on any run of whitespace: removePunctuation leaves a lone newline or
  // tab untouched, so splitting on " " alone would fuse the last word of one
  // line with the first word of the next. Empty tokens are dropped.
  var allWords = cleaned.split(/\s+/)
    .filter(function(token) { return token.length > 0; })
    .map(function(token) { return {"word": token}; });

  //TODO: magic knowledge of the size of the ellipse here.
  var wordCenters = radialPlacement().width(460).height(280)
    .center({"x": visWidth / 2, "y": visHeight / 2});
  wordCenters(allWords);

  // Group the occurrences by lowercase word. A prototype-less object keeps
  // words such as "constructor" from colliding with inherited properties.
  var wordsLen = allWords.length;
  var groups = Object.create(null);
  var firstSeenOrder = [];
  allWords.forEach(function(word, index) {
    var wordKey = word.word.toLowerCase();
    if (!groups[wordKey]) {
      groups[wordKey] = [];
      firstSeenOrder.push(wordKey);
    }
    groups[wordKey].push({
      "word": word.word,
      "index": index,
      "pos": index / wordsLen,
      "x": word.x,
      "y": word.y,
      "angle": word.angle
    });
  });

  // get the version of the word used in the most positions
  // this will be the visual representation used
  // TODO: still not quite right. Example - FATHER
  var getMostFrequent = function(positions) {
    var counts = Object.create(null);
    var spellings = [];
    positions.forEach(function(p) {
      if (!counts[p.word]) {
        counts[p.word] = 0;
        spellings.push(p.word);
      }
      counts[p.word] += 1;
    });
    var best = spellings[0];
    spellings.forEach(function(spelling) {
      // strict comparison: the earliest spelling wins a tie
      if (counts[spelling] > counts[best]) {
        best = spelling;
      }
    });
    return best;
  };

  var mean = function(positions, field) {
    var total = 0;
    positions.forEach(function(p) { total += p[field]; });
    return total / positions.length;
  };

  var wordMap = firstSeenOrder.map(function(wordKey) {
    var positions = groups[wordKey];
    return {
      "key": positions[0].word,
      "visual": getMostFrequent(positions),
      "x": mean(positions, "x"),
      "y": mean(positions, "y"),
      "positions": positions,
      "count": positions.length
    };
  });

  // sort to put more frequent words on top
  return wordMap.sort(function(a, b) { return a.count - b.count; });
};
// Creates a layout function that spreads objects evenly around an ellipse.
//
// Calling the returned function with an array of objects assigns `x`, `y` and
// `angle` (in degrees) to every object, in order: the first at the `start`
// angle and each following one 360 / keys.length degrees further on.
// `width` and `height` act as the horizontal and vertical radii.
//
// Every run starts again from `start` and from the initial "tapper" offset,
// so the same instance can be reused and the start() setter takes effect.
//
// TODO: this lazily adds attributes to the input keys - so everything
// is expected to be an object. Bad for many reasons.
//
// Chainable accessors (getter without arguments, setter with one):
//   center({x, y}), width(n), height(n), start(degrees)
//   keys()     - the objects positioned by the most recent run
//   keys(list) - runs the layout on `list` and returns the layout function
var radialPlacement = function() {
  var INITIAL_TAPPER = -50;

  var placed = [];
  var increment = 20;
  var width = 500;
  var height = 300;
  var tapper = INITIAL_TAPPER;
  var center = {"x": 0, "y": 0};
  var start = -90;
  var current = start;

  // Point on the ellipse at `angle` degrees. The tapper offset shifts the
  // point by -tapper along x and by +tapper along y.
  var radialLocation = function(center, angle, width, height, tapper) {
    var radians = angle * Math.PI / 180;
    return {
      "x": center.x + (width * Math.cos(radians) - tapper),
      "y": center.y + (height * Math.sin(radians) + tapper)
    };
  };

  var place = function(obj) {
    var value = radialLocation(center, current, width, height, tapper);
    // now it just adds attributes to the object. DANGEROUS
    obj.x = value.x;
    obj.y = value.y;
    obj.angle = current;
    placed.push(obj);
    current += increment;
    // The offset grows by the angular increment on every step and is clamped
    // at zero, so only the first few objects are displaced.
    tapper = Math.min(tapper + increment, 0);
    return value;
  };

  var placement = function(keys) {
    // Reset the per-run state; without this a second call would continue from
    // wherever the previous run stopped.
    placed = [];
    current = start;
    tapper = INITIAL_TAPPER;
    increment = 360 / keys.length;
    keys.forEach(function(k) {
      place(k);
    });
  };

  placement.keys = function(_) {
    if (!arguments.length) {
      return placed.slice();
    }
    // This branch used to call an undefined helper and always threw.
    placement(_);
    return placement;
  };

  placement.center = function(_) {
    if (!arguments.length) {
      return center;
    }
    center = _;
    return placement;
  };

  placement.width = function(_) {
    if (!arguments.length) {
      return width;
    }
    width = _;
    return placement;
  };

  placement.height = function(_) {
    if (!arguments.length) {
      return height;
    }
    height = _;
    return placement;
  };

  placement.start = function(_) {
    if (!arguments.length) {
      return start;
    }
    start = _;
    return placement;
  };

  return placement;
};
// Reusable chart: returns a function meant to be passed to selection.call().
// The bound datum must be {sentences: [...], words: [...]} as produced by
// sentenceLengths and getWords. Sentences are drawn as tiny text around an
// ellipse and the non-stop-words on top; hovering a word draws a line to each
// of its occurrences, highlights the sentences containing it and shows the
// word in the #word element.
//
// NOTE: written against the D3 v3 API (d3.scale.log, enter().append() merging
// into the update selection, (d, i) event handlers).
var chart = function() {
  var width = visWidth;
  var height = visHeight;
  var margin = {top: 20, right: 20, bottom: 20, left: 20};
  var g = null;
  var sentence = null;
  var word = null;
  var sentenceCenters = radialPlacement().width(520).center({"x": width / 2 - 30, "y": height / 2});

  var chart = function(selection) {
    selection.each(function(rawData) {
      var sentences = rawData.sentences;
      sentenceCenters(sentences);
      var words = rawData.words;

      var svg = d3.select(this).selectAll("svg").data([sentences]);
      // In D3 v3 the appended <svg> is merged into `svg`, so the attributes
      // below also apply to the element that was just created.
      svg.enter().append("svg").append("g");
      svg.attr("width", width + margin.left + margin.right);
      svg.attr("height", height + margin.top + margin.bottom);

      g = svg.select("g")
        .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

      sentence = g.selectAll(".sentence")
        .data(sentences).enter()
        .append("text")
        .attr("class", "sentence")
        .attr("x", function(d) { return d.x; })
        .attr("y", function(d) { return d.y; })
        .attr("text-anchor", "start")
        .attr("font-size", "2px")
        .text(function(d) { return d.sentence; });

      // Frequent words are drawn brighter, on a log scale.
      var maxCount = d3.max(words, function(w) { return w.count; });
      var color = d3.scale.log()
        .domain([1, maxCount / 2])
        .range(["#333", "#fff"]);

      // Build the stop-word lookup once instead of scanning the list for
      // every word. The list is lowercase, so the word key (which keeps the
      // casing of its first occurrence, e.g. "The") is lowercased first.
      var stopLookup = Object.create(null);
      stop_words.forEach(function(stopWord) { stopLookup[stopWord] = true; });
      var isStopWord = function(w) {
        return stopLookup[w.key.toLowerCase()] === true;
      };

      word = g.selectAll(".word")
        .data(words.filter(function(w) { return !isStopWord(w); })).enter()
        .append("text")
        .attr("class", "word")
        .attr("x", function(d) { return d.x; })
        .attr("y", function(d) { return d.y; })
        // Anchor towards the center so labels grow inwards from either side.
        .attr("text-anchor", function(d) { return d.x > (width / 2) ? "end" : "start"; })
        .attr("font-size", "8px")
        .attr("fill", function(d) { return color(d.count); })
        .text(function(d) { return d.visual; })
        .on("mouseover", mouseover)
        .on("mouseout", mouseout);
    });
  };

  //TODO: this will match sentences with sub-words in them as well.
  // example "mouse" will match "mouse" but also "doormouse".
  // a fix would be to add spaces around the word - but then we need
  // to ensure that the lookupSentence is removing 's and other punctuation properly
  function getSentencesWith(aWord) {
    var needle = aWord.toLowerCase();
    return sentence.filter(function(s) {
      return s.lookupSentence.indexOf(needle) > -1;
    });
  }

  function mouseover(d, i) {
    var bbox = this.getBBox();
    // Labels right of center are end-anchored, so their middle lies to the
    // left of d.x; the others extend to the right.
    var direction = d.x > (width / 2) ? -1 : 1;
    g.selectAll(".line")
      .data(d.positions)
      .enter()
      .append("line")
      .attr("class", "line")
      .attr("x1", d.x + (direction * (bbox.width / 2)))
      .attr("y1", d.y - (bbox.height / 3))
      .attr("x2", function(p) { return p.x; })
      .attr("y2", function(p) { return p.y; });

    // The word is plain text, so set it as text rather than as HTML.
    d3.select("#word").text(d.visual);

    // The matching sentences are looked up once and cached on the datum.
    if (!d.sentences) {
      d.sentences = getSentencesWith(d.key);
    }
    d.sentences.classed("highlight", true).moveToFront();
  }

  function mouseout(d, i) {
    g.selectAll(".line").remove();
    sentence.classed("highlight", false);
  }

  return chart;
};
// Binds `data` as the datum of the element(s) matched by `selector` and then
// invokes `plot` on that selection via selection.call().
function plotData(selector, data, plot) {
  var target = d3.select(selector);
  var bound = target.datum(data);
  bound.call(plot);
}
// The chart instance that display() passes to plotData for rendering into #vis.
var plot = chart();
// Completion callback for the text load: turns the raw text into sentence and
// word data and renders it into #vis.
//   error - truthy when loading failed
//   text  - the full text as one string
// If loading failed (or no text arrived) the problem is logged and nothing is
// drawn, instead of crashing while trying to split an undefined value.
function display(error, text) {
  if (error || typeof text !== "string") {
    console.error("Unable to load the text for the visualization", error);
    return;
  }
  var sentences = sentenceLengths(text);
  var words = getWords(text);
  plotData("#vis", {"sentences": sentences, "words": words}, plot);
}
// Kick off the visualization: request data/alice.txt through d3.text and pass
// the outcome to display.
// NOTE(review): `queue` is not defined anywhere in this file - presumably the
// global from the Queue.js library; confirm the page loads it before vis.js.
queue()
.defer(d3.text, "data/alice.txt")
.await(display);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment