Created
September 29, 2008 22:13
-
-
Save source-data/13695 to your computer and use it in GitHub Desktop.
generates a word cloud on the fly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/************************************************ | |
*WordCloud methods below are modifications of wordcloud by yoah.bardavid@gmail.com and razbarvaz@gmail.com | |
*wordcloud is part of the following project: http://visapi-gadgets.googlecode.com | |
*The visapi-gadgets project is licensed under Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) | |
*Modifications include: | |
* inclusion of a logarithmic scale for the word sizes
* exclusion of single letter words | |
* exclusion of excludedTerms | |
* exclusion of numbers | |
* attempt to merge plural and singular | |
* sort by frequency | |
* wordLimit to display only most frequent terms | |
************************************************/ | |
/**
 * Builds an HTML word cloud from a piece of text.
 *
 * @constructor
 * @param {string} text - the text whose words are counted by draw().
 */
var WordCloud = function(text) {
  this.selectedText = text;
};

// Characters that end a word in WordCloud.addWords.
WordCloud.DELIMITERS = ' ,.<>[]{}/`´~!@#$%^&*()-_=+\'"\\|:;?\r\n';

// Lower-case stop words that never appear in the cloud. Always query this
// table with hasOwnProperty so that inherited Object.prototype members
// (e.g. "constructor") are not mistaken for stop words.
WordCloud.EXCLUDED_TERMS = {a:1, an:1, as:1, in:1, into:1, upon:1, out:1, now:1, then:1, the:1, that:1, there:1, this:1, these:1, those:1, thus:1, and:1, or:1, what:1, who:1, whose:1, why:1, with:1, where:1, which:1, while:1, when:1, for:1, from:1, at:1, how:1, by:1, on:1, be:1, is:1, am:1, are:1, was:1, were:1, been:1, being:1, has:1, had:1, have:1, can:1, could:1, may:1, might:1, do:1, does:1, did:1, doing:1, done:1, will:1, would:1, should:1, shall:1, any:1, due:1, of:1, to:1, et:1, al:1, one:1, such:1, but:1, over:1, other:1, if:1, not:1, well:1, more:1, than:1, also:1, i:1, he:1, she:1, we:1, you:1, your:1, yours:1, they:1, my:1, his:1, her:1, hers:1, their:1, our:1, ours:1, me:1, us:1, them:1, mine:1, it:1, its:1, some:1, many:1, very:1, few:1, all:1, only:1, dr:1};

/**
 * Tells whether a raw word should be counted: it must be longer than one
 * character, must not be a stop word and must not contain any digit.
 *
 * @param {string} word - candidate word (any case).
 * @returns {boolean} true when the word belongs in the cloud.
 */
WordCloud.isCountedWord = function(word) {
  if (word.length <= 1 || /\d/.test(word)) {
    return false;
  }
  return !Object.prototype.hasOwnProperty.call(WordCloud.EXCLUDED_TERMS, word.toLowerCase());
};

/**
 * Splits text on WordCloud.DELIMITERS and counts every accepted word.
 * The same filter is applied to every word, whether it is ended by a
 * delimiter or by the end of the text.
 *
 * @param {string} text - text to scan; null/undefined is treated as empty.
 * @param {string[]} list - receives each new lower-cased word once, in first-seen order.
 * @param {Object} map - lower-cased word -> number of occurrences; updated in place.
 */
WordCloud.addWords = function(text, list, map) {
  var input = (text == null) ? '' : String(text);
  var word = '';
  var flush = function() {
    if (WordCloud.isCountedWord(word)) {
      WordCloud.addWord(word, list, map);
    }
    word = '';
  };
  for (var i = 0; i < input.length; i++) {
    var c = input.charAt(i);
    if (WordCloud.DELIMITERS.indexOf(c) >= 0) {
      flush();
    } else {
      word += c;
    }
  }
  flush(); // the last word has no trailing delimiter
};

/**
 * Counts a single word, merging a naive plural ("word" + "s") with its
 * singular: whichever form was seen first keeps the count.
 *
 * @param {string} word - the word to count (any case; stored lower-cased).
 * @param {string[]} list - receives the word when it is seen for the first time.
 * @param {Object} map - lower-cased word -> number of occurrences; updated in place.
 */
WordCloud.addWord = function(word, list, map) {
  var wl = word.toLowerCase();
  var plural = wl + "s"; // potential plural of wl
  var singular = wl.replace(/s$/, ''); // potential singular of wl
  // Own-property checks only: a plain truthiness test would pick up
  // inherited members such as "constructor" and corrupt the count.
  var seen = function(key) {
    return Object.prototype.hasOwnProperty.call(map, key);
  };
  if (seen(wl)) {
    map[wl]++;
  } else if (seen(plural)) { // the plural is already tracked, keep counting under it
    map[plural]++;
  } else if (seen(singular)) { // wl is a potential plural of a tracked singular
    map[singular]++;
  } else {
    map[wl] = 1; // first occurrence
    list.push(wl);
  }
};

WordCloud.MIN_UNIT_SIZE = 1;
WordCloud.MAX_UNIT_SIZE = 8;
WordCloud.RANGE_UNIT_SIZE = WordCloud.MAX_UNIT_SIZE - WordCloud.MIN_UNIT_SIZE;

// Inline style for each unit size; entry [size - 1] is used for a word of that size.
WordCloud.STYLES = ["font-size: 10px; color: #acc1f3;",
                    "font-size: 14px; color: #86a0dc;",
                    "font-size: 18px; color: #607ec5;",
                    "font-size: 22px; color: #264ca2;",
                    "font-size: 26px; color: #133b97;",
                    "font-size: 32px; color: #002a8b;",
                    "font-size: 36px; color: #071a41;",
                    "font-size: 40px; color: #081122;",
                    "font-size: 44px; color: #000000;"];

/**
 * Renders the cloud as an HTML string, most frequent words first (ties keep
 * first-seen order). Word sizes follow a logarithmic scale of the counts.
 *
 * @param {Object} [options]
 * @param {number} [options.minimum=3] - count mapped to the low end of the scale (unless a word is rarer).
 * @param {number} [options.maximum=20] - count mapped to the high end of the scale (unless a word is more frequent).
 * @param {number|string} [options.wordLimit=100] - maximum number of words emitted.
 * @returns {string} a <div class="word-cloud"> holding one <span class="wcl"> per word.
 */
WordCloud.prototype.draw = function(options) {
  var opts = options || {};
  // Each option falls back to its default individually, so a partial
  // options object never feeds undefined/NaN into Math.log.
  var positiveOr = function(value, fallback) {
    var n = Number(value);
    return (value != null && isFinite(n) && n > 0) ? n : fallback;
  };
  var minimum = positiveOr(opts.minimum, 3);
  var maximum = positiveOr(opts.maximum, 20);
  var rawLimit = Number(opts.wordLimit);
  var wordLimit = (opts.wordLimit != null && isFinite(rawLimit) && rawLimit >= 0) ?
      Math.floor(rawLimit) : 100;

  var wordMap = {};
  var wordList = [];
  var source = (this.selectedText == null) ? '' : String(this.selectedText);
  var pieces = source.split(/\W/);
  for (var p = 0; p < pieces.length; p++) {
    WordCloud.addWords(pieces[p], wordList, wordMap);
  }

  // Compute the (log) frequency range and collect sortable entries.
  var minFreq = Math.log(minimum);
  var maxFreq = Math.log(maximum);
  var entries = [];
  for (var w = 0; w < wordList.length; w++) {
    var count = wordMap[wordList[w]];
    var logCount = Math.log(count);
    minFreq = Math.min(minFreq, logCount);
    maxFreq = Math.max(maxFreq, logCount);
    entries.push({word: wordList[w], count: count, logCount: logCount, order: w});
  }
  var range = Math.max(maxFreq - minFreq, 1);

  // Numeric comparator: descending count, first-seen order breaks ties.
  entries.sort(function(a, b) {
    return (b.count - a.count) || (a.order - b.order);
  });

  var html = [];
  html.push('<div class="word-cloud" style="background-color:white; padding: 10px 10px">');
  var shown = Math.min(entries.length, wordLimit);
  for (var e = 0; e < shown; e++) {
    var size = WordCloud.MIN_UNIT_SIZE +
        Math.round((entries[e].logCount - minFreq) / range * WordCloud.RANGE_UNIT_SIZE);
    // Clamp so that customised unit sizes can never index past the style table.
    var styleIndex = Math.max(0, Math.min(size, WordCloud.STYLES.length) - 1);
    html.push("<span class=\"wcl\" style=\"", WordCloud.STYLES[styleIndex], "\"> ", entries[e].word, "</span>");
  }
  html.push('</div>');
  return html.join('');
};
//end of word cloud routines | |
/****************************/ | |
// Registers the "word-cloud" command with CmdUtils: the preview shows a cloud
// built from the current selection and a link that inserts it in place of
// the selection.
CmdUtils.CreateCommand({
  //based on http://visapi-gadgets.googlecode.com/svn/trunk/wordcloud/wc.js
  author: {name: "Thomas Lemberger", email: "thomas.lemberger@gmail.com"},
  license: "GPL",
  name: "word-cloud",
  takes: {"text": noun_arb_text},
  help: "Select some text, call Ubiquity (option-space), type 'word-cloud' and (optional) '#' followed by a number to limit the number of words displayed (eg 'word-cloud #30' shows the 30 most frequent terms). A link allows to replace the selected text by the word cloud.",
  description: "This command generates a word cloud from selection. Based on wordcloud (http://visapi-gadgets.googlecode.com) by yoah.bardavid@gmail.com and razbarvaz@gmail.com",
  /**
   * Renders the cloud preview.
   *
   * @param pBlock - preview element; its innerHTML is replaced.
   * @param directObject - command argument; a trailing "#<number>" in its
   *     text sets the word limit (default 100).
   */
  preview: function(pBlock, directObject){
    // The inline onclick handler below calls setSelection by name, so a
    // forwarder to CmdUtils.setSelection is attached to the preview document.
    pBlock.ownerDocument.setSelection = function(content,option){CmdUtils.setSelection(content,option)};
    var matches = directObject.text.match(/#(\d+)$/);
    // Declared locally (no implicit global) and parsed to a real number.
    var limit = matches ? parseInt(matches[1], 10) : 100;
    var inputText = CmdUtils.getSelection();
    if (!inputText) {
      // No selection: fall back to the text of paragraphs, headers and links
      // of the focused window (`context` is not defined in this file -
      // presumably provided by the host; verify). Not very good...
      inputText = jQuery("p,:header,a",context.focusedWindow.document.body).text();
    }
    var wc = new WordCloud(inputText);
    var cloudHTML = wc.draw({minimum:3, maximum:100, wordLimit: limit});
    // escape()/unescape() keep the generated HTML intact inside the
    // single-quoted argument of the inline handler.
    var link = "<br/><span onmouseover=\"this.style.cursor='pointer'\" "+
               " onclick=\"setSelection(unescape('"+escape(cloudHTML)+"'))\">"+
               "insert this wordcloud >>>></span>";
    pBlock.innerHTML = "Limit the size of the cloud to #"+limit+" words.<br/><br/>" + cloudHTML + link;
  },
  execute: function(){}
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment