Skip to content

Instantly share code, notes, and snippets.

@Nemo157
Created May 5, 2009 07:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Nemo157/106875 to your computer and use it in GitHub Desktop.
Save Nemo157/106875 to your computer and use it in GitHub Desktop.
/************************************************
*WordCloud methods below are modifications of wordcloud by yoah.bardavid@gmail.com and razbarvaz@gmail.com
*wordcloud is part of the following project: http://visapi-gadgets.googlecode.com
*The visapi-gadgets project is licensed under Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
*Modifications include:
* inclusion of some sort of log scale
* exclusion of single letter words
* exclusion of excludedTerms
* exclusion of numbers
* attempt to merge plural and singular
* sort by frequency
* wordLimit to display only most frequent terms
************************************************/
/**
 * Word cloud built from a piece of text.
 *
 * Declared as a function declaration (rather than by assigning to an
 * undeclared identifier) so that it does not rely on implicit-global
 * creation, which throws a ReferenceError in strict mode.
 *
 * @param {string} text - The text the cloud is generated from; stored as-is
 *     in `selectedText`.
 */
function WordCloud(text) {
  this.selectedText = text;
}
/**
 * Split a text into words and add every acceptable word to a list and map.
 *
 * A word is skipped when it is shorter than two characters, is one of the
 * excluded (stop) terms, or contains a digit. These filters are applied to
 * every word of the text, not only to the last one.
 *
 * @param {string} text - Text to tokenize; empty/missing text is a no-op.
 * @param {Array} list - Receives each unique word (via WordCloud.addWord).
 * @param {Object} map - Word -> occurrence count (via WordCloud.addWord).
 */
WordCloud.addWords = (function() {
  // Built once instead of on every call; keys are lower case because the
  // lookup below lower-cases the candidate word first.
  var excludedWords = [
    'a', 'an', 'as', 'in', 'into', 'upon', 'out', 'now', 'then', 'the',
    'that', 'there', 'this', 'these', 'those', 'thus', 'and', 'or', 'what',
    'who', 'whose', 'why', 'with', 'where', 'which', 'while', 'when', 'for',
    'from', 'at', 'how', 'by', 'on', 'be', 'is', 'am', 'are', 'was', 'were',
    'been', 'being', 'has', 'had', 'have', 'can', 'could', 'may', 'might',
    'do', 'does', 'did', 'doing', 'done', 'will', 'would', 'should', 'shall',
    'any', 'due', 'of', 'to', 'et', 'al', 'one', 'such', 'but', 'over',
    'other', 'if', 'not', 'well', 'more', 'than', 'also', 'i', 'he', 'she',
    'we', 'you', 'your', 'yours', 'they', 'my', 'his', 'her', 'hers',
    'their', 'our', 'ours', 'me', 'us', 'them', 'mine', 'it', 'its', 'some',
    'many', 'very', 'few', 'all', 'only', 'dr'
  ];
  var excludedTerms = {};
  for (var t = 0; t < excludedWords.length; t++) {
    excludedTerms[excludedWords[t]] = true;
  }
  var hasOwn = Object.prototype.hasOwnProperty;
  // Characters that end a word (tab added; the original listed \r twice).
  var separators = ' ,.<>[]{}/`´~!@#$%^&*()-_=+\'"\\|:;?\t\r\n';

  // True when the word should be counted.
  function isAcceptable(word) {
    return word.length > 1 &&
      // Own-property check so words such as "constructor" are not mistaken
      // for excluded terms through Object.prototype.
      !hasOwn.call(excludedTerms, word.toLowerCase()) &&
      !/\d/.test(word);
  }

  return function(text, list, map) {
    if (!text) {
      return;
    }
    var word = '';
    for (var i = 0; i < text.length; i++) {
      var c = text.charAt(i);
      if (separators.indexOf(c) >= 0) {
        if (isAcceptable(word)) {
          WordCloud.addWord(word, list, map);
        }
        word = '';
      } else {
        word += c;
      }
    }
    // Flush the trailing word (text that does not end with a separator).
    if (isAcceptable(word)) {
      WordCloud.addWord(word, list, map);
    }
  };
}());
/**
 * Count a single word, merging simple singular/plural forms.
 *
 * The word is lower-cased. If it, its potential plural (word + "s") or its
 * potential singular (word without a trailing "s") is already counted, that
 * existing entry is incremented; otherwise a new entry is created and the
 * word is appended to the list.
 *
 * @param {string} word - The word to count.
 * @param {Array} list - List of unique (lower-cased) words, in first-seen order.
 * @param {Object} map - Word -> occurrence count.
 */
WordCloud.addWord = function(word, list, map) {
  // Own-property checks: a plain truthiness test on map[key] would also hit
  // inherited members such as "constructor" and corrupt the count.
  var hasOwn = Object.prototype.hasOwnProperty;
  var wl = word.toLowerCase();
  var plural = wl + "s"; // potential plural of wl
  var singular = wl.replace(/s$/, ''); // potential singular of wl
  var existing = null;
  if (hasOwn.call(map, wl)) {
    existing = wl;
  } else if (hasOwn.call(map, plural)) {
    // The plural was seen first, so keep counting under it.
    existing = plural;
  } else if (hasOwn.call(map, singular)) {
    // wl is a potential plural and the singular is already counted.
    existing = singular;
  }
  if (existing !== null) {
    map[existing]++;
  } else {
    map[wl] = 1; // never seen the word, so add it
    list.push(wl); // stored lower-cased
  }
};
// Size units used by draw(): the smallest and largest unit, and the span
// between them that the scaled log-frequency is multiplied by.
(function(cloud) {
  var smallestUnit = 1;
  var largestUnit = 8;
  cloud.MIN_UNIT_SIZE = smallestUnit;
  cloud.MAX_UNIT_SIZE = largestUnit;
  cloud.RANGE_UNIT_SIZE = largestUnit - smallestUnit;
}(WordCloud));
/**
 * Render the word cloud for `this.selectedText` as an HTML string.
 *
 * Words are counted with WordCloud.addWords, ordered by descending count
 * (ties keep first-seen order) and truncated to at most `wordLimit` entries.
 * Each word's size unit is derived from the logarithm of its count, scaled
 * between the log of `minimum`/`maximum` (widened to the observed counts).
 *
 * @param {Object} [options]
 * @param {number} [options.minimum=3] - Lower bound of the frequency scale.
 * @param {number} [options.maximum=20] - Upper bound of the frequency scale.
 * @param {number|string} [options.wordLimit=100] - Maximum number of words
 *     rendered; numeric strings are accepted.
 * @returns {string} A `<div class="word-cloud">` containing one
 *     `<span class="wcl">` per rendered word.
 */
WordCloud.prototype.draw = function(options) {
  // Convert to a number, falling back when the value is missing or unusable.
  function numberOr(value, fallback, allowZero) {
    if (value == null) {
      return fallback;
    }
    var n = Number(value);
    if (allowZero ? n >= 0 : (n > 0 && isFinite(n))) {
      return n;
    }
    return fallback;
  }

  // Defaults are applied per option so a partial options object works, and
  // the caller's object is never modified.
  var opts = options || {};
  var minimum = numberOr(opts.minimum, 3, false);
  var maximum = numberOr(opts.maximum, 20, false);
  var wordLimit = Math.floor(numberOr(opts.wordLimit, 100, true));

  var styleArray = ["font-size: 10px; color: #acc1f3;",
    "font-size: 14px; color: #86a0dc;",
    "font-size: 18px; color: #607ec5;",
    "font-size: 22px; color: #264ca2;",
    "font-size: 26px; color: #133b97;",
    "font-size: 32px; color: #002a8b;",
    "font-size: 36px; color: #071a41;",
    "font-size: 40px; color: #081122;",
    "font-size: 44px; color: #000000;"];

  var wordMap = {};
  var wordList = [];
  var sourceText = this.selectedText == null ? '' : String(this.selectedText);
  var pieces = sourceText.split(/\W/);
  for (var p = 0; p < pieces.length; p++) {
    WordCloud.addWords(pieces[p], wordList, wordMap);
  }

  // Compute the frequency range and collect one entry per unique word.
  // wordList (not the keys of wordMap) drives the iteration so first-seen
  // order is available as a deterministic tie-breaker.
  var minFreq = Math.log(minimum);
  var maxFreq = Math.log(maximum);
  var entries = [];
  for (var w = 0; w < wordList.length; w++) {
    var count = wordMap[wordList[w]];
    var logCount = Math.log(count);
    minFreq = Math.min(minFreq, logCount);
    maxFreq = Math.max(maxFreq, logCount);
    entries.push({word: wordList[w], count: count, freq: logCount, order: w});
  }
  var range = Math.max(maxFreq - minFreq, 1);

  // Most frequent first. The comparator returns a number, as sort requires.
  entries.sort(function(a, b) {
    return (b.count - a.count) || (a.order - b.order);
  });

  var html = [];
  html.push('<div class="word-cloud" style="background-color:white; padding: 10px 10px">');
  var shown = Math.min(entries.length, wordLimit);
  for (var e = 0; e < shown; e++) {
    var size = WordCloud.MIN_UNIT_SIZE +
      Math.round((entries[e].freq - minFreq) / range * WordCloud.RANGE_UNIT_SIZE);
    html.push("<span class=\"wcl\" style=\"", styleArray[size - 1], "\"> ", entries[e].word, "</span>");
  }
  html.push('</div>');
  // Returned directly; the original also assigned the result to an
  // undeclared `wordCloudHTML` variable, creating an accidental global.
  return html.join('');
};
//end of word cloud routines
/****************************/
// Registers the "word-cloud" command through CmdUtils.CreateCommand.
CmdUtils.CreateCommand({
  //based on http://visapi-gadgets.googlecode.com/svn/trunk/wordcloud/wc.js
  author: {name: "Thomas Lemberger", email: "thomas.lemberger@gmail.com"},
  license: "GPL",
  name: "word-cloud",
  takes: {"text": noun_arb_text},
  help: "Select some text, call Ubiquity (option-space), type 'word-cloud' and (optional) '#' followed by a number to limit the number of words displayed (eg 'word-cloud #30' shows the 30 most frequent terms). A link allows to replace the selected text by the word cloud.",
  description: "This command generates a word cloud from selection. Based on wordcloud (http://visapi-gadgets.googlecode.com) by yoah.bardavid@gmail.com and razbarvaz@gmail.com",
  /**
   * Build the preview: a word cloud of the current selection (or of the
   * page's paragraphs, headers and links when nothing is selected) plus a
   * clickable "insert" link.
   *
   * @param pBlock - Preview element whose innerHTML is replaced.
   * @param directObject - Command argument; a trailing "#<digits>" in its
   *     text sets the word limit (default 100).
   */
  preview: function(pBlock, directObject){
    // The inline onclick handler below calls setSelection(), so a wrapper
    // around CmdUtils.setSelection is attached to the preview's document.
    pBlock.ownerDocument.setSelection = function(content,option){CmdUtils.setSelection(content,option)};
    var matches = directObject.text.match(/#(\d+)$/);
    // Local variable (the original leaked `limit` as an implicit global),
    // parsed as a base-10 number.
    var limit = matches ? parseInt(matches[1], 10) : 100;
    var inputText = CmdUtils.getSelection();
    if (!inputText) {
      // Fallback when nothing is selected; not very good...
      inputText = jQuery("p,:header,a", context.focusedWindow.document.body).text();
    }
    var wc = new WordCloud(inputText);
    var cloudHTML = wc.draw({minimum: 3, maximum: 100, wordLimit: limit});
    // The cloud markup is escape()d so it can sit inside the quoted onclick
    // attribute and is unescape()d again when the link is clicked.
    var link = "<br/><span onmouseover=\"this.style.cursor='pointer'\" " +
      " onclick=\"setSelection(unescape('" + escape(cloudHTML) + "'))\">" +
      "insert this wordcloud &gt;&gt;&gt;&gt;</span>";
    pBlock.innerHTML = "Limit the size of the cloud to #" + limit + " words.<br/><br/>" + cloudHTML + link;
  },
  // Intentionally empty: all work happens in the preview.
  execute: function(){}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment