Skip to content

Instantly share code, notes, and snippets.

@DavidSandey
Created April 29, 2009 00:03
Show Gist options
  • Save DavidSandey/103485 to your computer and use it in GitHub Desktop.
Save DavidSandey/103485 to your computer and use it in GitHub Desktop.
/************************************************
*WordCloud methods below are modifications of wordcloud by yoah.bardavid@gmail.com and razbarvaz@gmail.com
*wordcloud is part of the following project: http://visapi-gadgets.googlecode.com
*The visapi-gadgets project is licensed under Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
*Modifications include:
* inclusion of some sort of log scale
* exclusion of single letter words
* exclusion of excludedTerms
* exclusion of numbers
* attempt to merge plural and singular
* sort by frequency
* wordLimit to display only most frequent terms
************************************************/
// WordCloud counts the words of a text and renders the most frequent ones as
// an HTML "cloud" in which more frequent words get a larger, darker font.
//
// Public surface:
//   new WordCloud(text)                 - remembers the text in this.selectedText
//   WordCloud.addWords(text, list, map) - tokenizes text and counts the accepted words
//   WordCloud.addWord(word, list, map)  - counts a single word
//   WordCloud.MIN_UNIT_SIZE / MAX_UNIT_SIZE / RANGE_UNIT_SIZE
//   WordCloud.prototype.draw(options)   - returns the cloud as an HTML string
//
// The implementation lives in an immediately-invoked function so that helpers
// and lookup tables stay private and are built once instead of on every call.
var WordCloud = (function() {
  // Own-property test that ignores members inherited from Object.prototype
  // ("constructor", and "watch"/"unwatch" in old Firefox), which would
  // otherwise be mistaken for known words.
  var hasOwn = Object.prototype.hasOwnProperty;

  // Every character in this string terminates the current word in addWords.
  var DELIMITERS = ' ,.<>[]{}/`´~!@#$%^&*()-_=+\'"\\|:;?\r\r\n';

  // Lower-cased stop words that are never counted.
  var EXCLUDED_TERMS = (function() {
    var terms = ('a an as in into upon out now then the that there this these ' +
        'those thus and or what who whose why with where which while when for ' +
        'from at how by on be is am are was were been being has had have can ' +
        'could may might do does did doing done will would should shall any ' +
        'due of to et al one such but over other if not well more than also ' +
        'i he she we you your yours they my his her hers their our ours me us ' +
        'them mine it its some many very few all only dr').split(' ');
    var lookup = {};
    for (var i = 0; i < terms.length; i++) {
      lookup[terms[i]] = true;
    }
    return lookup;
  })();

  // Inline styles indexed by (unit size - 1); small and light to big and dark.
  // With MAX_UNIT_SIZE = 8 only the first eight entries are reachable.
  var STYLES = [
    "font-size: 10px; color: #acc1f3;",
    "font-size: 14px; color: #86a0dc;",
    "font-size: 18px; color: #607ec5;",
    "font-size: 22px; color: #264ca2;",
    "font-size: 26px; color: #133b97;",
    "font-size: 32px; color: #002a8b;",
    "font-size: 36px; color: #071a41;",
    "font-size: 40px; color: #081122;",
    "font-size: 44px; color: #000000;"
  ];

  // True when a token should be counted: longer than one character, not a
  // stop word, and free of digits.
  function isCountable(word) {
    return word.length > 1 &&
        !hasOwn.call(EXCLUDED_TERMS, word.toLowerCase()) &&
        !/\d/.test(word);
  }

  // Returns value as a finite number greater than zero, or fallback otherwise
  // (Math.log needs a strictly positive argument).
  function positiveNumberOr(value, fallback) {
    var n = Number(value);
    return (isFinite(n) && n > 0) ? n : fallback;
  }

  // Returns value as a non-negative whole number (numeric strings accepted),
  // or fallback when it is missing or not a usable number.
  function wordLimitOr(value, fallback) {
    if (value === undefined || value === null || value === '') {
      return fallback;
    }
    var n = Math.floor(Number(value));
    return n >= 0 ? n : fallback; // NaN fails the comparison
  }

  // Constructor: stores the text the cloud will be generated from.
  function WordCloud(text) {
    this.selectedText = text;
  }

  // Splits text on the delimiter characters and counts every accepted word.
  //   text - string to tokenize
  //   list - array receiving each distinct (lower-cased) word once, in
  //          first-seen order
  //   map  - object mapping word -> number of occurrences
  // The same acceptance filter is applied to every token, not just the one
  // at the end of the text.
  WordCloud.addWords = function(text, list, map) {
    var word = '';
    for (var i = 0; i < text.length; i++) {
      var c = text.charAt(i);
      if (DELIMITERS.indexOf(c) >= 0) {
        if (isCountable(word)) {
          WordCloud.addWord(word, list, map);
        }
        word = '';
      } else {
        word += c;
      }
    }
    if (isCountable(word)) {
      WordCloud.addWord(word, list, map);
    }
  };

  // Counts a single word, case-insensitively, merging naive plural and
  // singular forms (a trailing "s") into whichever form was seen first.
  //   word - the word to count
  //   list - array of distinct words; a new word is appended lower-cased
  //   map  - object mapping word -> number of occurrences
  WordCloud.addWord = function(word, list, map) {
    var wl = word.toLowerCase();
    var plural = wl + "s"; // potential plural of wl
    var singular = wl.replace(/s$/, ''); // potential singular of wl
    if (hasOwn.call(map, wl)) {
      map[wl]++;
    } else if (hasOwn.call(map, plural)) {
      // the plural is already known, keep counting under it
      map[plural]++;
    } else if (hasOwn.call(map, singular)) {
      // wl looks like a plural whose singular is already known
      map[singular]++;
    } else {
      // first occurrence of this word
      map[wl] = 1;
      list.push(wl);
    }
  };

  // Font "unit sizes" a word can be assigned; draw() maps the log frequency
  // range linearly onto [MIN_UNIT_SIZE, MAX_UNIT_SIZE].
  WordCloud.MIN_UNIT_SIZE = 1;
  WordCloud.MAX_UNIT_SIZE = 8;
  WordCloud.RANGE_UNIT_SIZE = WordCloud.MAX_UNIT_SIZE - WordCloud.MIN_UNIT_SIZE;

  // Renders the cloud for this.selectedText and returns it as an HTML string.
  //   options.minimum   - count mapped to the smallest size (default 3)
  //   options.maximum   - count mapped to the largest size (default 20)
  //   options.wordLimit - maximum number of words to show (default 100);
  //                       a numeric string is accepted
  // Missing or unusable option values fall back to their defaults. Words are
  // emitted by descending frequency; ties keep first-seen order.
  // Limitation: the text is pre-split on /\W/, so only ASCII letters survive.
  WordCloud.prototype.draw = function(options) {
    options = options || {};
    var minimum = positiveNumberOr(options.minimum, 3);
    var maximum = positiveNumberOr(options.maximum, 20);
    var wordLimit = wordLimitOr(options.wordLimit, 100);

    var wordMap = {};
    var wordList = [];
    var text = (this.selectedText === undefined || this.selectedText === null) ?
        '' : String(this.selectedText);
    var tokens = text.split(/\W/);
    for (var t = 0; t < tokens.length; t++) {
      WordCloud.addWords(tokens[t], wordList, wordMap);
    }

    // Compute the log-frequency range, widened by the configured bounds.
    var minFreq = Math.log(minimum);
    var maxFreq = Math.log(maximum);
    var entries = [];
    for (var w = 0; w < wordList.length; w++) {
      var count = wordMap[wordList[w]];
      var freq = Math.log(count);
      minFreq = Math.min(minFreq, freq);
      maxFreq = Math.max(maxFreq, freq);
      entries.push({word: wordList[w], count: count, freq: freq, order: w});
    }
    var range = Math.max(maxFreq - minFreq, 1);

    // Most frequent first. The comparator returns a number (not a boolean)
    // and breaks ties explicitly so the order does not depend on the engine.
    entries.sort(function(a, b) {
      return (b.count - a.count) || (a.order - b.order);
    });

    var html = [];
    html.push('<div class="word-cloud" style="background-color:white; padding: 10px 10px">');
    var shown = Math.min(entries.length, wordLimit);
    for (var k = 0; k < shown; k++) {
      var size = WordCloud.MIN_UNIT_SIZE +
          Math.round((entries[k].freq - minFreq) / range * WordCloud.RANGE_UNIT_SIZE);
      // Clamp so altered size constants can never index outside STYLES.
      var styleIndex = Math.max(0, Math.min(STYLES.length - 1, size - 1));
      html.push("<span class=\"wcl\" style=\"", STYLES[styleIndex], "\"> ", entries[k].word, "</span>");
    }
    html.push('</div>');
    return html.join('');
  };

  return WordCloud;
})();
//end of word cloud routines
/****************************/
// Ubiquity command "word-cloud": previews a word cloud built from the current
// selection (or, when nothing is selected, from the text of the page's
// paragraphs, headers and links) and offers a link that inserts the cloud
// in place of the selection.
CmdUtils.CreateCommand({
  //based on http://visapi-gadgets.googlecode.com/svn/trunk/wordcloud/wc.js
  author: {name: "Thomas Lemberger", email: "thomas.lemberger@gmail.com"},
  license: "GPL",
  name: "word-cloud",
  takes: {"text": noun_arb_text},
  help: "Select some text, call Ubiquity (option-space), type 'word-cloud' and (optional) '#' followed by a number to limit the number of words displayed (eg 'word-cloud #30' shows the 30 most frequent terms). A link allows to replace the selected text by the word cloud.",
  description: "This command generates a word cloud from selection. Based on wordcloud (http://visapi-gadgets.googlecode.com) by yoah.bardavid@gmail.com and razbarvaz@gmail.com",
  // Renders the preview.
  //   pBlock       - preview element whose innerHTML receives the cloud
  //   directObject - command argument; a trailing "#<number>" in its text
  //                  sets the word limit (default 100)
  preview: function(pBlock, directObject){
    // The "insert" link below calls setSelection() from an inline onclick
    // handler, so make a function of that name reachable from the preview
    // document.
    pBlock.ownerDocument.setSelection = function(content,option){CmdUtils.setSelection(content,option)};

    // Keep the limit local (it used to leak as an implicit global) and turn
    // the matched digits into a real base-10 number.
    var query = (directObject && directObject.text) || "";
    var matches = query.match(/#(\d+)$/);
    var limit = matches ? parseInt(matches[1], 10) : 100;

    var inputText = CmdUtils.getSelection();
    if (!inputText) {
      // Fallback when nothing is selected: not very good...
      inputText = jQuery("p,:header,a",context.focusedWindow.document.body).text();
    }

    var wc = new WordCloud(inputText);
    var cloudHTML = wc.draw({minimum:3, maximum:100, wordLimit: limit});
    // escape()/unescape() carry the generated markup through the attribute
    // value without its quotes ending the attribute early.
    var link="<br/><span onmouseover=\"this.style.cursor='pointer'\" "+
      " onclick=\"setSelection(unescape('"+escape(cloudHTML)+"'))\">"+
      "insert this wordcloud &gt;&gt;&gt;&gt;</span>";
    pBlock.innerHTML="Limit the size of the cloud to #"+limit+" words.<br/><br/>" + cloudHTML + link;
  },
  // Nothing happens on execute; insertion is done through the preview link.
  execute: function(){}
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "add-to-twine" bookmarklet command.
// The bookmarklet injects http://www.twine.com/js/spotthis.js into the page
// (script id 'rdr-script') unless an element with that id already exists.
// If the injection throws, it navigates to http://www.twine.com/bookmark/basic
// with the page URL (u), title (t), v=3 and adv=1 as query parameters.
CmdUtils.makeBookmarkletCommand({
name: "add-to-twine",
url: "javascript:(function(){var%20d=document,w=window,l=d.location,e=encodeURIComponent;if(!d.getElementById('rdr-script'))try{var%20s=d.createElement('script');s.type='text/javascript';s.id='rdr-script';s.src='http://www.twine.com/js/spotthis.js';(d.body||d.documentElement).appendChild(s);}catch(x){var%20p='?u='%20+e(l.href)%20+'&t='+e(d.title)%20+'&v=3',u='http://www.twine.com/bookmark/basic'+p;l.href=u%20+'&adv=1';}})()"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "clip-to-evernote" bookmarklet command.
// The bookmarklet sets EN_CLIP_HOST to 'http://preview.evernote.com' and
// appends <EN_CLIP_HOST>/public/bookmarkClipper.js to the page's <head>, with
// a time-derived query string. If that throws, it navigates to
// <EN_CLIP_HOST>/clip.action with the encoded page URL and title.
CmdUtils.makeBookmarkletCommand({
name: "clip-to-evernote",
url: "javascript:(function(){EN_CLIP_HOST='http://preview.evernote.com';try{var%20x=document.createElement('SCRIPT');x.type='text/javascript';x.src=EN_CLIP_HOST+'/public/bookmarkClipper.js?'+(new%20Date().getTime()/100000);document.getElementsByTagName('head')[0].appendChild(x);}catch(e){location.href=EN_CLIP_HOST+'/clip.action?url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title);}})();"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "shorten-link-@-cli.gs" bookmarklet command.
// The bookmarklet opens http://cli.gs/cligs/new in a new window, passing the
// encoded URL and title of the current page as the url and title parameters.
CmdUtils.makeBookmarkletCommand({
name: "shorten-link-@-cli.gs",
url: "javascript:(function(){%20window.open('http://cli.gs/cligs/new?url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title));%20})();"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "to-english" bookmarklet command.
// The bookmarklet reads the current selection and the document charset. When
// the selection compares unequal to '', it navigates to Google Translate's
// translate_t page with the selection as text; otherwise it asks Google
// Translate to translate the current page URL. Both use langpair=auto|en.
// NOTE(review): the selection is concatenated into the query string without
// URL-encoding, unlike the page URL, which goes through escape() - confirm
// whether that is intended.
CmdUtils.makeBookmarkletCommand({
name: "to-english",
url: "javascript:var%20t=((window.getSelection&&window.getSelection())||(document.getSelection&&document.getSelection())||(document.selection&&document.selection.createRange&&document.selection.createRange().text));var%20e=(document.charset||document.characterSet);if(t!=''){location.href='http://translate.google.com/translate_t?text='+t+'&hl=en&langpair=auto|en&tbb=1&ie='+e;}else{location.href='http://translate.google.com/translate?u='+escape(location.href)+'&hl=en&langpair=auto|en&tbb=1&ie='+e;};"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "i-need-to-read-this" bookmarklet command.
// The url below is a percent-encoded script. Decoded, it opens a small popup
// window named "intrt", appends a hidden POST form (fields url, title, guid)
// targeting that popup, sets the form action to
// http://ineedtoreadthis.com/add_bookmark/, fills in the page URL, the page
// title and a hard-coded guid, submits it, and finally loads
// http://media.ineedtoreadthis.com/js/bookmarklet_add.js into the page.
CmdUtils.makeBookmarkletCommand({
name: "i-need-to-read-this",
url: "javascript:%28function%28%29%20%7Bwindow.ineedtoreadthis%20%3D%20window.open%28%22%22%2C%20%22intrt%22%2C%22scrollbars%3D0%2Cstatus%3D0%2Cresizable%3D1%2Clocation%3D0%2Ctoolbar%3D0%2Cwidth%3D340%2Cheight%3D300%22%29%3Bvar%20div%20%3D%20document.createElement%28%22div%22%29%3Bdocument.body.appendChild%28div%29%3Bdiv.innerHTML%20%3D%20%22%3Cform%20target%3D%27intrt%27%20method%3D%27post%27%3E%3Cinput%20type%3D%27hidden%27%20name%3D%27url%27/%3E%3Cinput%20type%3D%27hidden%27%20name%3D%27title%27/%3E%3Cinput%20type%3D%27hidden%27%20name%3D%27guid%27/%3E%3C/form%3E%22%3Bvar%20form%20%3D%20div.childNodes%5B0%5D%3Bform.action%20%3D%20%22http%3A//ineedtoreadthis.com/add_bookmark/%22%3Bform.url.value%20%3D%20document.location.href%3Bform.title.value%20%3D%20document.title%3Bform.guid.value%20%3D%20%22c94da0cd-c6af-48d1-b9e6-8e870cfa3567%22%3Bform.submit%28%29%3Bvar%20script%20%3D%20document.createElement%28%22script%22%29%3Bscript.setAttribute%28%27src%27%2C%20%27http%3A//media.ineedtoreadthis.com/js/bookmarklet_add.js%27%29%3Bscript.setAttribute%28%27type%27%2C%20%27text/javascript%27%29%3Bdocument.body.appendChild%28script%29%3B%7D%29%28%29%3B"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "read-an-article" bookmarklet command.
// The url below is a percent-encoded script. Decoded, it appends an empty
// POST form to the page, points its action at
// http://ineedtoreadthis.com/pop/ with a hard-coded guid query parameter,
// and submits it.
CmdUtils.makeBookmarkletCommand({
name: "read-an-article",
url: "javascript:%28function%28%29%20%7Bvar%20div%20%3D%20document.createElement%28%22div%22%29%3Bdocument.body.appendChild%28div%29%3Bdiv.innerHTML%20%3D%20%22%3Cform%20method%3D%27post%27%3E%3C/form%3E%22%3Bvar%20form%20%3D%20div.childNodes%5B0%5D%3Bform.action%20%3D%20%22http%3A//ineedtoreadthis.com/pop/%3Fguid%3Dc94da0cd-c6af-48d1-b9e6-8e870cfa3567%22%3Bform.submit%28%29%3B%7D%29%28%29%3B"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "readable" bookmarklet command.
// The bookmarklet returns early when the page has no <html> or <body>
// element. Otherwise it creates window.$readable if needed (path
// 'http://readable-app.appspot.com/'), fills window.$readable.options with a
// fixed set of typography and theme settings, and then either calls
// window.$readable.callScript() when that already exists or appends
// <path>target.js (with a random query value) to the <head>, creating the
// <head> element first if the page lacks one.
CmdUtils.makeBookmarkletCommand({
name: "readable",
url: "javascript:(function(){if(document.getElementsByTagName('html').length>0);else{return;}if(document.getElementsByTagName('body').length>0);else{return;}if(window.$readable);else{window.$readable={};window.$readable.path='http://readable-app.appspot.com/';}window.$readable.options={};window.$readable.options.base='blueprint';window.$readable.options.font_family='wide_sans_serif';window.$readable.options.font_size='14';window.$readable.options.text_line_height='1_5';window.$readable.options.text_align='normal';window.$readable.options.text_image_align='center';window.$readable.options.text_box_width='35_em';window.$readable.options.text_box_align='center';window.$readable.options.text_box_outer_margin='1';window.$readable.options.text_box_inner_margin='4_5';window.$readable.options.color_theme='light_grey_off_black';window.$readable.options.background_transparency='90';window.$readable.options.background_transparency_color='from_theme';window.$readable.options.video='keep_major';if(window.$readable.callScript){window.$readable.callScript();return;}if(document.getElementsByTagName('head').length>0);else{document.getElementsByTagName('html')[0].insertBefore(document.createElement('head'),document.getElementsByTagName('body')[0]);}document.getElementsByTagName('head')[0].appendChild(document.createElement('script')).setAttribute('src',window.$readable.path+'target.js?rand='+encodeURIComponent(Math.random()));})()"
});
//Note: This command was automatically generated by the create-bookmarklet-command command.
// Registers the "pagezipper" bookmarklet command.
// If window['pgzp'] is already set, the bookmarklet calls
// _pgzpToggleBookmarklet(). Otherwise it sets
// window._page_zipper_is_bookmarklet and appends
// http://www.printwhatyoulike.com/static/pagezipper/pagezipper_10.js to the
// page's <head>.
CmdUtils.makeBookmarkletCommand({
name: "pagezipper",
url: "javascript:(function(){if(window['pgzp']){_pgzpToggleBookmarklet();}else{window._page_zipper_is_bookmarklet=true;window._page_zipper=document.createElement('script');window._page_zipper.type='text/javascript';window._page_zipper.src='http://www.printwhatyoulike.com/static/pagezipper/pagezipper_10.js';document.getElementsByTagName('head')[0].appendChild(window._page_zipper);}})();"
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment