igorette (owner)

Revisions

gist: 19876 Download_button fork
public
Public Clone URL: git://gist.github.com/19876.git
Embed All Files: show embed
x #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/************************************************
 *WordCloud methods below are modifications of wordcloud by yoah.bardavid@gmail.com and razbarvaz@gmail.com
 *wordcloud is part of the following project: http://visapi-gadgets.googlecode.com
 *The visapi-gadgets project is licensed under Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
 *Modifications include:
 * inclusion of a (rough) logarithmic scale for word sizes
 * exclusion of single letter words
 * exclusion of excludedTerms
 * exclusion of numbers
 * attempt to merge plural and singular
 * sort by frequency
 * wordLimit to display only most frequent terms
 ************************************************/
 
/**
 * WordCloud renders the most frequent words of a text as an HTML "cloud" in
 * which the style (font size / colour) of a word follows the logarithm of its
 * frequency.
 *
 * Declared with `var` (instead of an implicit global assignment) so the script
 * also loads under strict mode.
 *
 * @constructor
 * @param {string} text Text to analyse; stored as `selectedText`.
 */
var WordCloud = function(text) {
   this.selectedText = text;
};

// Own-property test. Plain `obj[key]` lookups also see members inherited from
// Object.prototype, which made words such as "constructor" look like they were
// already present (or excluded).
WordCloud._hasOwn = function(obj, key) {
   return Object.prototype.hasOwnProperty.call(obj, key);
};

// Returns `value` as a number when it is finite and > 0, otherwise `fallback`.
WordCloud._positiveOr = function(value, fallback) {
   var n = (value === null || value === undefined || value === '') ? NaN : Number(value);
   return (isFinite(n) && n > 0) ? n : fallback;
};

// Characters that terminate a word in addWords.
// NOTE(review): \uFFFD is the replacement character found in the original
// separator list, presumably a mis-encoded symbol -- confirm the intended one.
WordCloud.SEPARATORS = ' \t,.<>[]{}/`\uFFFD~!@#$%^&*()-_=+\'"\\|:;?\r\n';

// Lower-case terms that are never counted.
WordCloud.EXCLUDED_TERMS = {
   a:1, an:1, as:1, in:1, into:1, upon:1, out:1, now:1, then:1, the:1, that:1,
   there:1, this:1, these:1, those:1, thus:1, and:1, or:1, what:1, who:1,
   whose:1, why:1, with:1, where:1, which:1, while:1, when:1, for:1, from:1,
   at:1, how:1, by:1, on:1, be:1, is:1, am:1, are:1, was:1, were:1, been:1,
   being:1, has:1, had:1, have:1, can:1, could:1, may:1, might:1, do:1,
   does:1, did:1, doing:1, done:1, will:1, would:1, should:1, shall:1, any:1,
   due:1, of:1, to:1, et:1, al:1, one:1, such:1, but:1, over:1, other:1, if:1,
   not:1, well:1, more:1, than:1, also:1, i:1, he:1, she:1, we:1, you:1,
   your:1, yours:1, they:1, my:1, his:1, her:1, hers:1, their:1, our:1,
   ours:1, me:1, us:1, them:1, mine:1, it:1, its:1, some:1, many:1, very:1,
   few:1, all:1, only:1, dr:1
};

// True when `word` should be counted: longer than one character, not an
// excluded term (case-insensitive) and free of digits.
WordCloud._isCountable = function(word) {
   return word.length > 1 &&
          !WordCloud._hasOwn(WordCloud.EXCLUDED_TERMS, word.toLowerCase()) &&
          !/\d/.test(word);
};

// Split `text` on WordCloud.SEPARATORS and add every countable word to
// `list` and `map` (see addWord).
// list is a list of unique (lower-cased) words.
// map maps each of those words to the number of times it was found.
// The exclusion rules are applied to every word, not only to the last one.
WordCloud.addWords = function(text, list, map) {
   var source = (text === null || text === undefined) ? '' : String(text);
   var word = '';
   // Run one position past the end so the trailing word is flushed through
   // exactly the same path as words that end at a separator.
   for (var i = 0; i <= source.length; i++) {
      var atEnd = (i === source.length);
      var c = atEnd ? '' : source.charAt(i);
      if (!atEnd && WordCloud.SEPARATORS.indexOf(c) < 0) {
         word += c;
         continue;
      }
      if (WordCloud._isCountable(word)) {
         WordCloud.addWord(word, list, map);
      }
      word = '';
   }
};

// Add a single word to a list and map.
// list is a list of unique words.
// map maps each word to its count.
// The word is lower-cased. If the map already holds the word itself, its
// potential plural (word + "s") or its potential singular (word without a
// trailing "s"), that existing entry is incremented; otherwise a new entry
// is created and the word is appended to list.
WordCloud.addWord = function(word, list, map) {
   var wl = word.toLowerCase();
   var candidates = [
      wl,                    // the word itself
      wl + 's',              // potential plural already seen
      wl.replace(/s$/, '')   // potential singular already seen
   ];
   for (var i = 0; i < candidates.length; i++) {
      if (WordCloud._hasOwn(map, candidates[i])) {
         map[candidates[i]]++;
         return;
      }
   }
   map[wl] = 1; // never seen the word so add it
   list.push(wl);
};

WordCloud.MIN_UNIT_SIZE = 1;
WordCloud.MAX_UNIT_SIZE = 8;
WordCloud.RANGE_UNIT_SIZE = WordCloud.MAX_UNIT_SIZE - WordCloud.MIN_UNIT_SIZE;

// Build the HTML of the word cloud for `this.selectedText`.
//
// options (all optional, each one defaults independently):
//   minimum   - frequency used as upper bound for the low end of the log
//               scale (default 3)
//   maximum   - frequency used as lower bound for the high end of the log
//               scale (default 20)
//   wordLimit - maximum number of words displayed, most frequent first
//               (default 100); numeric strings are accepted
//
// Returns the HTML as a string: one <div class="word-cloud"> containing one
// <span class="wcl"> per displayed word.
WordCloud.prototype.draw = function(options) {
   var opts = options || {};
   var minimum = WordCloud._positiveOr(opts.minimum, 3);
   var maximum = WordCloud._positiveOr(opts.maximum, 20);
   var wordLimit = (opts.wordLimit === null || opts.wordLimit === undefined || opts.wordLimit === '')
                   ? NaN : Number(opts.wordLimit);
   if (!(wordLimit >= 0)) { // undefined, NaN or negative
      wordLimit = 100;
   }

   var styleArray=["font-size: 10px; color: #acc1f3;",
       "font-size: 14px; color: #86a0dc;",
       "font-size: 18px; color: #607ec5;",
       "font-size: 22px; color: #264ca2;",
       "font-size: 26px; color: #133b97;",
       "font-size: 32px; color: #002a8b;",
       "font-size: 36px; color: #071a41;",
       "font-size: 40px; color: #081122;",
       "font-size: 44px; color: #000000;"];

   var wordMap = {};
   var wordList = [];
   var text = (this.selectedText === null || this.selectedText === undefined)
              ? '' : String(this.selectedText);
   var pieces = text.split(/\W/);
   for (var p = 0; p < pieces.length; p++) {
      WordCloud.addWords(pieces[p], wordList, wordMap);
   }

   // Compute the frequency range (log scale) and collect the entries to sort.
   var minFreq = Math.log(minimum);
   var maxFreq = Math.log(maximum);
   var entries = [];
   for (var w = 0; w < wordList.length; w++) {
      var count = wordMap[wordList[w]];
      var f = Math.log(count);
      minFreq = Math.min(minFreq, f);
      maxFreq = Math.max(maxFreq, f);
      entries.push({word: wordList[w], count: count, order: w});
   }
   var range = Math.max(maxFreq - minFreq, 1);

   // Most frequent first; ties keep the order of first appearance so the
   // result does not depend on the engine's sort stability.
   entries.sort(function(a, b) {
      return (b.count - a.count) || (a.order - b.order);
   });

   var html = [];
   html.push('<div class="word-cloud" style="background-color:white; padding: 10px 10px">');
   var shown = Math.min(entries.length, Math.floor(wordLimit));
   for (var e = 0; e < shown; e++) {
      var freq = Math.log(entries[e].count);
      var size = WordCloud.MIN_UNIT_SIZE +
                 Math.round((freq - minFreq) / range * WordCloud.RANGE_UNIT_SIZE);
      html.push("<span class=\"wcl\" style=\"",styleArray[size-1],"\"> ",entries[e].word,"</span>");
   }
   html.push('</div>');
   return html.join('');
};
//end of word cloud routines
/****************************/
 
 
// Registers the Ubiquity "word-cloud" command. Its preview builds a word cloud
// from the current selection (or, when nothing is selected, from the text of
// paragraphs, headers and links of the focused page) and offers a link that
// inserts the cloud in place of the selection. Executing the command itself
// does nothing.
CmdUtils.CreateCommand({
//based on http://visapi-gadgets.googlecode.com/svn/trunk/wordcloud/wc.js
  author: {name: "Thomas Lemberger", email: "thomas.lemberger@gmail.com"},
  license: "GPL",
  name: "word-cloud",
  takes: {"text": noun_arb_text},
  help: "Select some text, call Ubiquity (option-space), type 'word-cloud' and (optional) '#' followed by a number to limit the number of words displayed (eg 'word-cloud #30' shows the 30 most frequent terms). A link allows to replace the selected text by the word cloud.",
  description: "This command generates a word cloud from selection. Based on wordcloud (http://visapi-gadgets.googlecode.com) by yoah.bardavid@gmail.com and razbarvaz@gmail.com",
 
  preview: function(pBlock, directObject){
   // Attached to the preview document, presumably so that the inline onclick
   // handler built below can resolve `setSelection`.
   pBlock.ownerDocument.setSelection = function(content,option){CmdUtils.setSelection(content,option)};
   // An optional trailing "#<number>" limits the number of words; default 100.
   // `limit` is a local number (it used to leak as a global string).
   var matches=directObject.text.match(/#(\d+)$/);
   var limit = matches ? parseInt(matches[1], 10) : 100;
   var inputText=CmdUtils.getSelection();
   if (!inputText){inputText=jQuery("p,:header,a",context.focusedWindow.document.body).text()};//not very good...
   var wc = new WordCloud(inputText);
   var cloudHTML=wc.draw({minimum:3, maximum:100, wordLimit: limit});
   // escape() also encodes quotes, so the cloud markup can be embedded in the
   // single-quoted argument of the onclick attribute.
   var link="<br/><span onmouseover=\"this.style.cursor='pointer'\" "+
              " onclick=\"setSelection(unescape('"+escape(cloudHTML)+"'))\">"+
              "insert this wordcloud &gt;&gt;&gt;&gt;</span>";
   pBlock.innerHTML="Limit the size of the cloud to #"+limit+" words.<br/><br/>" + cloudHTML + link;
  },
  
  execute: function(){}
});