Last active
November 23, 2015 05:12
-
-
Save giannafusaro/91fe336d18252f99ad33 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # # # # # # # # # | |
# HeapNode | |
# # # # # # # # # # # # # # # # # # # | |
@HeapNode = class HeapNode | |
constructor: (trie_node, heap_index) -> | |
@trie_node = trie_node | |
@trie_node.heap_index = heap_index | |
@frequency = trie_node.data['frequency'] | |
@word = trie_node.data['word'] | |
# # # # # # # # # # # # # # # # # # # | |
# Heap | |
# # # # # # # # # # # # # # # # # # # | |
@Heap = class Heap | |
constructor: (capacity) -> | |
@capacity = capacity | |
@nodes = [] | |
swap: (index1, index2) -> | |
[@nodes[index1], @nodes[index2]] = [@nodes[index2], @nodes[index1]] | |
@nodes[index1].trie_node.heap_index = index1 | |
@nodes[index2].trie_node.heap_index = index2 | |
# make sure parents have lower frequency than children | |
heapifyDownFrom: (index) -> | |
[current, lchild, rchild] = [@nodes[index], @nodes[2*index+1], @nodes[2*index+2]] | |
if(lchild && (current.frequency > lchild.frequency)) | |
@swap index, (2*index+1) | |
@heapifyDownFrom(2*index+1) | |
if(rchild && (current.frequency > rchild.frequency)) | |
@swap index, (2*index+2) | |
@heapifyDownFrom(2*index+2) | |
replaceRootWith: (trie_node) -> | |
@nodes[0].trie_node.heap_index = -1 | |
@nodes[0] = new HeapNode(trie_node, 0) | |
@heapifyDownFrom(0) | |
# make sure children don't have lower frequency than parents | |
heapifyUpFrom: (index) -> | |
if (index % 2 == 1 || index == 0) | |
parent_index = index//2 | |
else | |
parent_index = index//2-1 | |
[current, parent] = [@nodes[index], @nodes[parent_index]] | |
if(current.frequency < parent.frequency) | |
@swap(index, parent_index) | |
@heapifyUpFrom(index//2+1) | |
add: (trie_node) -> | |
switch | |
when trie_node.heap_index != -1 | |
# trie_node exists in tree, update tree | |
@nodes[trie_node.heap_index].frequency++ | |
@heapifyDownFrom(trie_node.heap_index) | |
when @nodes.length == @capacity | |
# heap is full, replace root of heap if word frequency is greater than min of heap | |
@replaceRootWith(trie_node) if trie_node.data['frequency'] > @nodes[0].frequency | |
else | |
# heap is not full and word is not in heap | |
@nodes.push(new HeapNode(trie_node, @nodes.length)) | |
@heapifyUpFrom((@nodes.length-1)) | |
sort: -> | |
sorted = [] | |
nodes_cp = @nodes.slice(0) | |
for i in [@capacity..1] | |
@swap(0, i-1) | |
sorted.unshift(@nodes.pop()) | |
@heapifyDownFrom 0 | |
@nodes = nodes_cp | |
sorted | |
to_s: -> | |
"#{node.word} : #{node.frequency}" for node in @nodes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # # # # # # # # # | |
# Parser | |
# # # # # # # # # # # # # # # # # # # | |
@Parser = class Parser | |
constructor: -> | |
@tags = @mostFrequentVisibleWords($('html').visibleText()) | |
visibleText = jQuery.fn.visibleText = -> | |
$.map(@contents(), (el) -> | |
if el.nodeType == 3 | |
return $(el).text() | |
if $(el).is(':visible') | |
return $(el).visibleText() | |
return | |
).join '' | |
mostFrequentVisibleWords: (content) -> | |
stopWords = ["", "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"] | |
word = '' | |
@trieheap = new window.TrieHeap | |
for char in content | |
if /[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]/.test(char) is false | |
word = word + char.toLowerCase() | |
else if word not in stopWords | |
@trieheap.add(word) | |
word = '' | |
else | |
word = '' | |
@trieheap.sort() | |
$(document).ready -> | |
window.parser = new window.Parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # # # # # # # # # | |
# TrieNode | |
# # # # # # # # # # # # # # # # # # # | |
@TrieNode = class TrieNode | |
constructor: -> | |
@data = { 'frequency': 0 } | |
@next_chars = {} | |
@seen = false | |
@heap_index = -1 | |
visit: -> | |
@seen = !@seen | |
add: (letter) -> | |
@next_chars[letter] ?= new TrieNode | |
nextCharsInclude: (letter) -> | |
!@next_chars[letter].nil? | |
record: (word) -> | |
@data['frequency']++ | |
@data['word'] = word | |
@ | |
# # # # # # # # # # # # # # # # # # # | |
# TrieTree | |
# # # # # # # # # # # # # # # # # # # | |
@TrieTree = class TrieTree | |
constructor: -> | |
@root = new TrieNode | |
add: (word) -> | |
current = @root | |
for char in word | |
current.add char | |
current = current.next_chars[char] | |
current.record word | |
to_s: (node = @root) -> | |
node.visit() | |
console.log("word #{node.data.word}, freq: #{node.data.frequency}, heap index: #{node.heap_index}") if (node.data.frequency > 0) | |
unless node.next_chars.length is 0 | |
for neighbor,value of node.next_chars | |
@to_s(value) unless value.seen is node.seen |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # # # # # # # # # # # # # # # # # | |
# TrieHeap | |
# # # # # # # # # # # # # # # # # # # | |
@TrieHeap = class TrieHeap | |
constructor: (capacity = 10) -> | |
@trieTree = new window.TrieTree | |
@heap = new window.Heap(capacity) | |
add: (word) -> | |
@heap.add(@trieTree.add(word)) | |
sort: -> | |
@heap.sort() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment