Skip to content

Instantly share code, notes, and snippets.

@giannafusaro
Last active November 23, 2015 05:12
Show Gist options
  • Save giannafusaro/91fe336d18252f99ad33 to your computer and use it in GitHub Desktop.
Save giannafusaro/91fe336d18252f99ad33 to your computer and use it in GitHub Desktop.
# # # # # # # # # # # # # # # # # # #
# HeapNode
# # # # # # # # # # # # # # # # # # #
@HeapNode = class HeapNode
constructor: (trie_node, heap_index) ->
@trie_node = trie_node
@trie_node.heap_index = heap_index
@frequency = trie_node.data['frequency']
@word = trie_node.data['word']
# # # # # # # # # # # # # # # # # # #
# Heap
# # # # # # # # # # # # # # # # # # #
@Heap = class Heap
constructor: (capacity) ->
@capacity = capacity
@nodes = []
swap: (index1, index2) ->
[@nodes[index1], @nodes[index2]] = [@nodes[index2], @nodes[index1]]
@nodes[index1].trie_node.heap_index = index1
@nodes[index2].trie_node.heap_index = index2
# make sure parents have lower frequency than children
heapifyDownFrom: (index) ->
[current, lchild, rchild] = [@nodes[index], @nodes[2*index+1], @nodes[2*index+2]]
if(lchild && (current.frequency > lchild.frequency))
@swap index, (2*index+1)
@heapifyDownFrom(2*index+1)
if(rchild && (current.frequency > rchild.frequency))
@swap index, (2*index+2)
@heapifyDownFrom(2*index+2)
replaceRootWith: (trie_node) ->
@nodes[0].trie_node.heap_index = -1
@nodes[0] = new HeapNode(trie_node, 0)
@heapifyDownFrom(0)
# make sure children don't have lower frequency than parents
heapifyUpFrom: (index) ->
if (index % 2 == 1 || index == 0)
parent_index = index//2
else
parent_index = index//2-1
[current, parent] = [@nodes[index], @nodes[parent_index]]
if(current.frequency < parent.frequency)
@swap(index, parent_index)
@heapifyUpFrom(index//2+1)
add: (trie_node) ->
switch
when trie_node.heap_index != -1
# trie_node exists in tree, update tree
@nodes[trie_node.heap_index].frequency++
@heapifyDownFrom(trie_node.heap_index)
when @nodes.length == @capacity
# heap is full, replace root of heap if word frequency is greater than min of heap
@replaceRootWith(trie_node) if trie_node.data['frequency'] > @nodes[0].frequency
else
# heap is not full and word is not in heap
@nodes.push(new HeapNode(trie_node, @nodes.length))
@heapifyUpFrom((@nodes.length-1))
sort: ->
sorted = []
nodes_cp = @nodes.slice(0)
for i in [@capacity..1]
@swap(0, i-1)
sorted.unshift(@nodes.pop())
@heapifyDownFrom 0
@nodes = nodes_cp
sorted
to_s: ->
"#{node.word} : #{node.frequency}" for node in @nodes
# # # # # # # # # # # # # # # # # # #
# Parser
# # # # # # # # # # # # # # # # # # #
@Parser = class Parser
constructor: ->
@tags = @mostFrequentVisibleWords($('html').visibleText())
visibleText = jQuery.fn.visibleText = ->
$.map(@contents(), (el) ->
if el.nodeType == 3
return $(el).text()
if $(el).is(':visible')
return $(el).visibleText()
return
).join ''
mostFrequentVisibleWords: (content) ->
stopWords = ["", "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves"]
word = ''
@trieheap = new window.TrieHeap
for char in content
if /[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]/.test(char) is false
word = word + char.toLowerCase()
else if word not in stopWords
@trieheap.add(word)
word = ''
else
word = ''
@trieheap.sort()
$(document).ready ->
window.parser = new window.Parser
# # # # # # # # # # # # # # # # # # #
# TrieNode
# # # # # # # # # # # # # # # # # # #
@TrieNode = class TrieNode
constructor: ->
@data = { 'frequency': 0 }
@next_chars = {}
@seen = false
@heap_index = -1
visit: ->
@seen = !@seen
add: (letter) ->
@next_chars[letter] ?= new TrieNode
nextCharsInclude: (letter) ->
!@next_chars[letter].nil?
record: (word) ->
@data['frequency']++
@data['word'] = word
@
# # # # # # # # # # # # # # # # # # #
# TrieTree
# # # # # # # # # # # # # # # # # # #
@TrieTree = class TrieTree
constructor: ->
@root = new TrieNode
add: (word) ->
current = @root
for char in word
current.add char
current = current.next_chars[char]
current.record word
to_s: (node = @root) ->
node.visit()
console.log("word #{node.data.word}, freq: #{node.data.frequency}, heap index: #{node.heap_index}") if (node.data.frequency > 0)
unless node.next_chars.length is 0
for neighbor,value of node.next_chars
@to_s(value) unless value.seen is node.seen
# # # # # # # # # # # # # # # # # # #
# TrieHeap
# # # # # # # # # # # # # # # # # # #
@TrieHeap = class TrieHeap
constructor: (capacity = 10) ->
@trieTree = new window.TrieTree
@heap = new window.Heap(capacity)
add: (word) ->
@heap.add(@trieTree.add(word))
sort: ->
@heap.sort()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment