Skip to content

Instantly share code, notes, and snippets.

Created October 19, 2014 04:13
Show Gist options
  • Save Stantheman/609b36fd60e805adca28 to your computer and use it in GitHub Desktop.
Save Stantheman/609b36fd60e805adca28 to your computer and use it in GitHub Desktop.
package main
import (
IO "io/ioutil"
// wordsSortableByFrequency is a slice of pointers to wordWithFrequency.
// It carries Len, Swap and Less methods (the method set that sort.Interface
// requires), with Less comparing entries by their frequency field.
type wordsSortableByFrequency []*wordWithFrequency
// wordWithFrequency pairs a word with the count recorded for it.
type wordWithFrequency struct {
// word is the text of the entry.
word string
// frequency is the count stored alongside word.
frequency int
// Len returns the number of entries in d.
func (d wordsSortableByFrequency) Len() int {
return len(d)
// Swap exchanges the entries at indices i and j in place.
func (d wordsSortableByFrequency) Swap(i, j int) {
d[i], d[j] = d[j], d[i]
// Less reports whether the entry at index i has a strictly lower frequency
// than the entry at index j. Used directly as a sort comparison this gives
// ascending order (least frequent first).
func (d wordsSortableByFrequency) Less(i, j int) bool {
return d[i].frequency < d[j].frequency
// main counts how often each word occurs in pride-and-prejudice.txt, removes
// the entries listed in stop_words.txt (plus every single letter a..z), and
// prints "word  -  frequency" lines for the leading entries of the resulting
// list.
//
// NOTE(review): this copy of the function is missing its closing braces and
// several statement bodies (see the notes inline); the comments below describe
// only what the remaining lines show.
func main() {
// NOTE(review): earlier comments here spoke of resolving the current path and
// joining it with the repository path, but no such code is present. Both file
// names below are relative, so they are resolved against the working
// directory.
PRIDE_AND_PREJUDICE := "pride-and-prejudice.txt"
STOP_WORDS := "stop_words.txt"
// The stop-word file is read into memory in one call.
// NOTE(review): the error from ReadFile is discarded; on failure the returned
// slice is empty and the re-slice on the next line panics.
stopWordsContents, _ := IO.ReadFile(STOP_WORDS)
// Drops the last three bytes of the file unconditionally. This panics when the
// content is shorter than three bytes, and removes real data if the file does
// not actually end in three one-byte line breaks.
stopWordsContents = stopWordsContents[0:(len(stopWordsContents) - 3)] // remove three line breaks at the end
// TODO: check the error
// Lower-case the whole content and split on commas to get the words to ignore.
stopWords := strings.Split(strings.ToLower(string(stopWordsContents)), ",")
// Add every single lowercase letter as a stop word as well.
// The loop covers code points 97..122 ('a'..'z'); 123 is the exclusive bound.
// string(i) on an int yields the UTF-8 encoding of that code point, not its
// decimal digits. NOTE(review): go vet reports this int-to-string conversion;
// string(rune(i)) states the intent explicitly.
for i := 97; i < 123; i++ {
stopWords = append(stopWords, string(i))
// The text file stays open for the rest of main; Close is deferred.
// NOTE(review): the error from os.Open is discarded.
prideAndPrejudiceTextFile, _ := os.Open(PRIDE_AND_PREJUDICE)
defer prideAndPrejudiceTextFile.Close()
// The text is consumed through a bufio.Scanner, which by default yields one
// line per Scan call with the line terminator removed.
// NOTE(review): the Scanner buffers on its own, so the intermediate
// bufio.Reader looks redundant.
prideAndPrejudiceTextReader := bufio.NewReader(prideAndPrejudiceTextFile)
scanner := bufio.NewScanner(prideAndPrejudiceTextReader)
// wordFrequency maps each word to its count; wordBuffer holds the characters
// of the word currently being assembled.
wordFrequency := make(map[string]int)
var wordBuffer bytes.Buffer
for scanner.Scan() {
line := strings.ToLower(scanner.Text())
for _, c := range line {
// NOTE(review): the bodies of the branches below are missing in this copy.
// Presumably the first branch appends c to wordBuffer and the others record
// the buffered word in wordFrequency and reset the buffer - confirm against
// the original gist.
// The condition accepts code points 97..123: 'a'..'z' plus '{' (123). Digits
// are not accepted, so this is an alphabetic rather than alphanumeric filter.
// NOTE(review): the upper bound was probably meant to be 122.
if c >= 97 && c <= 123 { // keep code points 97..123
} else if c == 32 { // Empty space, meaning that we have a word
if w := wordBuffer.String(); len(w) > 0 {
} else {
// Any other character: checks for a non-empty buffered word.
if w := wordBuffer.String(); len(w) > 0 {
// NOTE(review): likely the end-of-line check for a pending word, since the
// scanner strips the line terminator - confirm.
if w := wordBuffer.String(); len(w) > 0 {
// Stop words are removed after counting by deleting their map entries;
// delete is a no-op for words that never occurred.
for _, word := range stopWords {
delete(wordFrequency, word)
// Copy the map into a slice so the entries can be ordered (maps have no
// defined order). Capacity is pre-sized to the number of remaining words.
sortableWordsList := make(wordsSortableByFrequency, 0, len(wordFrequency))
for word, frequency := range wordFrequency {
// w is declared inside the loop body, so each &w points at a distinct value.
w := wordWithFrequency{word, frequency}
sortableWordsList = append(sortableWordsList, &w) // pass reference
// Sort!
// NOTE(review): no sort call is present in this copy; without one the slice
// keeps the map's unspecified iteration order.
for i, w := range sortableWordsList {
// Println separates operands with spaces, so the output reads "word  -  N".
fmt.Println(w.word, " - ", w.frequency)
// This check runs after the print and first holds at i == 26, i.e. once 27
// entries have been printed. NOTE(review): the body is missing here;
// presumably it breaks out of the loop - confirm the intended count.
if i > 25 {
Copy link

wallyqs commented Oct 19, 2014


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment