Stantheman/wally.go

## wally.go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	IO "io/ioutil"
	"os"
	"sort"
	"strings"
)

// Types used to rank counted words.
type (
	// wordWithFrequency pairs a word with the number of times it was counted.
	wordWithFrequency struct {
		word      string
		frequency int
	}

	// wordsSortableByFrequency is a list of word counts. Its Len, Swap and
	// Less methods order the entries by ascending frequency.
	wordsSortableByFrequency []*wordWithFrequency
)

// Len reports how many entries the list holds.
func (d wordsSortableByFrequency) Len() int {
	count := len(d)
	return count
}

// Swap exchanges the entries stored at positions i and j.
func (d wordsSortableByFrequency) Swap(i, j int) {
	held := d[i]
	d[i] = d[j]
	d[j] = held
}

// Less reports whether the entry at i was counted strictly fewer times than
// the entry at j.
func (d wordsSortableByFrequency) Less(i, j int) bool {
	left, right := d[i], d[j]
	return right.frequency > left.frequency
}

// main prints the 25 most frequent words of pride-and-prejudice.txt, one per
// line as "word  -  count", most frequent first. Words listed in
// stop_words.txt and all single letters are excluded. Both files are read
// from the current working directory; any I/O failure is reported on stderr
// and the process exits with status 1.
func main() {
	const (
		textPath      = "pride-and-prejudice.txt"
		stopWordsPath = "stop_words.txt"
		topCount      = 25
	)

	stopWords, err := loadStopWords(stopWordsPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "wally:", err)
		os.Exit(1)
	}

	wordFrequency, err := countWordsInFile(textPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "wally:", err)
		os.Exit(1)
	}

	// Remove the words that should be ignored.
	for _, word := range stopWords {
		delete(wordFrequency, word)
	}

	for _, w := range rankWords(wordFrequency, topCount) {
		fmt.Println(w.word, " - ", w.frequency)
	}
}

// loadStopWords reads the comma-separated stop-word file at path and returns
// its entries in lower case, followed by the single letters a..z.
func loadStopWords(path string) ([]string, error) {
	contents, err := IO.ReadFile(path)
	if err != nil {
		return nil, err
	}

	// Trim whatever whitespace surrounds the list instead of assuming a fixed
	// number of trailing line breaks.
	contents = bytes.TrimSpace(contents)

	var stopWords []string
	for _, word := range strings.Split(strings.ToLower(string(contents)), ",") {
		if word = strings.TrimSpace(word); word != "" {
			stopWords = append(stopWords, word)
		}
	}

	// Single letters are never interesting words either.
	for c := 'a'; c <= 'z'; c++ {
		stopWords = append(stopWords, string(c))
	}
	return stopWords, nil
}

// countWordsInFile counts the words of the file at path, line by line. Each
// line is lower-cased and a word is a maximal run of the letters a..z; every
// other character acts as a separator.
func countWordsInFile(path string) (map[string]int, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	isSeparator := func(r rune) bool { return r < 'a' || r > 'z' }

	counts := make(map[string]int)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.ToLower(scanner.Text())
		for _, word := range strings.FieldsFunc(line, isSeparator) {
			counts[word]++
		}
	}
	// Scan returns false on a read error as well as at EOF; tell them apart.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("reading %s: %w", path, err)
	}
	return counts, nil
}

// rankWords returns at most limit entries of wordFrequency ordered by
// descending frequency. Words with equal frequency are ordered
// alphabetically so the result does not depend on map iteration order.
func rankWords(wordFrequency map[string]int, limit int) wordsSortableByFrequency {
	words := make([]string, 0, len(wordFrequency))
	for word := range wordFrequency {
		words = append(words, word)
	}
	sort.Strings(words)

	ranked := make(wordsSortableByFrequency, 0, len(words))
	for _, word := range words {
		ranked = append(ranked, &wordWithFrequency{word: word, frequency: wordFrequency[word]})
	}

	// The stable sort keeps the alphabetical order among equal frequencies.
	sort.Stable(sort.Reverse(ranked))
	if len(ranked) > limit {
		ranked = ranked[:limit]
	}
	return ranked
}
	package main

	import (
	"bufio"
	"bytes"
	"fmt"
	IO "io/ioutil"
	"os"
	"sort"
	"strings"
	)

	// Types used to rank counted words.
	type (
		// wordWithFrequency pairs a word with the number of times it was counted.
		wordWithFrequency struct {
			word      string
			frequency int
		}

		// wordsSortableByFrequency is a list of word counts. Its Len, Swap and
		// Less methods order the entries by ascending frequency.
		wordsSortableByFrequency []*wordWithFrequency
	)

	// Len reports how many entries the list holds.
	func (d wordsSortableByFrequency) Len() int {
		count := len(d)
		return count
	}

	// Swap exchanges the entries stored at positions i and j.
	func (d wordsSortableByFrequency) Swap(i, j int) {
		held := d[i]
		d[i] = d[j]
		d[j] = held
	}

	// Less reports whether the entry at i was counted strictly fewer times than
	// the entry at j.
	func (d wordsSortableByFrequency) Less(i, j int) bool {
		left, right := d[i], d[j]
		return right.frequency > left.frequency
	}

	// main prints the 25 most frequent words of pride-and-prejudice.txt, one per
	// line as "word  -  count", most frequent first. Words listed in
	// stop_words.txt and all single letters are excluded. Both files are read
	// from the current working directory; any I/O failure is reported on stderr
	// and the process exits with status 1.
	func main() {
		const (
			textPath      = "pride-and-prejudice.txt"
			stopWordsPath = "stop_words.txt"
			topCount      = 25
		)

		stopWords, err := loadStopWords(stopWordsPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "wally:", err)
			os.Exit(1)
		}

		wordFrequency, err := countWordsInFile(textPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "wally:", err)
			os.Exit(1)
		}

		// Remove the words that should be ignored.
		for _, word := range stopWords {
			delete(wordFrequency, word)
		}

		for _, w := range rankWords(wordFrequency, topCount) {
			fmt.Println(w.word, " - ", w.frequency)
		}
	}

	// loadStopWords reads the comma-separated stop-word file at path and returns
	// its entries in lower case, followed by the single letters a..z.
	func loadStopWords(path string) ([]string, error) {
		contents, err := IO.ReadFile(path)
		if err != nil {
			return nil, err
		}

		// Trim whatever whitespace surrounds the list instead of assuming a fixed
		// number of trailing line breaks.
		contents = bytes.TrimSpace(contents)

		var stopWords []string
		for _, word := range strings.Split(strings.ToLower(string(contents)), ",") {
			if word = strings.TrimSpace(word); word != "" {
				stopWords = append(stopWords, word)
			}
		}

		// Single letters are never interesting words either.
		for c := 'a'; c <= 'z'; c++ {
			stopWords = append(stopWords, string(c))
		}
		return stopWords, nil
	}

	// countWordsInFile counts the words of the file at path, line by line. Each
	// line is lower-cased and a word is a maximal run of the letters a..z; every
	// other character acts as a separator.
	func countWordsInFile(path string) (map[string]int, error) {
		f, err := os.Open(path)
		if err != nil {
			return nil, err
		}
		defer f.Close()

		isSeparator := func(r rune) bool { return r < 'a' || r > 'z' }

		counts := make(map[string]int)
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			line := strings.ToLower(scanner.Text())
			for _, word := range strings.FieldsFunc(line, isSeparator) {
				counts[word]++
			}
		}
		// Scan returns false on a read error as well as at EOF; tell them apart.
		if err := scanner.Err(); err != nil {
			return nil, fmt.Errorf("reading %s: %w", path, err)
		}
		return counts, nil
	}

	// rankWords returns at most limit entries of wordFrequency ordered by
	// descending frequency. Words with equal frequency are ordered
	// alphabetically so the result does not depend on map iteration order.
	func rankWords(wordFrequency map[string]int, limit int) wordsSortableByFrequency {
		words := make([]string, 0, len(wordFrequency))
		for word := range wordFrequency {
			words = append(words, word)
		}
		sort.Strings(words)

		ranked := make(wordsSortableByFrequency, 0, len(words))
		for _, word := range words {
			ranked = append(ranked, &wordWithFrequency{word: word, frequency: wordFrequency[word]})
		}

		// The stable sort keeps the alphabetical order among equal frequencies.
		sort.Stable(sort.Reverse(ranked))
		if len(ranked) > limit {
			ranked = ranked[:limit]
		}
		return ranked
	}