-
-
Save mbolt35/ef7247565ed3b3cc819f1a4080bfb558 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import (
	"bufio"
	"io"
	"log"
	"net/http"
	"strings"
	"unicode/utf8"
)
// TotalWordsOfLength returns the number of whitespace-separated words in line
// whose length is at least wordLength.
//
// Words are split with strings.Fields, so any run of Unicode whitespace
// (spaces, tabs, newlines) separates words and an empty or all-whitespace
// line yields 0. Length is measured in Unicode code points rather than bytes,
// so a multi-byte character such as 'é' counts once.
func TotalWordsOfLength(line string, wordLength int) int {
	count := 0
	for _, word := range strings.Fields(line) {
		// len(word) would count UTF-8 bytes and over-measure non-ASCII words.
		if utf8.RuneCountInString(word) >= wordLength {
			count++
		}
	}
	return count
}
// GetTotalWords scans the contents of a url and counts the total number of words | |
// over a specific length | |
func GetTotalWords(url string, wordLength int, result chan<- int) { | |
// This opens the connection to start downloading the contents | |
// of the url | |
resp, err := http.Get(url) | |
if err != nil { | |
close(result) | |
return | |
} | |
// Finally clause to close the body | |
defer resp.Body.Close() | |
// Create an outbound channel that we can pump lines into | |
lineChannel := make(chan string, 10) | |
// Create a go routine that will receive lines that we parse | |
// and calculate a total number of lines | |
go func() { | |
var total int | |
// Receive lines until the lineChannel is explicitly closed | |
for line := range lineChannel { | |
total += TotalWordsOfLength(line, wordLength) | |
} | |
// Push results via channel | |
result <- total | |
}() | |
// Use a buffered reader to read lines from the response body | |
reader := bufio.NewReader(resp.Body) | |
for { | |
// Read a string until we find a newline -- end of file err | |
// denotes we are done reading the contents | |
line, err := reader.ReadString('\n') | |
if err != nil && err == io.EOF { | |
close(lineChannel) | |
return | |
} | |
// Push the currently read line to the word counting go routine | |
lineChannel <- line | |
} | |
} | |
func main() { | |
// urls for some large text files | |
textUrls := []string{ | |
"https://norvig.com/big.txt", | |
"http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-alice-457.txt", | |
"http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-egotism-463.txt", | |
"http://textfiles.com/etext/AUTHORS/EMERSON/emerson-man-235.txt", | |
"http://textfiles.com/etext/AUTHORS/TWAIN/abroad.mt", | |
"http://textfiles.com/etext/AUTHORS/TWAIN/wman10.txt", | |
"http://textfiles.com/etext/AUTHORS/WILDE/wilde-picture-615.txt"} | |
minWordLength := 10 | |
// use integer channel to receive total words | |
results := make(chan int) | |
// for each url, execute GetTotalWords as a go routine | |
for _, url := range textUrls { | |
go GetTotalWords(url, minWordLength, results) | |
} | |
// Receive results until we've received the same number | |
// of results as total urls | |
totalResults := 0 | |
totalWords := 0 | |
for { | |
count := <-results | |
totalWords += count | |
log.Printf("Results: %d\n", count) | |
totalResults++ | |
if totalResults >= len(textUrls) { | |
break | |
} | |
} | |
log.Printf("Total Words: %d\n", totalWords) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment