package main | |
import ( | |
"bufio" | |
"io" | |
"log" | |
"net/http" | |
"strings" | |
) | |
// TotalWordsOfLength returns how many whitespace-separated words in line are
// at least wordLength characters long.
//
// Words are obtained with strings.Fields, so any run of Unicode whitespace
// separates words and leading/trailing whitespace is ignored. Length is
// measured in Unicode code points rather than bytes, so a word such as
// "héllo" counts as 5 characters even though it occupies 6 bytes in UTF-8.
// An empty or all-whitespace line yields 0.
func TotalWordsOfLength(line string, wordLength int) int {
	count := 0
	for _, word := range strings.Fields(line) {
		// len([]rune(s)) counts code points; len(s) would count bytes and
		// over-report the length of any word containing non-ASCII text.
		if len([]rune(word)) >= wordLength {
			count++
		}
	}
	return count
}
// GetTotalWords scans the contents of a url and counts the total number of words | |
// over a specific length | |
func GetTotalWords(url string, wordLength int, result chan<- int) { | |
// This opens the connection to start downloading the contents | |
// of the url | |
resp, err := http.Get(url) | |
if err != nil { | |
close(result) | |
return | |
} | |
// Finally clause to close the body | |
defer resp.Body.Close() | |
// Create an outbound channel that we can pump lines into | |
lineChannel := make(chan string, 10) | |
// Create a go routine that will receive lines that we parse | |
// and calculate a total number of lines | |
go func() { | |
var total int | |
// Receive lines until the lineChannel is explicitly closed | |
for line := range lineChannel { | |
total += TotalWordsOfLength(line, wordLength) | |
} | |
// Push results via channel | |
result <- total | |
}() | |
// Use a buffered reader to read lines from the response body | |
reader := bufio.NewReader(resp.Body) | |
for { | |
// Read a string until we find a newline -- end of file err | |
// denotes we are done reading the contents | |
line, err := reader.ReadString('\n') | |
if err != nil && err == io.EOF { | |
close(lineChannel) | |
return | |
} | |
// Push the currently read line to the word counting go routine | |
lineChannel <- line | |
} | |
} | |
func main() { | |
// urls for some large text files | |
textUrls := []string{ | |
"https://norvig.com/big.txt", | |
"http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-alice-457.txt", | |
"http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-egotism-463.txt", | |
"http://textfiles.com/etext/AUTHORS/EMERSON/emerson-man-235.txt", | |
"http://textfiles.com/etext/AUTHORS/TWAIN/abroad.mt", | |
"http://textfiles.com/etext/AUTHORS/TWAIN/wman10.txt", | |
"http://textfiles.com/etext/AUTHORS/WILDE/wilde-picture-615.txt"} | |
minWordLength := 10 | |
// use integer channel to receive total words | |
results := make(chan int) | |
// for each url, execute GetTotalWords as a go routine | |
for _, url := range textUrls { | |
go GetTotalWords(url, minWordLength, results) | |
} | |
// Receive results until we've received the same number | |
// of results as total urls | |
totalResults := 0 | |
totalWords := 0 | |
for { | |
count := <-results | |
totalWords += count | |
log.Printf("Results: %d\n", count) | |
totalResults++ | |
if totalResults >= len(textUrls) { | |
break | |
} | |
} | |
log.Printf("Total Words: %d\n", totalWords) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment