This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Concurrent; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Text; | |
using System.Threading; | |
using System.Threading.Tasks; | |
using System.Threading.Tasks.Dataflow; | |
// Our main class | |
/// <summary>
/// Console host for the task demo: runs the demo to completion, reports any
/// failure, and keeps the window open until the user presses ENTER.
/// </summary>
public class Program
{
    /// <summary>
    /// Program entry point.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        TaskDemo taskDemo = new TaskDemo();

        try
        {
            // Main is synchronous here, so we block on the returned Task instead of
            // awaiting it. Without blocking, the process would exit before the
            // asynchronous work had a chance to finish.
            Task running = taskDemo.Run();
            running.Wait();
        }
        catch (Exception failure)
        {
            Console.WriteLine($"Caught an exception running demo: {failure}");
        }

        // Hold the console open until the user confirms.
        Console.WriteLine("Press [ENTER] to quit...");
        Console.ReadLine();
    }
}
// Task Demo Class - Core logic for our program. | |
/// <summary>
/// Core logic of the demo: reads several text resources concurrently and counts
/// the words in each that meet a minimum length.
/// </summary>
public class TaskDemo
{
    /// <summary>
    /// Creates a new demo instance. The class declares no fields, so there is
    /// nothing to initialize.
    /// </summary>
    public TaskDemo() { }

    /// <summary>
    /// Starts a word count for every URL in a fixed list, waits for all of them
    /// to finish, then prints each per-URL count followed by the grand total.
    /// </summary>
    /// <returns>A task that completes once all results have been printed.</returns>
    public async Task Run()
    {
        // URLs for some large text files
        var textUrls = new[]
        {
            "https://norvig.com/big.txt",
            "http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-alice-457.txt",
            "http://textfiles.com/etext/AUTHORS/HAWTHORNE/hawthorne-egotism-463.txt",
            "http://textfiles.com/etext/AUTHORS/EMERSON/emerson-man-235.txt",
            "http://textfiles.com/etext/AUTHORS/TWAIN/abroad.mt",
            "http://textfiles.com/etext/AUTHORS/TWAIN/wman10.txt",
            "http://textfiles.com/etext/AUTHORS/WILDE/wilde-picture-615.txt"
        };
        var minWordLength = 10;

        // Calling GetTotalWords starts the work for that URL. ToList forces every
        // call to happen now, so all of them are in flight before we await any.
        var tasks = textUrls
            .Select(url => GetTotalWords(url, minWordLength))
            .ToList();

        // Wait for all tasks to complete; results come back in the same order as textUrls.
        var results = await Task.WhenAll(tasks);
        foreach (var result in results)
        {
            Console.WriteLine($"Result: {result}");
        }

        Console.WriteLine($"Total Words Found: {results.Sum()}");
    }

    /// <summary>
    /// Reads the resource at <paramref name="url"/> line by line (decoded as ASCII)
    /// and counts the whitespace-delimited words that are at least
    /// <paramref name="wordLength"/> characters long.
    /// </summary>
    /// <remarks>
    /// Lines are handed from the reader to a counting loop through a
    /// <see cref="BufferBlock{T}"/>, so reading and counting can overlap.
    /// </remarks>
    /// <param name="url">Address of the text resource to read.</param>
    /// <param name="wordLength">Minimum length a word must have to be counted.</param>
    /// <returns>A task whose result is the number of qualifying words.</returns>
    public async Task<int> GetTotalWords(string url, int wordLength)
    {
        // Queue that carries lines from the reader to the counter.
        var lines = new BufferBlock<string>();

        // Local async function that tallies words until the queue is completed
        // and fully drained.
        async Task<int> CalculateTotal()
        {
            int totalWords = 0;

            // OutputAvailableAsync yields false once the block has been completed
            // and no buffered lines remain, which ends the loop.
            while (await lines.OutputAvailableAsync())
            {
                while (lines.TryReceive(out string line))
                {
                    totalWords += TotalWordsOfLength(line, wordLength);
                }
            }

            return totalWords;
        }

        // Start the counter before reading so it consumes lines as they arrive.
        var calculate = CalculateTotal();

        try
        {
            using (var client = new WebClient())
            using (var stream = await client.OpenReadTaskAsync(url))
            using (var reader = new StreamReader(stream, Encoding.ASCII))
            {
                // ReadLineAsync returns null at end of stream.
                string line;
                while ((line = await reader.ReadLineAsync()) != null)
                {
                    await lines.SendAsync(line);
                }
            }
        }
        finally
        {
            // Always signal "no more lines", including when opening or reading
            // fails; otherwise the counter would wait on the queue forever.
            lines.Complete();
        }

        // Wait for the counter to drain the queue and return its total.
        return await calculate;
    }

    /// <summary>
    /// Counts the whitespace-delimited words in <paramref name="text"/> whose
    /// length is at least <paramref name="wordLength"/>.
    /// </summary>
    /// <param name="text">Text to scan; null or empty yields 0.</param>
    /// <param name="wordLength">Minimum number of characters a word must have.</param>
    /// <returns>The number of words meeting the length requirement.</returns>
    private static int TotalWordsOfLength(string text, int wordLength)
    {
        if (string.IsNullOrEmpty(text))
        {
            return 0;
        }

        var index = 0;
        var count = 0;

        // Skip leading whitespace
        while (index < text.Length && char.IsWhiteSpace(text[index]))
        {
            index++;
        }

        // Loop until all characters are consumed
        while (index < text.Length)
        {
            // Remember where the current word starts
            var wordStart = index;

            // Advance until we reach the end of the text or find a whitespace character
            while (index < text.Length && !char.IsWhiteSpace(text[index]))
            {
                index++;
            }

            // The word spans [wordStart, index); count it if it is long enough
            if (index - wordStart >= wordLength)
            {
                count++;
            }

            // Skip the whitespace that follows the word
            while (index < text.Length && char.IsWhiteSpace(text[index]))
            {
                index++;
            }
        }

        return count;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment