Skip to content

Instantly share code, notes, and snippets.

@GroupDocsGists
Last active May 13, 2022 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GroupDocsGists/694d31da56994b95bad031c2e737868d to your computer and use it in GitHub Desktop.
Save GroupDocsGists/694d31da56994b95bad031c2e737868d to your computer and use it in GitHub Desktop.
Count words, unique words, and their occurrence count using C#
// Count the total number of words in a PDF document using C#
using (Parser parser = new Parser("path/document.pdf"))
{
    // Extract the document text into a reader.
    // GetText() returns null when text extraction isn't supported for the format.
    using (TextReader reader = parser.GetText())
    {
        if (reader == null)
        {
            Console.WriteLine("Text extraction isn't supported for this document.");
        }
        else
        {
            string text = reader.ReadToEnd();
            // Characters treated as word separators
            char[] chars = { ' ', '.', ',', ';', ':', '?', '!', '\t', '\n', '\r' };
            // Split into words; drop the empty entries produced by adjacent
            // separators (e.g. ", " or "\r\n") so they are not counted as words
            string[] words = text.Split(chars, StringSplitOptions.RemoveEmptyEntries);
            // Print total word count
            Console.WriteLine("Total word count: {0}", words.Length);
        }
    }
}
// Count unique words and their occurrences in a PDF document using C#
using (Parser parser = new Parser("path/document.pdf"))
{
    // Extract the document text into a TextReader.
    // GetText() returns null when text extraction isn't supported for the format.
    using (TextReader reader = parser.GetText())
    {
        if (reader == null)
        {
            Console.WriteLine("Text extraction isn't supported for this document.");
        }
        else
        {
            // Maps each normalised (trimmed, lower-cased) word to its occurrence count
            Dictionary<string, int> stats = new Dictionary<string, int>();
            string text = reader.ReadToEnd();
            // Characters treated as word separators
            char[] chars = { ' ', '.', ',', ';', ':', '?', '!', '\t', '\n', '\r' };
            // Split into words, skipping the empty entries between adjacent separators
            string[] words = text.Split(chars, StringSplitOptions.RemoveEmptyEntries);
            int minWordLength = 2; // Only words with more than 2 characters are counted
            // Iterate over the words collection to count occurrences
            foreach (string word in words)
            {
                // Invariant lower-casing keeps the dictionary keys independent of
                // the current culture (e.g. Turkish dotted/dotless 'i')
                string w = word.Trim().ToLowerInvariant();
                if (w.Length > minWordLength)
                {
                    // Single lookup: count stays 0 when the word has not been seen yet
                    int count;
                    stats.TryGetValue(w, out count);
                    stats[w] = count + 1;
                }
            }
            // Order the collection by occurrence count, most frequent first
            var orderedStats = stats.OrderByDescending(x => x.Value);
            // stats holds one entry per distinct word, so Count is the number of
            // unique words rather than the total number of words
            Console.WriteLine("Total unique word count: {0}", stats.Count);
            foreach (var pair in orderedStats)
            {
                Console.WriteLine("Total occurrences of {0}: {1}", pair.Key, pair.Value);
            }
        }
    }
}
// Obtain the document text from the parser; the reader is disposed when the block exits
using (var reader = parser.GetText())
{
    // the code that consumes `reader` goes here
}
using GroupDocs.Parser;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
// Open the source document; the parser is disposed when the block exits
using (var parser = new Parser("sample.pdf"))
{
    // your code goes here.
}
// Order the entries by occurrence count, most frequent first
var orderedStats = stats.OrderByDescending(x => x.Value);
// stats holds one entry per distinct word, so Count is the number of
// unique words rather than the total number of words
Console.WriteLine("Total unique word count: {0}", stats.Count);
// Print the occurrence count of each word
foreach (var pair in orderedStats)
{
    Console.WriteLine("Total occurrences of {0}: {1}", pair.Key, pair.Value);
}
// Maps each normalised (trimmed, lower-cased) word to its occurrence count
Dictionary<string, int> stats = new Dictionary<string, int>();
string text = reader.ReadToEnd();
// Characters treated as word separators
char[] chars = { ' ', '.', ',', ';', ':', '?', '!', '\t', '\n', '\r' };
// Split into words, skipping the empty entries between adjacent separators
string[] words = text.Split(chars, StringSplitOptions.RemoveEmptyEntries);
int minWordLength = 2; // only words with more than 2 characters are counted
// Iterate over the word collection to count occurrences
foreach (string word in words)
{
    // Invariant lower-casing keeps the dictionary keys independent of
    // the current culture (e.g. Turkish dotted/dotless 'i')
    string w = word.Trim().ToLowerInvariant();
    if (w.Length > minWordLength)
    {
        // Single lookup: count stays 0 when the word has not been seen yet,
        // so this both adds new words and updates existing ones
        int count;
        stats.TryGetValue(w, out count);
        stats[w] = count + 1;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment