Skip to content

Instantly share code, notes, and snippets.

@pfmoore
Created June 29, 2022 12:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfmoore/f1a696ff231ea38dd8e6e9d3b4c8648b to your computer and use it in GitHub Desktop.
Save pfmoore/f1a696ff231ea38dd8e6e9d3b4c8648b to your computer and use it in GitHub Desktop.
Count words in a file, in Rust
use clap::Parser;
use std::fs::File;
use std::io;
use std::path::Path;
use std::io::BufRead;
use std::collections::HashMap;
use std::cmp::Reverse;
use crossbeam::thread;
/// Maps each word to the number of times it has been seen.
type Counts = HashMap<String, u32>;
fn words_in_file<P: AsRef<Path>>(filename: P) -> std::io::Result<Counts> {
let file = File::open(filename)?;
let buf_reader = io::BufReader::new(file);
let mut words = HashMap::new();
for line in buf_reader.lines() {
for word in line?.split_whitespace() {
if let Some(x) = words.get_mut(word) {
*x += 1;
} else {
words.insert(word.to_owned(), 1);
}
}
}
Ok(words)
}
/// Merges several per-file word counts into a single table.
///
/// Every `(word, count)` pair from every map yielded by `counts` is added
/// into the result, so a word present in several maps ends up with the sum
/// of its individual counts. An empty iterator yields an empty table.
fn total_counts<I>(counts: I) -> Counts
where
    I: Iterator<Item = Counts>,
{
    let mut total = Counts::new();
    for (word, n) in counts.flatten() {
        // The key is already owned, so `entry` gives a single hash lookup
        // for both the "seen before" and the "new word" case.
        *total.entry(word).or_insert(0) += n;
    }
    total
}
/// Returns the `len` most frequent words, most frequent first.
///
/// Words with equal counts are ordered alphabetically. This makes both the
/// order of the result and the choice of which tied words survive the
/// cut-off reproducible, instead of depending on `HashMap` iteration order.
///
/// If `len` is zero the result is empty; if `len` is at least the number of
/// distinct words, every word is returned.
fn first_n(words: Counts, len: usize) -> Vec<(String, u32)> {
    /// Higher count sorts first; ties are broken by the word itself.
    fn rank(a: &(String, u32), b: &(String, u32)) -> std::cmp::Ordering {
        (Reverse(a.1), &a.0).cmp(&(Reverse(b.1), &b.0))
    }

    if len == 0 {
        return Vec::new();
    }
    let mut vec: Vec<(String, u32)> = words.into_iter().collect();
    if len < vec.len() {
        // Partition so the `len` best-ranked entries are at the front, then
        // drop the rest; this avoids fully sorting entries we discard.
        vec.select_nth_unstable_by(len - 1, rank);
        vec.truncate(len);
    }
    vec.sort_unstable_by(rank);
    vec
}
// NOTE: clap's derive turns these doc comments into the `--help` text, so
// they must describe this program accurately.
/// Count the words in one or more files and report the most frequent ones.
#[derive(Parser)]
struct Args {
    /// Number of words to report
    count: usize,
    /// The paths to the files to read
    paths: Vec<String>,
}
fn main() {
let args = Args::parse();
let results = thread::scope(|s| {
let threads = args.paths.iter().map(|p| s.spawn(move |_| {
words_in_file(&p)
}));
total_counts(threads.map(|t| (t.join().unwrap().unwrap())))
}).unwrap();
for x in first_n(results, args.count) {
println!("{:?}", x);
};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment