Last active
October 26, 2023 12:59
-
-
Save ashafq/67f4ab22430989fe2c3d4b539f94e898 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Prints the 20 most frequent words of each file named on the command line,
 * or of standard input when no file is given.
 */
use std::cmp::min; | |
use std::collections::HashMap; | |
use std::env; | |
use std::fs; | |
use std::io; | |
use std::io::{BufRead, BufReader, Read}; | |
fn main() { | |
let arg: Vec<String> = env::args().collect(); | |
if arg.len() > 1 { | |
for filename in &arg[1..] { | |
let file = fs::File::open(filename) | |
.expect(format!("Error reading content from file {}", filename).as_str()); | |
top_words(file, filename); | |
} | |
} else { | |
top_words(io::stdin().lock(), "/dev/stdin"); | |
} | |
} | |
/// Maximum number of words printed per input.
const TOP_WORD_LIMIT: usize = 20;

/// Prints the most frequent whitespace-separated words read from `reader`.
///
/// Words are compared case-insensitively (each line is lowercased before it
/// is split). The output is a header line `<filename>:` followed by up to
/// [`TOP_WORD_LIMIT`] lines of the form `<word>\t<count>`, ordered by
/// descending count; words with equal counts are listed alphabetically so the
/// output is the same on every run.
///
/// * `reader` - source of the text to analyse.
/// * `filename` - label printed in the header and in error messages.
///
/// If reading fails, the error is reported on standard error and nothing is
/// printed for this input.
fn top_words(reader: impl Read, filename: &str) {
    let word_count = match count_words(reader) {
        Ok(counts) => counts,
        Err(err) => {
            eprintln!("Error reading content from {}: {}", filename, err);
            return;
        }
    };

    let ranked = rank_words(&word_count);
    let n = min(ranked.len(), TOP_WORD_LIMIT);

    println!("{}:", filename);
    for (word, count) in &ranked[..n] {
        println!("{}\t{}", word, count);
    }
}

/// Counts the lowercased, whitespace-separated words read from `reader`.
///
/// Lines are read as raw bytes and decoded lossily, so bytes that are not
/// valid UTF-8 become U+FFFD instead of aborting the whole count.
fn count_words(reader: impl Read) -> io::Result<HashMap<String, u32>> {
    let mut word_count: HashMap<String, u32> = HashMap::new();
    let mut buf_read = BufReader::new(reader);
    // One buffer reused for every line to avoid a fresh allocation per line.
    let mut raw_line: Vec<u8> = Vec::new();

    while buf_read.read_until(b'\n', &mut raw_line)? != 0 {
        let line = String::from_utf8_lossy(&raw_line).to_lowercase();
        // The trailing "\n" / "\r\n" is whitespace, so the split discards it.
        for word in line.split_whitespace() {
            *word_count.entry(word.to_owned()).or_insert(0) += 1;
        }
        raw_line.clear();
    }

    Ok(word_count)
}

/// Returns every `(word, count)` pair sorted by descending count, with ties
/// broken by ascending word so the order does not depend on hash-map
/// iteration order.
fn rank_words(word_count: &HashMap<String, u32>) -> Vec<(&str, u32)> {
    let mut ranked: Vec<(&str, u32)> = word_count
        .iter()
        .map(|(word, &count)| (word.as_str(), count))
        .collect();
    // Keys are unique, so the comparator is a total order and an unstable
    // sort still yields a deterministic result.
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    ranked
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment