Skip to content

Instantly share code, notes, and snippets.

@alexisvisco
Last active December 4, 2019 16:56
Show Gist options
  • Save alexisvisco/150194ef1b4ad3dbe5f4414f90ed4b4c to your computer and use it in GitHub Desktop.
Save alexisvisco/150194ef1b4ad3dbe5f4414f90ed4b4c to your computer and use it in GitHub Desktop.
use std::collections::{HashMap, HashSet};
use std::env::args;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::str;
use std::sync::mpsc;
use std::thread;
/// Capacity, in bytes, of the `BufReader` that `search_in_file` wraps around
/// the input file.
const BUFFER_SIZE: usize = 4096;
/// Entry point of `bgrep`, a grep that dispatches the search to several threads.
///
/// Expects two command-line arguments, `<file>` and `<pattern>`; with fewer, a
/// usage line is printed and the program returns. Otherwise the file is opened
/// and handed to `search_in_file`, which gathers the matching lines through a
/// channel and prints them. A file that cannot be opened is reported on stderr.
fn main() {
    if args().len() < 3 {
        println!("bgrep: <file> <pattern>");
        return;
    }

    // Skip the program name; the length check above guarantees both values.
    let mut cli = args().skip(1);
    let filename = cli.next().unwrap();
    let search = cli.next().unwrap();

    match File::open(&filename) {
        Err(err) => eprintln!("bgrep: {} : unable to read file {}", err, filename),
        Ok(f) => search_in_file(&f, &search),
    }
}
/// Matching lines found in one chunk of the input file.
#[derive(Debug)]
struct Occurrences {
/// Index of the chunk the lines come from; `print_occurrences` sorts on it to
/// print chunks in ascending order.
chunk_index: usize,
/// Lines of the chunk that contain the search pattern, in order of appearance.
lines_to_print: Vec<String>,
}
/// Searches `file` for `search` and prints every matching line.
///
/// The file is read chunk by chunk with `consume_buffer`; each chunk is handed
/// to `find_occurrence` together with a clone of the channel sender and its
/// chunk index. The results received on the channel are keyed by chunk index
/// and finally passed to `print_occurrences`.
fn search_in_file(file: &File, search: &String) {
    let mut reader = BufReader::with_capacity(BUFFER_SIZE, file);
    let (sender, receiver) = mpsc::channel::<Occurrences>();

    let mut next_index = 0;
    while let Some(chunk) = consume_buffer(&mut reader) {
        find_occurrence(sender.clone(), chunk, search.clone(), next_index);
        next_index += 1;
    }

    // Release the original sender: the receiving loop below only ends once
    // every sender, including this one, has been dropped.
    drop(sender);

    let mut by_chunk: HashMap<usize, Occurrences> = receiver
        .into_iter()
        .map(|found| (found.chunk_index, found))
        .collect();

    print_occurrences(search.clone(), &mut by_chunk)
}
/// Reads the next chunk of the file: the bytes currently available from the
/// reader's buffer, extended up to and including the next `'\n'` so that a
/// chunk never stops in the middle of a line.
///
/// Returns `None` once the reader has no more data, otherwise `Some(chunk)`.
/// A chunk ends with `'\n'` unless the file ends without a trailing newline.
///
/// The bytes are decoded as UTF-8 only after the chunk has been extended to a
/// line boundary. `'\n'` is a single byte that never occurs inside a
/// multi-byte UTF-8 sequence, so a character that straddles the reader's
/// internal buffer boundary is always decoded whole (decoding the raw buffer
/// on its own would fail on such a character and end the read early).
///
/// # Panics
///
/// Panics with a `bgrep: err: ...` message if reading from the file fails or
/// if the chunk is not valid UTF-8.
fn consume_buffer(buffer_reader: &mut BufReader<&File>) -> Option<String> {
    // Take whatever the reader currently buffers (it refills itself if empty).
    let mut bytes = match buffer_reader.fill_buf() {
        Ok(buffered) => buffered.to_vec(),
        Err(err) => panic!("bgrep: err: {}", err),
    };

    // An empty buffer right after a refill means there is nothing left to read.
    if bytes.is_empty() {
        return None;
    }
    buffer_reader.consume(bytes.len());

    // The buffer stopped mid-line: keep reading up to the next newline (or EOF).
    if bytes.last() != Some(&b'\n') {
        if let Err(err) = buffer_reader.read_until(b'\n', &mut bytes) {
            panic!("bgrep: err: {}", err);
        }
    }

    match String::from_utf8(bytes) {
        Ok(chunk) => Some(chunk),
        Err(err) => panic!("bgrep: err: {}", err),
    }
}
/// Tells whether `buffer_str` ends with a line feed (`'\n'`).
///
/// An empty string has no last character and therefore yields `false`.
fn end_with_newline(buffer_str: String) -> bool {
    buffer_str.ends_with('\n')
}
/// Spawns a worker thread that collects the lines of `chunk` containing
/// `search` and reports them through `producer`.
///
/// Every matching line is listed once, in order of appearance, and the result
/// is tagged with `chunk_index`. Nothing is sent when the chunk has no match.
/// The thread's join handle is not kept; the sender is dropped when the worker
/// finishes.
///
/// # Panics
///
/// The worker thread panics if sending the result on the channel fails.
fn find_occurrence(
    producer: mpsc::Sender<Occurrences>,
    chunk: String,
    search: String,
    chunk_index: usize,
) {
    thread::spawn(move || {
        // Lines of the chunk paired with their index; the cursor only moves forward.
        let mut numbered_lines = chunk.split('\n').enumerate();
        let mut cursor = numbered_lines.next();
        // Byte offset in `chunk` at which the line under the cursor starts.
        let mut line_start = 0;
        let mut already_listed = HashSet::new();
        let mut lines_to_print: Vec<String> = Vec::new();

        // `match_indices` yields match offsets in ascending order, so the line
        // cursor never has to move backwards.
        'matches: for (offset, _) in chunk.match_indices(search.as_str()) {
            while let Some((line_no, text)) = cursor {
                let line_end = line_start + text.len();
                if offset >= line_start && offset <= line_end {
                    // `insert` returns false when the line was already listed.
                    if already_listed.insert(line_no) {
                        lines_to_print.push(text.to_string());
                    }
                    continue 'matches;
                }
                // Move to the next line; the +1 accounts for the '\n' separator.
                line_start = line_end + 1;
                cursor = numbered_lines.next();
            }
            // No line left to attribute the remaining matches to.
            break;
        }

        if !lines_to_print.is_empty() {
            producer
                .send(Occurrences {
                    chunk_index,
                    lines_to_print,
                })
                .unwrap();
        }
    });
}
/// Prints the collected lines to stdout, chunk by chunk in ascending chunk
/// index, with every occurrence of `search` wrapped in the ANSI escape
/// sequences `\x1b[1;31m` ... `\x1b[0;0m`.
///
/// Each chunk's lines are rendered with `highlight` and written with a single
/// `print!` call.
fn print_occurrences(search: String, occurrences: &mut HashMap<usize, Occurrences>) {
    let replacer: String = ["\x1b[1;31m", search.as_str(), "\x1b[0;0m"].concat();

    // HashMap iteration order is arbitrary: sort the chunk indices first.
    let mut ordered: Vec<&usize> = occurrences.keys().collect();
    ordered.sort();

    for chunk_index in ordered {
        let rendered: String = occurrences
            .get(chunk_index)
            .unwrap()
            .lines_to_print
            .iter()
            .map(|line| highlight(search.as_str(), replacer.as_str(), line))
            .collect();
        print!("{}", rendered)
    }
}
/// Returns `current_line` with every occurrence of `search` replaced by
/// `replacer`, followed by a trailing `'\n'`.
fn highlight(search: &str, replacer: &str, current_line: &str) -> String {
    let mut rendered = current_line.replace(search, replacer);
    rendered.push('\n');
    rendered
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment