Last active
December 4, 2019 16:56
-
-
Save alexisvisco/150194ef1b4ad3dbe5f4414f90ed4b4c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::{HashMap, HashSet}; | |
use std::env::args; | |
use std::fs::File; | |
use std::io::{BufRead, BufReader}; | |
use std::str; | |
use std::sync::mpsc; | |
use std::thread; | |
/// Capacity, in bytes, of the buffered reader used to cut the input file into chunks.
const BUFFER_SIZE: usize = 4 * 1024;
/// This program is a grep that dispatch the search process into different threads. | |
/// Then join the matched lines through a channel and finally print the lines. | |
fn main() { | |
if args().len() < 3 { | |
println!("bgrep: <file> <pattern>"); | |
return; | |
} | |
let filename = args().nth(1).unwrap(); | |
let search = args().nth(2).unwrap(); | |
let file = File::open(filename.clone()); | |
match file { | |
Ok(f) => search_in_file(&f, &search), | |
Err(err) => eprintln!("bgrep: {} : unable to read file {}", err, filename), | |
} | |
} | |
/// Matching lines found in one chunk of the input file.
#[derive(Debug)]
struct Occurrences {
    /// Position of the chunk in reading order; used to print results in file order.
    chunk_index: usize,
    /// Lines of the chunk that contain the pattern, without their trailing newline.
    lines_to_print: Vec<String>,
}
fn search_in_file(file: &File, search: &String) { | |
let mut buffer_reader = BufReader::with_capacity(BUFFER_SIZE, file); | |
let mut chunk_index = 0; | |
let (tx, rx) = mpsc::channel::<Occurrences>(); | |
loop { | |
let buffer = consume_buffer(&mut buffer_reader); | |
match buffer { | |
None => { | |
break; | |
} | |
Some(buffer) => find_occurrence(tx.clone(), buffer, search.clone(), chunk_index), | |
} | |
chunk_index += 1; | |
} | |
drop(tx); | |
let mut occurrences: HashMap<usize, Occurrences> = HashMap::new(); | |
for occurrence in rx { | |
occurrences.insert(occurrence.chunk_index, occurrence); | |
} | |
print_occurrences(search.clone(), &mut occurrences) | |
} | |
/// Reads the next chunk of the file, always cut on a line boundary.
///
/// A chunk is whatever the reader currently buffers, extended up to and
/// including the next `'\n'` (or up to end of file) when the buffered bytes do
/// not already end with a newline.
///
/// The chunk is assembled as raw bytes and decoded only once it is complete, so
/// a multi-byte UTF-8 character straddling the buffer edge is kept intact
/// instead of being mistaken for end of input. Byte sequences that are not
/// valid UTF-8 are replaced with U+FFFD rather than aborting the search.
///
/// Returns `None` once there is nothing left to read.
///
/// # Panics
///
/// Panics if the underlying reader reports an I/O error.
fn consume_buffer(buffer_reader: &mut BufReader<&File>) -> Option<String> {
    let mut chunk: Vec<u8> = match buffer_reader.fill_buf() {
        Ok(buffered) => buffered.to_vec(),
        Err(err) => panic!("bgrep: err: {}", err),
    };

    // An empty buffer after `fill_buf` means end of file.
    if chunk.is_empty() {
        return None;
    }
    buffer_reader.consume(chunk.len());

    // The buffer stopped in the middle of a line: read up to the next newline.
    if chunk.last() != Some(&b'\n') {
        if let Err(err) = buffer_reader.read_until(b'\n', &mut chunk) {
            panic!("bgrep: err: {}", err);
        }
    }

    Some(match String::from_utf8(chunk) {
        Ok(text) => text,
        Err(invalid) => String::from_utf8_lossy(invalid.as_bytes()).into_owned(),
    })
}
/// Tells whether `buffer_str` finishes with a line feed; an empty string does not.
fn end_with_newline(buffer_str: String) -> bool {
    buffer_str.ends_with('\n')
}
fn find_occurrence( | |
producer: mpsc::Sender<Occurrences>, | |
chunk: String, | |
search: String, | |
chunk_index: usize, | |
) { | |
thread::spawn(move || { | |
let mut positions: Vec<usize> = chunk | |
.match_indices(search.as_str()) | |
.collect::<Vec<(usize, &str)>>() | |
.iter() | |
.fold(Vec::new(), |mut p, (position, _)| { | |
p.push(*position); | |
p | |
}); | |
positions.sort(); | |
let mut lines = chunk.split('\n'); | |
let mut line = lines.next(); | |
let mut total_chars = 0; | |
let mut line_index = 0; | |
let mut lines_to_print: Vec<String> = vec![]; | |
let mut cache_lines_to_print_idx = HashSet::new(); | |
'positions_loop: for pos in positions { | |
loop { | |
match line { | |
None => break 'positions_loop, | |
Some(current_line) => { | |
if pos >= total_chars && pos <= total_chars + (current_line.len()) { | |
if cache_lines_to_print_idx.contains(&line_index.clone()) { | |
continue 'positions_loop; | |
} | |
cache_lines_to_print_idx.insert(line_index.clone()); | |
lines_to_print.push(current_line.to_string()); | |
continue 'positions_loop; | |
} | |
total_chars += current_line.len() + 1; | |
line_index += 1; | |
line = lines.next(); | |
} | |
} | |
} | |
} | |
if lines_to_print.len() > 0 { | |
producer | |
.send(Occurrences { | |
chunk_index, | |
lines_to_print, | |
}) | |
.unwrap(); | |
} | |
drop(producer) | |
}); | |
} | |
fn print_occurrences(search: String, occurrences: &mut HashMap<usize, Occurrences>) { | |
let replacer: String = format!("{}{}{}", "\x1b[1;31m", search, "\x1b[0;0m"); | |
let mut keys = occurrences.keys().fold(Vec::new(), |mut p, c| { | |
p.push(c); | |
p | |
}); | |
keys.sort(); | |
for key in keys { | |
print!( | |
"{}", | |
occurrences.get(key).unwrap().lines_to_print.iter().fold( | |
String::new(), | |
|mut prev, line| { | |
prev.push_str(highlight(search.as_str(), replacer.as_str(), line).as_str()); | |
prev | |
}, | |
) | |
) | |
} | |
} | |
/// Returns `current_line` with every `search` occurrence swapped for `replacer`,
/// terminated by a newline.
fn highlight(search: &str, replacer: &str, current_line: &str) -> String {
    let mut rendered = current_line.replace(search, replacer);
    rendered.push('\n');
    rendered
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment