alexisvisco/grep_on_steroid.rs

## grep_on_steroid.rs
use std::collections::{HashMap, HashSet};
use std::env::args;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::str;
use std::sync::mpsc;
use std::thread;

const BUFFER_SIZE: usize = 4096;

/// Entry point of `bgrep`, a grep-like tool whose search work is spread over threads.
///
/// Two command-line operands are expected: `<file>` and `<pattern>`. With fewer than
/// two operands the usage line is printed and the program returns. When the file cannot
/// be opened, the error is reported on standard error.
fn main() {
    // Program name + file + pattern.
    if args().len() >= 3 {
        let mut operands = args().skip(1);
        // Both operands exist: the argument count was checked just above.
        let filename = operands.next().unwrap();
        let search = operands.next().unwrap();

        match File::open(&filename) {
            Err(err) => eprintln!("bgrep: {} : unable to read file {}", err, filename),
            Ok(f) => search_in_file(&f, &search),
        }
    } else {
        println!("bgrep: <file> <pattern>");
    }
}

/// Matching lines found inside a single chunk of the input file.
#[derive(Debug)]
struct Occurrences {
    /// Index of the chunk the lines were found in; results are keyed and sorted by it
    /// so that output follows the order of the input.
    chunk_index: usize,
    /// The matching lines of that chunk, in order, without their trailing newline.
    lines_to_print: Vec<String>,
}

/// Searches `file` for `search` and prints the matching lines in input order.
///
/// The file is read in line-aligned chunks. Every chunk is handed to `find_occurrence`
/// together with its running index, and the per-chunk results come back through a
/// channel. They are gathered in a map keyed by chunk index and then passed to
/// `print_occurrences`.
fn search_in_file(file: &File, search: &String) {
    let (sender, receiver) = mpsc::channel::<Occurrences>();
    let mut reader = BufReader::with_capacity(BUFFER_SIZE, file);

    let mut next_index = 0;
    while let Some(chunk) = consume_buffer(&mut reader) {
        find_occurrence(sender.clone(), chunk, search.clone(), next_index);
        next_index += 1;
    }

    // Only the clones handed to the workers may keep the channel open; otherwise the
    // receiving side below would wait forever.
    drop(sender);

    let mut by_chunk: HashMap<usize, Occurrences> = receiver
        .iter()
        .map(|found| (found.chunk_index, found))
        .collect();

    print_occurrences(search.clone(), &mut by_chunk)
}

/// Reads the next chunk of input, always ending on a line boundary.
///
/// A chunk starts with whatever `buffer_reader` currently has buffered. When that data
/// does not end with `'\n'`, the chunk is extended up to and including the next `'\n'`
/// (or to the end of input). Returns `None` once the reader is exhausted.
///
/// The bytes are decoded only after the chunk is complete, so a multi-byte UTF-8
/// character that straddles two buffer fills stays intact. Invalid UTF-8 is replaced
/// with U+FFFD rather than being mistaken for the end of input.
///
/// # Panics
///
/// Panics if the underlying reader reports an I/O error.
fn consume_buffer<R: BufRead>(buffer_reader: &mut R) -> Option<String> {
    // Start the chunk with everything that is currently buffered.
    let mut chunk = match buffer_reader.fill_buf() {
        Ok(buffered) => buffered.to_vec(),
        Err(err) => panic!("bgrep: err: {}", err),
    };

    // An empty buffer is the reader's way of signalling end of input.
    if chunk.is_empty() {
        return None;
    }
    buffer_reader.consume(chunk.len());

    // The buffer usually stops in the middle of a line: finish that line so that no
    // line is ever split between two chunks.
    if chunk.last() != Some(&b'\n') {
        if let Err(err) = buffer_reader.read_until(b'\n', &mut chunk) {
            panic!("bgrep: err: {}", err);
        }
    }

    // Decode once, on the complete chunk, so that characters cut by the buffer
    // boundary are reassembled before validation.
    Some(String::from_utf8_lossy(&chunk).into_owned())
}

/// Tells whether `buffer_str` finishes with a line feed; an empty string does not.
fn end_with_newline(buffer_str: String) -> bool {
    buffer_str.ends_with('\n')
}

fn find_occurrence(
    producer: mpsc::Sender<Occurrences>,
    chunk: String,
    search: String,
    chunk_index: usize,
) {
    thread::spawn(move || {
        let mut positions: Vec<usize> = chunk
            .match_indices(search.as_str())
            .collect::<Vec<(usize, &str)>>()
            .iter()
            .fold(Vec::new(), |mut p, (position, _)| {
                p.push(*position);
                p
            });

        positions.sort();

        let mut lines = chunk.split('\n');
        let mut line = lines.next();
        let mut total_chars = 0;
        let mut line_index = 0;

        let mut lines_to_print: Vec<String> = vec![];
        let mut cache_lines_to_print_idx = HashSet::new();

        'positions_loop: for pos in positions {
            loop {
                match line {
                    None => break 'positions_loop,

                    Some(current_line) => {
                        if pos >= total_chars && pos <= total_chars + (current_line.len()) {
                            if cache_lines_to_print_idx.contains(&line_index.clone()) {
                                continue 'positions_loop;
                            }

                            cache_lines_to_print_idx.insert(line_index.clone());
                            lines_to_print.push(current_line.to_string());
                            continue 'positions_loop;
                        }

                        total_chars += current_line.len() + 1;
                        line_index += 1;
                        line = lines.next();
                    }
                }
            }
        }

        if lines_to_print.len() > 0 {
            producer
                .send(Occurrences {
                    chunk_index,
                    lines_to_print,
                })
                .unwrap();
        }

        drop(producer)
    });
}

/// Prints all matching lines, chunk after chunk in ascending chunk index.
///
/// Each line goes through `highlight`, which substitutes `search` with the same text
/// wrapped in the ANSI sequences `\x1b[1;31m` (bold red) and `\x1b[0;0m` (reset).
fn print_occurrences(search: String, occurrences: &mut HashMap<usize, Occurrences>) {
    let replacer: String = ["\x1b[1;31m", search.as_str(), "\x1b[0;0m"].concat();

    // Hash maps have no defined order: sort the chunk indices to follow the input.
    let mut ordered: Vec<&usize> = occurrences.keys().collect();
    ordered.sort();

    for chunk_index in ordered {
        let rendered: String = occurrences[chunk_index]
            .lines_to_print
            .iter()
            .map(|line| highlight(search.as_str(), replacer.as_str(), line))
            .collect();

        print!("{}", rendered)
    }
}

/// Returns `current_line` with every occurrence of `search` replaced by `replacer`,
/// followed by a newline.
///
/// An empty `search` leaves the line untouched: `str::replace` would otherwise insert
/// `replacer` at every character boundary.
fn highlight(search: &str, replacer: &str, current_line: &str) -> String {
    let mut rendered = if search.is_empty() {
        current_line.to_string()
    } else {
        current_line.replace(search, replacer)
    };
    rendered.push('\n');
    rendered
}
	use std::collections::{HashMap, HashSet};
	use std::env::args;
	use std::fs::File;
	use std::io::{BufRead, BufReader};
	use std::str;
	use std::sync::mpsc;
	use std::thread;

	const BUFFER_SIZE: usize = 4096;

	/// Entry point of `bgrep`, a grep-like tool whose search work is spread over threads.
	///
	/// Two command-line operands are expected: `<file>` and `<pattern>`. With fewer than
	/// two operands the usage line is printed and the program returns. When the file cannot
	/// be opened, the error is reported on standard error.
	fn main() {
	    // Program name + file + pattern.
	    if args().len() >= 3 {
	        let mut operands = args().skip(1);
	        // Both operands exist: the argument count was checked just above.
	        let filename = operands.next().unwrap();
	        let search = operands.next().unwrap();

	        match File::open(&filename) {
	            Err(err) => eprintln!("bgrep: {} : unable to read file {}", err, filename),
	            Ok(f) => search_in_file(&f, &search),
	        }
	    } else {
	        println!("bgrep: <file> <pattern>");
	    }
	}

	/// Matching lines found inside a single chunk of the input file.
	#[derive(Debug)]
	struct Occurrences {
	/// Index of the chunk the lines were found in; results are keyed and sorted by it
	/// so that output follows the order of the input.
	chunk_index: usize,
	/// The matching lines of that chunk, in order, without their trailing newline.
	lines_to_print: Vec<String>,
	}

	/// Searches `file` for `search` and prints the matching lines in input order.
	///
	/// The file is read in line-aligned chunks. Every chunk is handed to `find_occurrence`
	/// together with its running index, and the per-chunk results come back through a
	/// channel. They are gathered in a map keyed by chunk index and then passed to
	/// `print_occurrences`.
	fn search_in_file(file: &File, search: &String) {
	    let (sender, receiver) = mpsc::channel::<Occurrences>();
	    let mut reader = BufReader::with_capacity(BUFFER_SIZE, file);

	    let mut next_index = 0;
	    while let Some(chunk) = consume_buffer(&mut reader) {
	        find_occurrence(sender.clone(), chunk, search.clone(), next_index);
	        next_index += 1;
	    }

	    // Only the clones handed to the workers may keep the channel open; otherwise the
	    // receiving side below would wait forever.
	    drop(sender);

	    let mut by_chunk: HashMap<usize, Occurrences> = receiver
	        .iter()
	        .map(|found| (found.chunk_index, found))
	        .collect();

	    print_occurrences(search.clone(), &mut by_chunk)
	}

	/// Reads the next chunk of input, always ending on a line boundary.
	///
	/// A chunk starts with whatever `buffer_reader` currently has buffered. When that data
	/// does not end with `'\n'`, the chunk is extended up to and including the next `'\n'`
	/// (or to the end of input). Returns `None` once the reader is exhausted.
	///
	/// The bytes are decoded only after the chunk is complete, so a multi-byte UTF-8
	/// character that straddles two buffer fills stays intact. Invalid UTF-8 is replaced
	/// with U+FFFD rather than being mistaken for the end of input.
	///
	/// # Panics
	///
	/// Panics if the underlying reader reports an I/O error.
	fn consume_buffer<R: BufRead>(buffer_reader: &mut R) -> Option<String> {
	    // Start the chunk with everything that is currently buffered.
	    let mut chunk = match buffer_reader.fill_buf() {
	        Ok(buffered) => buffered.to_vec(),
	        Err(err) => panic!("bgrep: err: {}", err),
	    };

	    // An empty buffer is the reader's way of signalling end of input.
	    if chunk.is_empty() {
	        return None;
	    }
	    buffer_reader.consume(chunk.len());

	    // The buffer usually stops in the middle of a line: finish that line so that no
	    // line is ever split between two chunks.
	    if chunk.last() != Some(&b'\n') {
	        if let Err(err) = buffer_reader.read_until(b'\n', &mut chunk) {
	            panic!("bgrep: err: {}", err);
	        }
	    }

	    // Decode once, on the complete chunk, so that characters cut by the buffer
	    // boundary are reassembled before validation.
	    Some(String::from_utf8_lossy(&chunk).into_owned())
	}

	/// Tells whether `buffer_str` finishes with a line feed; an empty string does not.
	fn end_with_newline(buffer_str: String) -> bool {
	    buffer_str.ends_with('\n')
	}

	fn find_occurrence(
	producer: mpsc::Sender<Occurrences>,
	chunk: String,
	search: String,
	chunk_index: usize,
	) {
	thread::spawn(move \|\| {
	let mut positions: Vec<usize> = chunk
	.match_indices(search.as_str())
	.collect::<Vec<(usize, &str)>>()
	.iter()
	.fold(Vec::new(), \|mut p, (position, _)\| {
	p.push(*position);
	p
	});

	positions.sort();

	let mut lines = chunk.split('\n');
	let mut line = lines.next();
	let mut total_chars = 0;
	let mut line_index = 0;

	let mut lines_to_print: Vec<String> = vec![];
	let mut cache_lines_to_print_idx = HashSet::new();

	'positions_loop: for pos in positions {
	loop {
	match line {
	None => break 'positions_loop,

	Some(current_line) => {
	if pos >= total_chars && pos <= total_chars + (current_line.len()) {
	if cache_lines_to_print_idx.contains(&line_index.clone()) {
	continue 'positions_loop;
	}

	cache_lines_to_print_idx.insert(line_index.clone());
	lines_to_print.push(current_line.to_string());
	continue 'positions_loop;
	}

	total_chars += current_line.len() + 1;
	line_index += 1;
	line = lines.next();
	}
	}
	}
	}

	if lines_to_print.len() > 0 {
	producer
	.send(Occurrences {
	chunk_index,
	lines_to_print,
	})
	.unwrap();
	}

	drop(producer)
	});
	}

	/// Prints all matching lines, chunk after chunk in ascending chunk index.
	///
	/// Each line goes through `highlight`, which substitutes `search` with the same text
	/// wrapped in the ANSI sequences `\x1b[1;31m` (bold red) and `\x1b[0;0m` (reset).
	fn print_occurrences(search: String, occurrences: &mut HashMap<usize, Occurrences>) {
	    let replacer: String = format!("{}{}{}", "\x1b[1;31m", search, "\x1b[0;0m");

	    // Hash maps have no defined order: sort the chunk indices to follow the input.
	    let mut keys: Vec<&usize> = occurrences.keys().collect();
	    keys.sort();

	    for key in keys {
	        let rendered: String = occurrences[key]
	            .lines_to_print
	            .iter()
	            .map(|line| highlight(search.as_str(), replacer.as_str(), line))
	            .collect();

	        print!("{}", rendered)
	    }
	}

	/// Returns `current_line` with every occurrence of `search` replaced by `replacer`,
	/// followed by a newline.
	///
	/// An empty `search` leaves the line untouched: `str::replace` would otherwise insert
	/// `replacer` at every character boundary.
	fn highlight(search: &str, replacer: &str, current_line: &str) -> String {
	    let mut rendered = if search.is_empty() {
	        current_line.to_string()
	    } else {
	        current_line.replace(search, replacer)
	    };
	    rendered.push('\n');
	    rendered
	}