Skip to content

Instantly share code, notes, and snippets.

@akitaonrails
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akitaonrails/eba195aa5a3dca46af96 to your computer and use it in GitHub Desktop.
Save akitaonrails/eba195aa5a3dca46af96 to your computer and use it in GitHub Desktop.
Playing around with Rust.
actors.list is a text file of more than 1 GB, taken from IMDb's partial database dump.
Note: the third-party Rust regex crate feels slow at this time.
# Scan the actors list and print every actor credited in the target movie.
#
# Each record begins with a line carrying the actor name in its first
# tab-separated field; the lines that follow belong to the same actor until a
# blank line closes the record.
filename = 'actors.list'
target_movie = 'Star Wars: Episode V - The Empire Strikes Back'

matches = []
current = nil

File.foreach(filename).each_with_index do |raw, idx|
  # The first 239 lines are skipped before any parsing happens.
  next if idx < 239

  # Transcode in place, replacing bytes that cannot be converted, so the
  # string operations below do not raise on malformed input.
  raw.encode!(raw.encoding, 'binary', invalid: :replace, undef: :replace)

  # A blank line ends the current actor's record.
  if raw.strip.empty?
    current = nil
    next
  end

  fields = raw.split(/\t/)
  name = fields.first
  title = fields.last

  # Only the first line of a record names the actor; keep it until reset.
  current = name if current.nil? && !name.nil? && name != ''

  matches << current if !title.nil? && title.include?(target_movie)
end

matches.each { |person| puts person }
# Cargo package manifest: crate name, version and author metadata.
[package]
name = "actors"
version = "0.1.0"
authors = ["vagrant"]
# Third-party crates the build depends on.
[dependencies]
regex = "0.1.8"
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
extern crate regex;
use regex::Regex;
/// Number of leading lines of `actors.list` that are skipped before parsing
/// (presumably the file's preamble -- confirm against the data file).
const SKIPPED_LINES: usize = 239;

/// Splits a line at its first run of tab characters.
///
/// Returns `(actor_field, movie_field)`, where `actor_field` is everything
/// before the first tab (possibly empty) and `movie_field` is everything after
/// the consecutive tabs that follow it. Returns `None` when the line contains
/// no tab at all.
fn split_record(line: &str) -> Option<(&str, &str)> {
    let tab = line.find('\t')?;
    let (name, rest) = line.split_at(tab);
    Some((name, rest.trim_start_matches('\t')))
}

/// Reads the actor list from `reader` and returns, in file order, the actor
/// attached to every line whose movie field contains `target_movie`.
///
/// The first `skip` lines are ignored. A line without any tab ends the current
/// actor's record. Bytes that are not valid UTF-8 are replaced with U+FFFD
/// rather than discarding the whole line, so a stray byte can no longer reset
/// the current actor.
///
/// # Errors
///
/// Returns the underlying I/O error if reading from `reader` fails.
fn collect_actors<R: BufRead>(
    mut reader: R,
    target_movie: &str,
    skip: usize,
) -> std::io::Result<Vec<String>> {
    let mut actors: Vec<String> = Vec::new();
    let mut actor = String::new();
    // One byte buffer reused for every line instead of a fresh String per line.
    let mut raw: Vec<u8> = Vec::new();
    let mut line_index = 0usize;

    loop {
        raw.clear();
        if reader.read_until(b'\n', &mut raw)? == 0 {
            break;
        }
        let index = line_index;
        line_index += 1;
        if index < skip {
            continue;
        }

        // Strip the line terminator ("\n" or "\r\n"), as `BufRead::lines` does.
        if raw.last() == Some(&b'\n') {
            raw.pop();
            if raw.last() == Some(&b'\r') {
                raw.pop();
            }
        }

        let line = String::from_utf8_lossy(&raw);
        match split_record(&line) {
            Some((name, movie)) => {
                // Only the first line of a record carries the actor name.
                if actor.is_empty() && !name.is_empty() {
                    actor.push_str(name);
                }
                if !movie.is_empty() && movie.contains(target_movie) {
                    actors.push(actor.clone());
                }
            }
            // No tab on the line: the current actor's record is over.
            None => actor.clear(),
        }
    }

    Ok(actors)
}

/// Prints, one per line, every actor in `actors.list` credited in the target
/// movie. Exits with status 1 and a message on stderr if the file cannot be
/// opened or read.
fn main() {
    let filename = "actors.list";
    let target_movie = "Star Wars: Episode V - The Empire Strikes Back";

    let file = match File::open(filename) {
        Ok(file) => file,
        Err(err) => {
            eprintln!("cannot open {}: {}", filename, err);
            std::process::exit(1)
        }
    };

    let actors = match collect_actors(BufReader::new(file), target_movie, SKIPPED_LINES) {
        Ok(actors) => actors,
        Err(err) => {
            eprintln!("error while reading {}: {}", filename, err);
            std::process::exit(1)
        }
    };

    // Each match is printed exactly once, after the scan has finished.
    for actor_name in actors {
        println!("{}", actor_name);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment