Last active
August 29, 2015 14:22
-
-
Save akitaonrails/eba195aa5a3dca46af96 to your computer and use it in GitHub Desktop.
playing around with Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
actors.list is a text file of more than 1 GB, taken from the IMDB partial database dumps.
Note: the third-party Rust regex crate feels slow at the time of writing.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scans actors.list and prints the name of every actor that has a credit
# containing the target movie title.
#
# Records are tab separated: the actor name appears only on the first line of
# a group, continuation lines start with a tab, and a blank line ends the
# group.
filename = 'actors.list'
target_movie = 'Star Wars: Episode V - The Empire Strikes Back'
actors = []
actor = nil

File.foreach(filename).with_index do |line, line_num|
  # Line indices are zero based, so this skips the first 239 lines
  # (presumably the file's preamble -- confirm against the data file).
  next if line_num < 239

  # Reinterpret the raw bytes in the line's own encoding, substituting a
  # replacement character for anything invalid or undefined.
  line.encode!(line.encoding, 'binary', invalid: :replace, undef: :replace)

  # A blank line closes the current actor's group.
  if line.strip.empty?
    actor = nil
    next
  end

  fields = line.split(/\t/)
  name = fields.first
  movie = fields.last

  # Only the first line of a group carries a name; keep it for the rest.
  actor = name if actor.nil? && !(name.nil? || name == '')

  actors << actor if !movie.nil? && movie.include?(target_movie)
end

actors.each { |actor_name| puts actor_name }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "actors" | |
version = "0.1.0" | |
authors = ["vagrant"] | |
[dependencies] | |
regex = "0.1.8" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::fs::File; | |
use std::io::prelude::*; | |
use std::io::BufReader; | |
extern crate regex; | |
use regex::Regex; | |
/// Number of leading lines of `actors.list` that are skipped before parsing.
const HEADER_LINES: usize = 239;

/// Splits one record at its first run of tab characters.
///
/// Returns `(actor, movie)`: `actor` is the text before the first tab (empty
/// on continuation lines that start with a tab) and `movie` is everything
/// after that whole run of tabs. Returns `None` when the line has no tab.
fn split_record(line: &str) -> Option<(&str, &str)> {
    // Plain `str::find` is enough here: the only structure that matters is
    // where the first run of tabs starts and ends.
    let (actor, rest) = line.split_at(line.find('\t')?);
    Some((actor, rest.trim_start_matches('\t')))
}

/// Collects the actor name for every credit line containing `target_movie`.
///
/// The first `header_lines` lines of `reader` are ignored. After that, the
/// first non-empty actor field seen becomes the current actor and stays
/// current until a line without any tab (for example a blank line) is read.
/// One entry is pushed per matching credit line, in file order.
///
/// Lines are decoded lossily, so bytes that are not valid UTF-8 are replaced
/// instead of discarding the line.
///
/// # Errors
///
/// Returns the underlying I/O error if reading from `reader` fails.
fn collect_actors<R: BufRead>(
    mut reader: R,
    target_movie: &str,
    header_lines: usize,
) -> std::io::Result<Vec<String>> {
    let mut actors = Vec::new();
    let mut actor = String::new();
    // One byte buffer reused for every line.
    let mut raw = Vec::new();
    let mut lines_read = 0;

    loop {
        raw.clear();
        if reader.read_until(b'\n', &mut raw)? == 0 {
            break;
        }
        lines_read += 1;
        if lines_read <= header_lines {
            continue;
        }

        // Drop the line terminator ("\n" or "\r\n"), as `BufRead::lines` does.
        if raw.ends_with(b"\n") {
            raw.pop();
            if raw.ends_with(b"\r") {
                raw.pop();
            }
        }

        let line = String::from_utf8_lossy(&raw);
        if let Some((name, movie)) = split_record(&line) {
            if actor.is_empty() && !name.is_empty() {
                actor.push_str(name);
            }
            if !movie.is_empty() && movie.contains(target_movie) {
                actors.push(actor.clone());
            }
        } else {
            // A line without a tab ends the current actor's group.
            actor.clear();
        }
    }

    Ok(actors)
}

/// Prints, one per line, the actor of every credit in `actors.list` whose
/// movie field contains the target title.
///
/// # Panics
///
/// Panics if `actors.list` cannot be opened or read.
fn main() {
    let filename = "actors.list";
    let target_movie = "Star Wars: Episode V - The Empire Strikes Back";

    let file = File::open(filename)
        .unwrap_or_else(|err| panic!("cannot open {}: {}", filename, err));
    let actors = collect_actors(BufReader::new(file), target_movie, HEADER_LINES)
        .unwrap_or_else(|err| panic!("cannot read {}: {}", filename, err));

    // Each match is printed exactly once, after the scan has finished.
    for actor_name in actors {
        println!("{}", actor_name);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment