Skip to content

Instantly share code, notes, and snippets.

@paulohrpinheiro
Last active December 7, 2016 16:32
Show Gist options
  • Save paulohrpinheiro/08e44d04d1c516efd50484d1d6f66916 to your computer and use it in GitHub Desktop.
Save paulohrpinheiro/08e44d04d1c516efd50484d1d6f66916 to your computer and use it in GitHub Desktop.
//! Crawler — My own crawler in Rust!
extern crate hyper; // biblioteca (crate) não padrão
use std::env; // argumentos env::args
use std::io::{Read, Write}; // para IO de arquivos
use std::fs::File; // para criar arquivos
use std::path::Path; // configurar nome de arquivo
use std::thread; // concorrência
/// Value sent in the `User-Agent` header to identify this crawler to servers.
const ROBOT_NAME: &'static str = "paulohrpinheiro-crawler";
/// Size, in bytes, of the buffer used to move the response body into the local file.
const BUFFER_SIZE: usize = 512;
/// Reasons a download can fail, one variant per failing step.
#[derive(Debug)]
enum DownloadError {
/// The HTTP request could not be sent.
CantGet,
/// Reading from the HTTP response failed.
CantRead,
/// Writing the downloaded data to the local file failed.
CantWrite,
/// The local file could not be created.
CantCreate,
/// No usable file name could be derived from the URL.
InvalidName,
}
fn download_content(url: &str) -> Result<String, DownloadError> {
// Somos um respeitável e conhecido bot
let mut headers = hyper::header::Headers::new();
headers.set(hyper::header::UserAgent(ROBOT_NAME.to_string()));
// Pega cabeçalhos (e possivelmente algum dado já)
let client = hyper::Client::new();
let mut response = match client.get(url).headers(headers).send() {
Err(_) => return Err(DownloadError::CantGet),
Ok(r) => r,
};
let local_filename = filename(url)?;
let mut localfile = create_localfile(&local_filename)?;
// pega conteúdo e salva em arquivo
loop {
let mut buffer = [0; BUFFER_SIZE];
// conseguimos ler?
let bytes_read = match response.read(&mut buffer) {
Err(_) => return Err(DownloadError::CantRead),
Ok(b) => b,
};
// não tem mais nada?
if bytes_read == 0 {
break;
}
// vamos tentar escrever o que pegamos
let bytes_write = match localfile.write(&buffer[0..bytes_read]) {
Err(_) => return Err(DownloadError::CantWrite),
Ok(b) => b,
};
// conseguiu escrever o que leu?
if bytes_write != bytes_read {
return Err(DownloadError::CantWrite);
}
}
return Ok(local_filename);
}
/// Derives a local file name from `url` by taking its final path component.
///
/// Returns `DownloadError::InvalidName` when `url` has no final component
/// (for example an empty string or a path ending in `..`) or when that
/// component cannot be converted to a string.
fn filename(url: &str) -> Result<String, DownloadError> {
    Path::new(url)
        .file_name()
        .and_then(|last| last.to_str())
        .map(String::from)
        .ok_or(DownloadError::InvalidName)
}
/// Creates the file `name` for writing, via `File::create`.
///
/// Any failure to create the file is reported as `DownloadError::CantCreate`;
/// the underlying I/O error is discarded.
fn create_localfile(name: &str) -> Result<File, DownloadError> {
    File::create(name).map_err(|_| DownloadError::CantCreate)
}
/// Entry point: downloads every URL given on the command line, one thread per URL.
///
/// For each URL a report line of the form `<url> - OK: saved as "<file>"` or
/// `<url> - ERR: <error>` is printed, followed by blank lines.
fn main() {
    // Threads spawned so far, kept so they can be joined below.
    let mut workers = vec![];

    // The first argument is the program name; everything after it is a URL.
    for url in env::args().skip(1) {
        workers.push(thread::spawn(move || {
            let report = match download_content(&url) {
                Err(e) => format!("{} - ERR: {:?}", url, e),
                Ok(f) => format!("{} - OK: saved as {:?}", url, f),
            };
            // Emit the whole report in one call: separate print calls from
            // concurrently running threads would interleave on stdout.
            println!("{}\n\n", report);
        }));
    }

    // Wait for every download to finish, and do not silently lose a worker
    // that terminated by panicking.
    for worker in workers {
        if worker.join().is_err() {
            println!("ERR: a download thread panicked");
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment