-
-
Save adaschevici/039f3037c893e5a7661c319c127884e6 to your computer and use it in GitHub Desktop.
URL title resolver in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate hyper; | |
extern crate encoding; | |
use hyper::client::Client; | |
use hyper::header::{Headers,ContentType,UserAgent}; | |
use hyper::mime::Mime; | |
use std::io::Read; | |
use encoding::label::encoding_from_whatwg_label; | |
use encoding::{Encoding, DecoderTrap}; | |
/// Prints a one-line description of the expected command-line invocation to stdout.
///
/// The program takes exactly one argument: the URL whose page title should be resolved.
fn print_usage() {
    // argv[0] may be absent on some platforms; fall back to a fixed name in that case.
    let program = std::env::args()
        .next()
        .unwrap_or_else(|| "url-title-resolver".to_string());
    println!("Usage: {} <url>", program);
}
fn main() { | |
let args: Vec<String> = std::env::args().collect(); | |
match std::env::args().len() { | |
2 => { | |
match parse_title_from_url(args[1].as_ref()) { | |
Some(title) => println!("{}", title), | |
None => println!("No title found") | |
} | |
}, | |
_ => print_usage() | |
} | |
} | |
fn parse_title_from_url(url: &str) -> Option<String> { | |
let mut client = Client::new(); | |
let mut headers = Headers::new(); | |
headers.set(UserAgent("Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36".to_string())); | |
let ret = match client.get(url).headers(headers).send() { | |
Ok(mut res) => { | |
let mut body_bytes = vec![]; | |
let read = res.read_to_end(&mut body_bytes); | |
if read.is_err() { | |
return Some(read.unwrap_err().to_string()); | |
} | |
let mut buf = String::new(); | |
let body = match std::str::from_utf8(&body_bytes[..]) { | |
Ok(body) => body, | |
Err(_) => { | |
res.headers.get::<ContentType>() | |
.and_then(|ct| parse_charset_from_content_type(ct)) | |
.and_then(|charset| encoding_from_whatwg_label(&charset)) | |
.and_then(|encoding| { | |
if encoding.decode_to(&body_bytes[..], DecoderTrap::Replace, &mut buf).is_err() { | |
return None; | |
} | |
Some(()) | |
}); | |
buf.as_ref() | |
} | |
}; | |
Some(parse_str_between("<title>", "</title>", body).to_string()) | |
}, | |
Err(e) => { | |
Some(format!("Error when parsing url: {}", e)) | |
} | |
}; | |
ret | |
} | |
/// Returns the text enclosed by an opening tag `open` and a closing tag `close` in `s`.
///
/// The last character of `open` is dropped before searching (for `"<title>"` the search
/// term becomes `"<title"`), so opening tags that carry attributes are matched as well.
/// The result starts after the first `>` following that prefix and ends before the first
/// occurrence of `close`; when `close` is missing, everything after the opening tag (up to
/// a second occurrence of the prefix, if any) is returned.
///
/// Returns the literal `"No title found"` when `open` is empty, when the prefix does not
/// occur in `s`, or when the opening tag is never terminated by `>`. The function never
/// panics on malformed input.
fn parse_str_between<'a>(open: &'a str, close: &'a str, s: &'a str) -> &'a str {
    const NOT_FOUND: &'static str = "No title found";

    // Strip the final character on a char boundary; an empty `open` has nothing to search for.
    let tag_prefix = match open.char_indices().last() {
        Some((last_char_start, _)) => &open[..last_char_start],
        None => return NOT_FOUND,
    };

    s.splitn(3, tag_prefix)
        .nth(1)
        // Skip the remainder of the opening tag (attributes and the closing `>`).
        .and_then(|after_prefix| after_prefix.splitn(2, '>').nth(1))
        .and_then(|after_tag| after_tag.splitn(2, close).next())
        .unwrap_or(NOT_FOUND)
}
fn parse_charset_from_content_type(header: &ContentType) -> Option<String> { | |
match **header { | |
Mime(_,_,ref params) => { | |
match params.iter().find(|&&(ref ptype, _)| *ptype == hyper::mime::Attr::Charset) { | |
Some(&(_,ref charset)) => Some(charset.to_string()), | |
None => None | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment