Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save adaschevici/039f3037c893e5a7661c319c127884e6 to your computer and use it in GitHub Desktop.
Save adaschevici/039f3037c893e5a7661c319c127884e6 to your computer and use it in GitHub Desktop.
URL title resolver in Rust
extern crate hyper;
extern crate encoding;
use hyper::client::Client;
use hyper::header::{Headers,ContentType,UserAgent};
use hyper::mime::Mime;
use std::io::Read;
use encoding::label::encoding_from_whatwg_label;
use encoding::{Encoding, DecoderTrap};
/// Prints a one-line usage summary to standard output.
///
/// The program name is taken from the first process argument; a generic
/// placeholder is used when the platform does not supply one.
fn print_usage() {
    let program = std::env::args()
        .next()
        .unwrap_or_else(|| "<program>".to_string());
    // Spell out the single expected argument so the message is actionable.
    println!("Usage: {} <url>", program);
}
/// Program entry point.
///
/// Expects exactly one command-line argument (the URL to resolve). Prints the
/// string returned by `parse_title_from_url`, or the usage text when the
/// argument count is wrong.
fn main() {
    let args: Vec<String> = std::env::args().collect();

    // args[0] is the program name, so a single user argument means len == 2.
    // Reuse the collected vector instead of walking the process arguments again.
    if args.len() != 2 {
        print_usage();
        return;
    }

    match parse_title_from_url(&args[1]) {
        Some(title) => println!("{}", title),
        None => println!("No title found"),
    }
}
/// Downloads `url` and returns the text of its first `<title>` element.
///
/// The request is sent with a browser-like `User-Agent` header. The response
/// body is read completely and decoded as follows:
///
/// 1. as UTF-8, if the bytes are valid UTF-8;
/// 2. otherwise with the charset named in the `Content-Type` header, if one is
///    present and recognised by `encoding_from_whatwg_label`;
/// 3. otherwise as lossy UTF-8 (invalid sequences become U+FFFD), so that an
///    ASCII `<title>` tag can still be located.
///
/// # Returns
///
/// Always `Some(..)`: either the extracted title, the "No title found"
/// sentinel produced by `parse_str_between`, or a human-readable error message
/// when the request or the body read fails.
fn parse_title_from_url(url: &str) -> Option<String> {
    let mut client = Client::new();
    let mut headers = Headers::new();
    headers.set(UserAgent("Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36".to_string()));

    let mut res = match client.get(url).headers(headers).send() {
        Ok(res) => res,
        Err(e) => return Some(format!("Error when parsing url: {}", e)),
    };

    let mut body_bytes = vec![];
    if let Err(e) = res.read_to_end(&mut body_bytes) {
        return Some(e.to_string());
    }

    // Holds the decoded text whenever the raw bytes are not valid UTF-8.
    let mut buf = String::new();
    let body = match std::str::from_utf8(&body_bytes[..]) {
        Ok(text) => text,
        Err(_) => {
            // Try the charset advertised by the server first.
            let decoded = res.headers.get::<ContentType>()
                .and_then(|ct| parse_charset_from_content_type(ct))
                .and_then(|charset| encoding_from_whatwg_label(&charset))
                .map_or(false, |encoding| {
                    encoding
                        .decode_to(&body_bytes[..], DecoderTrap::Replace, &mut buf)
                        .is_ok()
                });
            if !decoded {
                // No usable charset: replace whatever was partially written
                // with a lossy UTF-8 view instead of parsing an empty body.
                buf = String::from_utf8_lossy(&body_bytes[..]).into_owned();
            }
            &buf[..]
        }
    };

    Some(parse_str_between("<title>", "</title>", body).to_string())
}
/// Returns the text found between an opening tag and `close` inside `s`.
///
/// The final character of `open` (normally `>`) is dropped before searching,
/// so an opening tag that carries attributes (`<title lang="en">`) is matched
/// as well. The result starts right after the first `>` that follows the
/// opening tag and ends at the first occurrence of `close`; when `close` is
/// absent, everything up to the next opening tag (or the end of `s`) is
/// returned.
///
/// Returns the literal `"No title found"` when `open` is empty, when the
/// opening tag does not occur in `s`, or when the opening tag is never
/// terminated by `>` (e.g. truncated markup). This function never panics.
fn parse_str_between<'a>(open: &'a str, close: &'a str, s: &'a str) -> &'a str
{
    const NOT_FOUND: &'static str = "No title found";

    // Slice off the last character on a char boundary; an empty `open` has
    // nothing to search for.
    let open_prefix = match open.char_indices().next_back() {
        Some((last, _)) => &open[..last],
        None => return NOT_FOUND,
    };

    s.splitn(3, open_prefix)
        .nth(1)
        // Skip the remainder of the opening tag (attributes and the `>`).
        .and_then(|after_open| after_open.splitn(2, ">").nth(1))
        .and_then(|after_tag| after_tag.splitn(2, close).next())
        .unwrap_or(NOT_FOUND)
}
/// Extracts the `charset` parameter from a `Content-Type` header.
///
/// Walks the MIME parameter list in order and returns the value of the first
/// parameter whose attribute equals `hyper::mime::Attr::Charset`, rendered
/// with `to_string()`. Returns `None` when no such parameter is present.
fn parse_charset_from_content_type(header: &ContentType) -> Option<String> {
    // Only the parameter list of the MIME type is relevant here.
    let Mime(_, _, ref params) = **header;

    for &(ref attr, ref value) in params.iter() {
        if *attr == hyper::mime::Attr::Charset {
            return Some(value.to_string());
        }
    }
    None
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment