-
-
Save tmcw/1d3868adc415d8c5500a2a20f726b695 to your computer and use it in GitHub Desktop.
#[macro_use] | |
extern crate lazy_static; | |
extern crate regex; | |
extern crate reqwest; | |
use std::env; | |
use std::fs::File; | |
use std::io::prelude::*; | |
use regex::*; | |
use reqwest::Client; | |
use reqwest::Error; | |
fn get_redirected_url(url: &str) -> Option<String> { | |
Client::new() | |
.head(url) | |
.send() | |
.map(|resp| resp.url().as_str().to_string()) | |
.ok() | |
} | |
fn main() { | |
let filename = "../tmcw.github.com/_posts/2017-07-01-recently.md"; | |
let mut f = File::open(filename).expect("file not found"); | |
let mut contents = String::new(); | |
f.read_to_string(&mut contents).expect( | |
"something went wrong reading the file", | |
); | |
lazy_static! { | |
static ref AMZN_RE: Regex = Regex::new(r"https?://amzn.to/([0-9A-Za-z]+)").unwrap(); | |
static ref ISBN1: Regex = Regex::new( | |
r"https://www.amazon.com/(?:[A-Za-z\-]+)/dp/(\d{10})/").unwrap(); | |
static ref ISBN2: Regex = Regex::new( | |
r"https://www.amazon.com/gp/product/(\d{10})/").unwrap(); | |
} | |
let captures: Vec<_> = AMZN_RE.captures_iter(&contents).collect(); | |
for cap in captures { | |
get_redirected_url(cap.get(0).unwrap().as_str()).and_then(|redirected_to| { | |
let c1 = ISBN1.captures(&redirected_to); | |
let c2 = ISBN2.captures(&redirected_to); | |
c1.or(c2) | |
}); | |
// .and_then(|capture| { | |
// let isbn = capture.get(1).unwrap().as_str(); | |
// let worldcat_url = format!("http://www.worldcat.org/isbn/{}", isbn); | |
// get_redirected_url(&worldcat_url).map(|worldcat_permalink| { | |
// println!("worldcat permalink:\n{:?}", worldcat_permalink); | |
// // contents = | |
// // contents.replace(capture.get(0).unwrap().as_str(), &worldcat_permalink); | |
// }) | |
// }); | |
} | |
} |
tmcw
commented
Nov 5, 2017
Hi, thanks for sharing this, really. It came at the right time, when I was in the right mood, to play around with rust for the first time :)
I got it to compile by replacing `c1.or(c2)` with:
// Turn the matched capture into an owned String (group 1 of whichever regex
// matched) so the value handed back no longer refers to `redirected_to`.
match c1.or(c2) {
Some(c) => Some(c.get(1).unwrap().as_str().to_string()),
None => None,
}
When returning `c1` or `c2`, both hold references to `redirected_to` inside them, so passing one of them along makes it unclear who will clean it up (maybe you have to borrow it or something). The way I understand my fix is that it finishes using the capture by evaluating the regex and making a string that gets passed along; I believe it's the `to_string` that does this. What would have helped me was having my editor tell me exactly what type was returned in that chain of functions; at this point I'm kind of guessing.
I've never written a big project in a non-gc language. This makes me curious how this all plays out in more complicated code. Again, thanks for sharing.
I think the problem is occurring because you're trying to "prematurely" end the lifetime of a borrowed reference (by implicitly calling `drop` on l43 when you close the expression). Even though @paulmach's suggestion works, you can also fix it if you:
- uncomment your code from l44 to l52
- delete l43
- insert `})` as a new line on l47
Now it compiles and prints new Worldcat URLs:
// For every short link found in the post: resolve it, look for an ISBN in the
// resolved URL, and print where the WorldCat ISBN URL resolves to.
for cap in captures {
get_redirected_url(cap.get(0).unwrap().as_str()).and_then(|redirected_to| {
// Both capture sets borrow `redirected_to`, so everything that uses them is
// chained inside this closure, while `redirected_to` is still alive.
let c1 = ISBN1.captures(&redirected_to);
let c2 = ISBN2.captures(&redirected_to);
c1.or(c2)
.and_then(|capture| {
// Group 1 of either pattern is the 10-digit id.
let isbn = capture.get(1).unwrap().as_str();
let worldcat_url = format!("http://www.worldcat.org/isbn/{}", isbn);
get_redirected_url(&worldcat_url).map(|worldcat_permalink| {
println!("worldcat permalink:\n{:?}", worldcat_permalink);
// contents =
// contents.replace(capture.get(0).unwrap().as_str(), &worldcat_permalink);
})
})
});
}