Last active
March 2, 2024 13:08
-
-
Save patshaughnessy/27b1611e2c912346b929df97998d488d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code from: http://patshaughnessy.net/2020/1/20/downloading-100000-files-using-async-rust
//
// Cargo.toml:
// [dependencies]
// tokio = { version = "0.2", features = ["full"] }
// reqwest = { version = "0.10", features = ["json"] }
// futures = "0.3"
use std::io::prelude::*; | |
use std::fs::File; | |
use std::io::BufReader; | |
use futures::stream::StreamExt; | |
/// Reads the file at `path` and returns its contents as one `String` per line.
///
/// Line terminators are stripped by `BufRead::lines`.
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the file cannot be opened, or if
/// any line cannot be read (for example because it is not valid UTF-8).
fn read_lines(path: &str) -> std::io::Result<Vec<String>> {
    let file = File::open(path)?;
    // Collecting into `io::Result<Vec<_>>` stops at the first failed read and
    // reports it, instead of silently dropping unreadable lines and handing the
    // caller a truncated list.
    BufReader::new(file).lines().collect()
}
#[tokio::main] | |
async fn main() -> Result<(), Box<dyn std::error::Error>> { | |
let paths: Vec<String> = read_lines("urls.txt")?; | |
let fetches = futures::stream::iter( | |
paths.into_iter().map(|path| { | |
async move { | |
match reqwest::get(&path).await { | |
Ok(resp) => { | |
match resp.text().await { | |
Ok(text) => { | |
println!("RESPONSE: {} bytes from {}", text.len(), path); | |
} | |
Err(_) => println!("ERROR reading {}", path), | |
} | |
} | |
Err(_) => println!("ERROR downloading {}", path), | |
} | |
} | |
}) | |
).buffer_unordered(100).collect::<Vec<()>>(); | |
fetches.await; | |
Ok(()) | |
} |
Cool - thanks! Yeah, creating one client at the beginning probably saves a lot of allocations.
Sorry, the urls.txt
file is private to my employer; can't share it. But it's just a long text file with one URL per line. Is that what you're looking for?
I changed @hariamoor's code a bit to make it work:
// Excerpt only: the surrounding stream setup and the match arms are elided.
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let send_fut = client.get(&path).send(); // Create the request future here, outside the `async move` block
async move {
// Await the request future that was created above, outside this block.
match send_fut.await {
}
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have a hunch that you could get better performance out of this by changing your
main
function as follows: This pools the connections into one client, as recommended in the
reqwest::Client
docs. Would you, by any chance, know where to find the test files, so I can test this out?