use std::io::{self, BufRead, Read, Write};
use std::net::TcpStream;
use std::str;
/// Extracts the values of double-quoted `src="..."` and `href="..."`
/// attributes from an HTML body, returning them as slices borrowed from `body`.
///
/// Scans line by line and finds every occurrence per line (the previous
/// version found at most one of each). A URL ends at the closing `"`;
/// attributes without a closing quote, and bare `src=`/`href=` at the end
/// of a line, are skipped instead of panicking on an out-of-range slice.
fn extract_urls(body: &str) -> Vec<&str> {
    let mut urls = Vec::new();
    for line in body.lines() {
        // Only double-quoted attributes are recognized; single-quoted or
        // unquoted values are ignored (the old space-delimited heuristic
        // kept the trailing quote and could slice past the end of the line).
        for marker in ["src=\"", "href=\""] {
            let mut rest = line;
            while let Some(pos) = rest.find(marker) {
                let tail = &rest[pos + marker.len()..];
                match tail.find('"') {
                    Some(end) => {
                        urls.push(&tail[..end]);
                        // Continue scanning after the closing quote so
                        // several links on one line are all collected.
                        rest = &tail[end + 1..];
                    }
                    // No closing quote on this line: malformed, stop here.
                    None => break,
                }
            }
        }
    }
    urls
}
/// Reads one domain per line from stdin, fetches `GET /` over plain HTTP
/// (port 80), and prints every `src`/`href` URL found in the response.
///
/// A failure for one domain (DNS, connect, write, read) is reported to
/// stderr and the loop continues with the next domain, instead of the
/// previous behavior of `unwrap()`-panicking and aborting the whole run.
fn main() {
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let domain = match line {
            Ok(d) => d.trim().to_string(),
            Err(e) => {
                eprintln!("error reading stdin: {}", e);
                break;
            }
        };
        if domain.is_empty() {
            continue;
        }
        // `Connection: close` makes the server end the stream when the
        // response is done; without it, keep-alive HTTP/1.1 servers hold
        // the socket open and `read_to_end` blocks until their timeout.
        let request = format!(
            "GET / HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
            domain
        );
        let mut stream = match TcpStream::connect(format!("{}:80", domain)) {
            Ok(s) => s,
            Err(e) => {
                eprintln!("{}: connect failed: {}", domain, e);
                continue;
            }
        };
        // write_all retries on short writes; bare `write` may send only
        // part of the request.
        if let Err(e) = stream.write_all(request.as_bytes()) {
            eprintln!("{}: write failed: {}", domain, e);
            continue;
        }
        let mut response = Vec::new();
        if let Err(e) = stream.read_to_end(&mut response) {
            eprintln!("{}: read failed: {}", domain, e);
            continue;
        }
        // Real-world bodies are often not valid UTF-8 (other charsets,
        // gzip); lossy conversion avoids the panic of from_utf8().unwrap().
        let response_str = String::from_utf8_lossy(&response);
        for url in extract_urls(&response_str) {
            println!("{}", url);
        }
    }
}
// Build:
//   rustc crawUrl.rs
// Crawl the URLs for each domain listed in DomainList.txt:
//   cat DomainList.txt | ./crawUrl