Skip to content

Instantly share code, notes, and snippets.

@0x3n0
Created February 3, 2023 08:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 0x3n0/a799e2b43f854341974a03d3fc6f0419 to your computer and use it in GitHub Desktop.
Save 0x3n0/a799e2b43f854341974a03d3fc6f0419 to your computer and use it in GitHub Desktop.
use std::io::{self, BufRead, Read, Write};
use std::net::TcpStream;
use std::str;

/// Extracts the values of every `src=` and `href=` attribute found in `body`.
///
/// Handles values quoted with `"` or `'` (value runs to the matching quote)
/// as well as unquoted values (value runs to the next whitespace or `>`).
/// All occurrences on a line are collected, not just the first, and lines
/// that end mid-attribute are handled without panicking.
///
/// Returned slices borrow from `body`.
fn extract_urls(body: &str) -> Vec<&str> {
    let mut urls = Vec::new();

    for line in body.lines() {
        for attr in ["src=", "href="] {
            // Scan the whole line for repeated occurrences of this attribute.
            let mut rest = line;
            while let Some(pos) = rest.find(attr) {
                let after = &rest[pos + attr.len()..];
                let bytes = after.as_bytes();

                let (value, consumed) = if !bytes.is_empty()
                    && (bytes[0] == b'"' || bytes[0] == b'\'')
                {
                    // Quoted value: take everything up to the matching quote.
                    let quote = bytes[0] as char;
                    let inner = &after[1..];
                    match inner.find(quote) {
                        Some(end) => (&inner[..end], end + 2),
                        // Unterminated quote: take the remainder of the line.
                        None => (inner, after.len()),
                    }
                } else {
                    // Unquoted value: ends at whitespace or the closing '>'.
                    let end = after
                        .find(|c: char| c.is_whitespace() || c == '>')
                        .unwrap_or(after.len());
                    (&after[..end], end)
                };

                if !value.is_empty() {
                    urls.push(value);
                }
                // Progress is guaranteed: `after` already starts past `attr`.
                rest = &after[consumed..];
            }
        }
    }

    urls
}

/// Fetches `http://<domain>/` and prints every URL found on the page.
/// Errors are reported to stderr; one bad domain does not stop the crawl.
fn crawl(domain: &str) -> io::Result<()> {
    // `Connection: close` makes the server end the stream when the response
    // is complete; without it, a keep-alive server never closes the socket
    // and `read_to_end` below blocks forever.
    let request = format!(
        "GET / HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
        domain
    );

    let mut stream = TcpStream::connect((domain, 80))?;
    // `write_all` retries partial writes; a bare `write` may send a prefix.
    stream.write_all(request.as_bytes())?;

    let mut response = Vec::new();
    stream.read_to_end(&mut response)?;

    // HTTP bodies are not guaranteed to be UTF-8; a lossy conversion avoids
    // the panic that `str::from_utf8(...).unwrap()` would cause.
    let body = String::from_utf8_lossy(&response);
    for url in extract_urls(&body) {
        println!("{}", url);
    }
    Ok(())
}

/// Reads one domain per line from stdin and crawls each for URLs.
fn main() {
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let domain = match line {
            Ok(d) => d,
            Err(e) => {
                eprintln!("error reading stdin: {}", e);
                break;
            }
        };
        let domain = domain.trim();
        if domain.is_empty() {
            continue;
        }
        // A failed domain is reported but does not abort the whole run.
        if let Err(e) = crawl(domain) {
            eprintln!("{}: {}", domain, e);
        }
    }
}

Compile with:

rustc crawUrl.rs

Then crawl the URLs of every domain listed in DomainList.txt:

cat DomainList.txt | ./crawUrl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment