Skip to content

Instantly share code, notes, and snippets.

@markdtw
Last active November 12, 2022 19:49
Show Gist options
  • Save markdtw/f3049da3fa68fcc0822ff7284b262000 to your computer and use it in GitHub Desktop.
Save markdtw/f3049da3fa68fcc0822ff7284b262000 to your computer and use it in GitHub Desktop.
/*
Mimic a browser when a user only types in the domain name in the address bar
without the scheme, e.g., https://, by sending requests to https://<domain>,
https://www.<domain>, http://<domain>, and http://www.<domain> to figure out
which one leads to the landing page.
Usage:
$> go build .
$> echo facebook.com | domain2URL
Domains are read from stdin, one per line. For each domain, the first URL that
responds properly is written to ./urlgo.txt; if none does, the bare domain is
written to ./urlerrgo.txt.
Tested with Go 1.19
Author: markdtw
*/
package main
import (
"bufio"
"fmt"
"log"
"net/http"
"os"
"strings"
"sync"
"time"
)
// userAgent is the User-Agent header value sent with every probe request; it
// is also written as the first line of both result files.
const userAgent = "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
// reqResult is the outcome of probing one domain, sent from sendRequest to the
// writer goroutine in main.
type reqResult struct {
// success reports whether one of the candidate URLs answered with a
// status code in [200, 400).
success bool
// out is the URL that answered when success is true, and the bare domain
// otherwise.
out string
}
// sendRequest probes domain with a HEAD request under each candidate URL
// prefix, in order: https://, https://www., http://, http://www.
//
// The first URL answering with a status code in [200, 400) is reported on
// outC as a successful reqResult and probing stops. If no candidate
// qualifies, a failed reqResult carrying the bare domain is reported instead.
// Exactly one value is sent on outC per call.
//
// i is only used as a sequence number in the log output. client performs the
// requests; its redirect policy and timeout are entirely up to the caller.
func sendRequest(i int, domain string, client *http.Client, outC chan reqResult) {
	prefixes := []string{"https://", "https://www.", "http://", "http://www."}
	for _, prefix := range prefixes {
		url := prefix + domain

		req, err := http.NewRequest(http.MethodHead, url, nil)
		if err != nil {
			msg := fmt.Sprintf("\t%07d: %s...%s", i, url, err.Error())
			log.Println(msg)
			continue
		}
		// No explicit "Host" header is set: net/http ignores a Host entry
		// in req.Header for client requests and derives the header from
		// the URL, which is also the correct value for the www. variants.
		req.Header.Set("User-Agent", userAgent)

		res, err := client.Do(req)
		if err != nil {
			msg := fmt.Sprintf("\t%07d: %s...%s", i, url, err.Error())
			log.Println(msg)
			continue
		}
		status := res.StatusCode
		// Only the status code matters. Close the body right away (not via
		// defer, which would pile up until the loop ends) so the underlying
		// connection is released instead of leaking.
		res.Body.Close()

		if status >= 200 && status < 400 {
			msg := fmt.Sprintf("%07d: %s...GOOD", i, url)
			log.Println(msg)
			outC <- reqResult{success: true, out: url}
			return
		}
		msg := fmt.Sprintf("\t%07d: %s...%d", i, url, status)
		log.Println(msg)
	}

	// Every candidate either errored or returned a status outside [200, 400).
	outC <- reqResult{success: false, out: domain}
}
// main reads domains from stdin, one per line, and probes each of them
// concurrently with sendRequest. For every domain, the URL that answered is
// appended to ./urlgo.txt, or the bare domain is appended to ./urlerrgo.txt
// when no candidate URL worked. Both files start with a line recording the
// User-Agent used. Blank and whitespace-only lines are skipped.
func main() {
	// to read inputs from stdin
	scnr := bufio.NewScanner(os.Stdin)

	// file handling for writing the results
	fp, err := os.Create("./urlgo.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer fp.Close()
	wtr := bufio.NewWriter(fp)
	if _, err = wtr.WriteString("User-Agent: " + userAgent + "\n\n"); err != nil {
		log.Fatal(err)
	}

	fpe, err := os.Create("./urlerrgo.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer fpe.Close()
	wtre := bufio.NewWriter(fpe)
	if _, err = wtre.WriteString("User-Agent: " + userAgent + "\n\n"); err != nil {
		log.Fatal(err)
	}

	// need this custom client to set the redirect policy and timeout
	client := &http.Client{
		// Do not follow redirects: the 3xx response itself is handed back,
		// and sendRequest accepts any status in [200, 400).
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
		Timeout: 10 * time.Second,
	}

	outC := make(chan reqResult) // to receive the results from the goroutines
	var wg sync.WaitGroup
	maxGoroutines := 100                      // can be tuned
	sem := make(chan struct{}, maxGoroutines) // to bound the number of goroutines

	// Single writer goroutine: the only place the two buffered writers are
	// touched while probes are in flight, so no locking is needed.
	go func() {
		for result := range outC {
			dst := wtre
			if result.success {
				dst = wtr
			}
			// err is local here so this goroutine never writes to main's err.
			if _, err := dst.WriteString(result.out + "\n"); err != nil {
				log.Fatal(err)
			}
			<-sem // release a slot from the channel
			// Done is signalled only after the write, so wg.Wait returning
			// means every result has reached its buffer.
			wg.Done()
		}
	}()

	i := 0
	// Scan reports false at EOF or on a read error; an empty line in the
	// middle of the input must not end processing, so it is skipped instead.
	for scnr.Scan() {
		domain := strings.TrimSpace(scnr.Text())
		if domain == "" {
			continue
		}
		sem <- struct{}{} // blocked if the channel is full
		wg.Add(1)
		go func(i int, domain string) {
			sendRequest(i, domain, client, outC)
		}(i, domain)
		i++
	}
	scanErr := scnr.Err()

	wg.Wait()
	close(outC)

	// Flush explicitly rather than via defer so a failed write to either
	// result file is reported instead of silently dropped.
	failed := false
	for _, w := range []*bufio.Writer{wtr, wtre} {
		if err := w.Flush(); err != nil {
			log.Println(err)
			failed = true
		}
	}
	if scanErr != nil {
		log.Println(scanErr)
		failed = true
	}
	if failed {
		log.Fatal("finished with errors")
	}
	log.Println("Done!")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment