Created
May 7, 2020 22:19
-
-
Save gen2brain/1fe7c39004a4eb24f45e2e4fbe3e746e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"encoding/csv" | |
"encoding/json" | |
"flag" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"math/rand" | |
"net" | |
"net/http" | |
"net/http/cookiejar" | |
"os" | |
"os/exec" | |
"os/signal" | |
"path/filepath" | |
"regexp" | |
"runtime" | |
"strconv" | |
"strings" | |
"sync" | |
"syscall" | |
"time" | |
"github.com/PuerkitoBio/goquery" | |
"h12.me/socks" | |
) | |
// contact is a single directory entry scraped from a search-result page.
//
// The field order is significant: the type is instantiated elsewhere in this
// file with an unkeyed composite literal, so fields must not be reordered.
type contact struct {
	// FullName is the entry's name exactly as shown in the result link.
	FullName string `json:"fullName"`
	// FirstName is the last space-separated token of FullName.
	FirstName string `json:"firstName"`
	// LastName is the first space-separated token of FullName.
	LastName string `json:"lastName"`
	// Address is the address text before the comma, without its last token.
	Address string `json:"address"`
	// AddressNumber is the last token of the address text before the comma.
	AddressNumber string `json:"addressNumber"`
	// PlaceNumber is the first token after the comma in the address line.
	PlaceNumber string `json:"placeNumber"`
	// Place is the remainder of the address line after PlaceNumber.
	Place string `json:"place"`
	// Community is the community label with its parentheses stripped.
	Community string `json:"community"`
	// Phone lists the phone numbers; the parser stores them in ten slots.
	Phone []string `json:"phone"`
}
// Endpoints of the 11811.rs white-pages site used by the scraper.
var (
	// mainUrl is the landing page of the white-pages section.
	mainUrl = "http://www.11811.rs/belestrane/index"

	// codeUrl returns, on POST, the JSON document holding the one-time code
	// that has to be appended to every search URL.
	codeUrl = "http://www.11811.rs/Footer/napraviRandomKod"

	// searchUrl is the search URL template. Its placeholders are, in order:
	// first name, place, last name, page, code.
	searchUrl = "http://www.11811.rs/BeleStrane/Pretraga/%s/sve/%s/%s/sve/%s/%s"
)

// letters is the alphabet used as first-name and last-name search prefixes.
var letters = []string{
	"A", "B", "C", "Č", "Ć", "D", "Đ", "E", "F",
	"G", "H", "I", "J", "K", "L", "M", "N", "O",
	"P", "R", "S", "Š", "T", "U", "V", "Z", "Ž",
}
var ( | |
dir string | |
counter int | |
name string | |
verbose bool | |
contacts []contact | |
jar http.CookieJar | |
savedPlace string | |
savedFirstName string | |
savedLastName string | |
currentPlace string | |
currentFirstName string | |
currentLastName string | |
wgp sync.WaitGroup | |
wgb sync.WaitGroup | |
wgw sync.WaitGroup | |
) | |
var rnd int = randInt(15, 25) | |
var uas []string = readLines("user-agents.txt") | |
var userAgent string = uas[rand.Intn(len(uas))] | |
func prepareProxyClient() *http.Client { | |
dialSocksProxy := socks.DialSocksProxy(socks.SOCKS5, "127.0.0.1:9050") | |
transport := &http.Transport{ | |
Dial: dialSocksProxy, | |
} | |
return &http.Client{ | |
Jar: jar, | |
Transport: transport, | |
} | |
} | |
func renewIP() { | |
conn, err := net.Dial("tcp", "127.0.0.1:9051") | |
defer conn.Close() | |
if err != nil { | |
log.Printf("Error renewIP: %v\n", err.Error()) | |
} | |
var n int | |
var buff []byte | |
rnd = randInt(15, 25) | |
conn.Write([]byte("AUTHENTICATE\r\n")) | |
buff = make([]byte, 1024) | |
n, _ = conn.Read(buff) | |
if strings.HasPrefix(string(buff[:n]), "250") { | |
conn.Write([]byte("SIGNAL NEWNYM\r\n")) | |
buff = make([]byte, 1024) | |
n, _ = conn.Read(buff) | |
if !strings.HasPrefix(string(buff[:n]), "250") { | |
log.Printf("Error renewIP: %s\n", string(buff[:n])) | |
} | |
if verbose { | |
log.Printf("renewIP: %s", string(buff[:n])) | |
} | |
} else { | |
log.Printf("Error renewIP: %s\n", string(buff[:n])) | |
} | |
} | |
func startTOR() { | |
datadir := dir + string(os.PathSeparator) + "tmp" | |
geoip := dir + string(os.PathSeparator) + "geoip" | |
geoip6 := dir + string(os.PathSeparator) + "geoip6" | |
if runtime.GOOS == "windows" { | |
tor := dir + string(os.PathSeparator) + "tor.exe" | |
cmd := exec.Command(tor, "--DataDirectory", datadir, "--ControlPort", "9051", "--GeoIPFile", geoip, "--GeoIPv6File", geoip6) | |
if verbose { | |
log.Printf("Executing: %s\n", strings.Join(cmd.Args, " ")) | |
} | |
err := cmd.Start() | |
if err != nil { | |
log.Printf("Error exec: %v\n", err) | |
} | |
} else if runtime.GOOS == "linux" { | |
cmd := fmt.Sprintf("tor --User tor --DataDirectory %s --ControlPort 9051 --GeoIPFile %s --GeoIPv6File %s", | |
datadir, geoip, geoip6) | |
if verbose { | |
log.Printf("Executing: %s\n", cmd) | |
} | |
err := exec.Command("sh", "-c", cmd).Start() | |
if err != nil { | |
log.Printf("Error exec: %v\n", err) | |
} | |
} | |
} | |
func httpRequest(uri string, method string) (*http.Response, error) { | |
httpClient := prepareProxyClient() | |
req, err := http.NewRequest(method, uri, nil) | |
if err != nil { | |
return nil, err | |
} | |
req.Close = true | |
req.Header.Set("Connection", "close") | |
req.Header.Set("User-Agent", userAgent) | |
res, err := httpClient.Do(req) | |
if err != nil || res == nil { | |
return nil, err | |
} | |
if res.StatusCode != 200 { | |
return nil, nil | |
} | |
return res, nil | |
} | |
func getDocument(uri string) (*goquery.Document, error) { | |
res, err := httpRequest(uri, "GET") | |
if err != nil { | |
log.Printf("Error httpRequest %s: %v\n", uri, err.Error()) | |
return nil, err | |
} | |
if res == nil { | |
return nil, nil | |
} | |
doc, err := goquery.NewDocumentFromResponse(res) | |
if err != nil { | |
log.Printf("Error NewDocumentFromResponse %s: %v\n", uri, err.Error()) | |
return nil, err | |
} | |
if doc == nil { | |
return nil, nil | |
} | |
return doc, nil | |
} | |
func getCode() (string, error) { | |
res, err := httpRequest(codeUrl, "POST") | |
if err != nil { | |
log.Printf("Error httpRequest %s: %v\n", codeUrl, err.Error()) | |
return "", err | |
} | |
body, _ := ioutil.ReadAll(res.Body) | |
defer res.Body.Close() | |
var data map[string]interface{} | |
err = json.Unmarshal(body, &data) | |
if err != nil { | |
return "", err | |
} | |
code := data["code"].(string) | |
return code, nil | |
} | |
func getResultsNumber(firstName string, lastName string, place string) int { | |
var results int = -1 | |
code, _ := getCode() | |
uri := fmt.Sprintf(searchUrl, firstName, place, lastName, "1", code) | |
var err error | |
var doc *goquery.Document | |
doc, err = getDocument(uri) | |
if err != nil { | |
time.Sleep(1 * time.Second) | |
doc, err = getDocument(uri) | |
if err != nil { | |
return -1 | |
} | |
} | |
re := regexp.MustCompile(`od (\d{1,3})`) | |
s := re.FindAllStringSubmatch(doc.Text(), -1) | |
if len(s) > 0 { | |
results, _ = strconv.Atoi(s[0][1]) | |
} | |
return results | |
} | |
// isTORRunning reports whether something accepts TCP connections on the SOCKS
// port 127.0.0.1:9050. The probe connection is closed right away.
func isTORRunning() bool {
	conn, err := net.Dial("tcp", "127.0.0.1:9050")
	if err != nil {
		return false
	}
	conn.Close()
	return true
}
// isTORControlRunning reports whether something accepts TCP connections on
// the control port 127.0.0.1:9051. The probe connection is closed right away.
func isTORControlRunning() bool {
	conn, err := net.Dial("tcp", "127.0.0.1:9051")
	if err != nil {
		return false
	}
	conn.Close()
	return true
}
func isPhoneInContacts(phone string) bool { | |
for _, c := range contacts { | |
if c.Phone[0] == phone { | |
return true | |
} | |
} | |
return false | |
} | |
// readLines returns the lines of the file at path, without line terminators.
// It returns nil when the file cannot be opened. If reading stops early (for
// example on a line longer than the scanner's buffer), the error is logged and
// the lines read so far are returned.
func readLines(path string) []string {
	file, err := os.Open(path)
	if err != nil {
		return nil
	}
	defer file.Close()

	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	// Scan returning false is also how errors surface; do not mistake a
	// truncated read for a complete file.
	if err := scanner.Err(); err != nil {
		log.Printf("Error readLines %s: %v\n", path, err)
	}
	return lines
}
func parseHTML(i int, s *goquery.Selection) { | |
//defer wgp.Done() | |
//defer func() { | |
//if r := recover(); r != nil { | |
//log.Print("Recovered in parseHTML: ", r) | |
//} | |
//}() | |
fullName := s.Find(`a`).First().Text() | |
temp := strings.Split(fullName, " ") | |
lastName := temp[0] | |
firstName := temp[len(temp)-1] | |
next := s.Parent().Next() | |
temp2 := strings.Split(strings.TrimSpace(next.Text()), "\n") | |
tempAddress := strings.Split(strings.TrimSpace(temp2[0]), ",") | |
tempAddress2 := strings.Split(tempAddress[0], " ") | |
address := strings.Join(tempAddress2[:len(tempAddress2)-1], " ") | |
addressNumber := tempAddress2[len(tempAddress2)-1] | |
tempPlace := strings.Split(strings.TrimSpace(tempAddress[1]), " ") | |
placeNumber := tempPlace[0] | |
place := strings.Join(tempPlace[1:], " ") | |
community := strings.TrimSpace(temp2[2]) | |
community = strings.Replace(community, "(", "", -1) | |
community = strings.Replace(community, ")", "", -1) | |
temp3 := strings.Split(strings.Trim(next.Next().Text(), " "), "\n") | |
phone := make([]string, 10) | |
for n, v := range temp3[1:] { | |
v = strings.Trim(v, " ") | |
if v != "" { | |
phone[n] = v | |
} | |
} | |
if !isPhoneInContacts(phone[0]) { | |
c := contact{fullName, firstName, lastName, address, addressNumber, placeNumber, place, community, phone} | |
contacts = append(contacts, c) | |
} | |
} | |
func belestrane(firstName string, lastName string, place string) { | |
var pages int | |
jar, _ = cookiejar.New(nil) | |
currentPlace = place | |
currentFirstName = firstName | |
currentLastName = lastName | |
results := getResultsNumber(firstName, lastName, place) | |
if results != -1 { | |
pages = (results / 10) + 1 | |
} else { | |
pages = 1 | |
} | |
getPage := func(page string) { | |
//defer wgb.Done() | |
//defer func() { | |
//if r := recover(); r != nil { | |
//log.Print("Recovered in belestrane: ", r) | |
//} | |
//}() | |
counter++ | |
var err error | |
var code string | |
var doc *goquery.Document | |
var divs *goquery.Selection | |
if len(contacts) > 0 && counter%50 == 0 { | |
saveCSV(name) | |
saveJSON(name) | |
} | |
if counter > 0 && counter%rnd == 0 { | |
renewIP() | |
userAgent = uas[rand.Intn(len(uas))] | |
if verbose { | |
log.Printf("UserAgent changed to %s\n", userAgent) | |
} | |
time.Sleep(1 * time.Second) | |
} | |
code, err = getCode() | |
if err != nil || code == "" { | |
time.Sleep(1 * time.Second) | |
code, _ = getCode() | |
} | |
uri := fmt.Sprintf(searchUrl, firstName, place, lastName, page, code) | |
doc, err = getDocument(uri) | |
if err != nil { | |
time.Sleep(1 * time.Second) | |
doc, err = getDocument(uri) | |
if err != nil { | |
return | |
} | |
} | |
divs = doc.Find(`div.plaviLink`) | |
//wgp.Add(divs.Length()) | |
divs.Each(func(i int, s *goquery.Selection) { | |
//go parseHTML(i, s) | |
parseHTML(i, s) | |
}) | |
//wgp.Wait() | |
if verbose { | |
log.Printf("%s, found: %d, total: %d\n", uri, divs.Length(), len(contacts)) | |
} | |
} | |
//wgb.Add(pages) | |
for i := 1; i <= pages; i++ { | |
page := strconv.Itoa(i) | |
//go getPage(page) | |
getPage(page) | |
} | |
//wgb.Wait() | |
} | |
func saveJSON(name string) { | |
js, err := json.Marshal(contacts) | |
if err != nil { | |
log.Printf("Error saveJSON: %v\n", err.Error()) | |
return | |
} | |
file := dir + string(os.PathSeparator) + "kontakti" + string(os.PathSeparator) + name + ".json" | |
e := ioutil.WriteFile(file, js, 0644) | |
if e != nil { | |
log.Printf("Error saveJSON: %v\n", e.Error()) | |
return | |
} | |
} | |
func saveCSV(name string) { | |
file := dir + string(os.PathSeparator) + "kontakti" + string(os.PathSeparator) + name + ".csv" | |
csvfile, err := os.Create(file) | |
if err != nil { | |
log.Printf("Error saveCSV: %v\n", err.Error()) | |
return | |
} | |
defer csvfile.Close() | |
writer := csv.NewWriter(csvfile) | |
for _, c := range contacts { | |
record := []string{c.LastName, c.FirstName, c.FullName, c.Address, c.AddressNumber, c.PlaceNumber, c.Place, c.Community, | |
c.Phone[0], c.Phone[1], c.Phone[2], c.Phone[3], c.Phone[4], c.Phone[5], c.Phone[6], c.Phone[7], c.Phone[8], c.Phone[9]} | |
err := writer.Write(record) | |
if err != nil { | |
log.Printf("Error saveCSV: %v\n", err.Error()) | |
return | |
} | |
} | |
writer.Flush() | |
} | |
func loadJSON(filename string) { | |
if verbose { | |
log.Printf("Loading file %s\n", filename) | |
} | |
file, err := ioutil.ReadFile(filename) | |
if err != nil { | |
log.Printf("Error loadJSON: %v\n", err.Error()) | |
} | |
contacts = make([]contact, 0) | |
e := json.Unmarshal(file, &contacts) | |
if e != nil { | |
log.Printf("Error loadJSON: %v\n", e.Error()) | |
} | |
} | |
// randInt returns a pseudo-random integer in the half-open range [min, max).
// When the range is empty (max <= min) it returns min instead of panicking in
// rand.Intn.
func randInt(min int, max int) int {
	if max <= min {
		return min
	}
	return min + rand.Intn(max-min)
}
func main() { | |
verb := flag.Bool("verbose", false, "Verbose output") | |
mesta := flag.String("mesto", "sva-mesta.txt", "Fajl sa mestima") | |
flag.Parse() | |
verbose = *verb | |
rand.Seed(time.Now().UTC().UnixNano()) | |
dir, _ = filepath.Abs(filepath.Dir(os.Args[0])) | |
c := make(chan os.Signal, 3) | |
signal.Notify(c, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM) | |
go func() { | |
for sig := range c { | |
log.Printf("Captured %v, saving progress and exiting...", sig) | |
if len(contacts) > 0 { | |
saveCSV(name) | |
saveJSON(name) | |
} | |
if currentPlace != "" { | |
cur := fmt.Sprintf("%s,%s,%s\n", currentPlace, currentFirstName, currentLastName) | |
ioutil.WriteFile(dir+string(os.PathSeparator)+"current.txt", []byte(cur), 0644) | |
} | |
os.Exit(1) | |
} | |
}() | |
if !isTORRunning() { | |
startTOR() | |
time.Sleep(3 * time.Second) | |
if !isTORRunning() { | |
log.Fatal("TOR is not listening on port 9050.\n") | |
} | |
if !isTORControlRunning() { | |
log.Fatal("TOR is not listening on control port 9051.\n") | |
} | |
} | |
filename := filepath.Base(*mesta) | |
extension := filepath.Ext(filename) | |
name = strings.TrimRight(filename, extension) | |
jsonfile := dir + string(os.PathSeparator) + "kontakti" + string(os.PathSeparator) + name + ".json" | |
if _, err := os.Stat(jsonfile); err == nil { | |
loadJSON(jsonfile) | |
} | |
var places []string | |
if _, err := os.Stat(*mesta); err == nil { | |
places = readLines(*mesta) | |
} else { | |
places = strings.Split(*mesta, ",") | |
} | |
if _, err := os.Stat(dir + string(os.PathSeparator) + "current.txt"); err == nil { | |
lines := readLines(dir + string(os.PathSeparator) + "current.txt") | |
split := strings.Split(lines[0], ",") | |
savedPlace = split[0] | |
savedFirstName = split[1] | |
savedLastName = split[2] | |
log.Printf("current.txt file found, continuing from %s, %s, %s\n", savedPlace, savedFirstName, savedLastName) | |
} | |
PLACES: | |
for _, place := range places { | |
if savedPlace != "" { | |
if savedPlace != place { | |
continue PLACES | |
} else { | |
savedPlace = "" | |
} | |
} | |
FIRSTNAME: | |
for _, firstName := range letters { | |
if savedFirstName != "" { | |
if savedFirstName != firstName { | |
continue FIRSTNAME | |
} else { | |
savedFirstName = "" | |
} | |
} | |
LASTNAME: | |
for _, lastName := range letters { | |
if savedLastName != "" { | |
if savedLastName != lastName { | |
continue LASTNAME | |
} else { | |
savedLastName = "" | |
} | |
} | |
belestrane(firstName, lastName, place) | |
} | |
} | |
} | |
saveCSV(name) | |
saveJSON(name) | |
log.Printf("Total: %d", len(contacts)) | |
os.Remove(dir + string(os.PathSeparator) + "current.txt") | |
os.Exit(0) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment