Skip to content

Instantly share code, notes, and snippets.

@gen2brain
Created May 7, 2020 22:19
Show Gist options
  • Save gen2brain/1fe7c39004a4eb24f45e2e4fbe3e746e to your computer and use it in GitHub Desktop.
Save gen2brain/1fe7c39004a4eb24f45e2e4fbe3e746e to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"math/rand"
"net"
"net/http"
"net/http/cookiejar"
"os"
"os/exec"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/PuerkitoBio/goquery"
"h12.me/socks"
)
// contact is a single directory entry scraped from a search result page.
// The JSON tags are the keys used when the contact list is marshalled and
// unmarshalled. Field order matters: parseHTML builds values positionally.
type contact struct {
	FullName      string   `json:"fullName"`      // text of the result's first link, unmodified
	FirstName     string   `json:"firstName"`     // last space-separated token of FullName
	LastName      string   `json:"lastName"`      // first space-separated token of FullName
	Address       string   `json:"address"`       // street part of the address line, without its last token
	AddressNumber string   `json:"addressNumber"` // last token of the street part
	PlaceNumber   string   `json:"placeNumber"`   // first token after the comma (presumably a postal code - TODO confirm)
	Place         string   `json:"place"`         // remaining tokens after PlaceNumber
	Community     string   `json:"community"`     // third line of the address cell with parentheses removed
	Phone         []string `json:"phone"`         // phone numbers; parseHTML allocates ten slots
}
// Endpoints of the 11811.rs white-pages site.
var (
	// mainUrl is the index page of the directory.
	mainUrl = "http://www.11811.rs/belestrane/index"

	// codeUrl is requested by getCode to obtain the code that every search
	// URL must carry.
	codeUrl = "http://www.11811.rs/Footer/napraviRandomKod"

	// searchUrl is a fmt format string. Its five %s verbs are, in order:
	// first name, place, last name, page number, code.
	searchUrl = "http://www.11811.rs/BeleStrane/Pretraga/%s/sve/%s/%s/sve/%s/%s"
)
// letters lists the initials that main combines pairwise (first name x last
// name) to enumerate searches for every place.
var letters = []string{
	"A", "B", "C", "Č", "Ć", "D", "Đ", "E", "F",
	"G", "H", "I", "J", "K", "L", "M", "N", "O",
	"P", "R", "S", "Š", "T", "U", "V", "Z", "Ž",
}
// Mutable program state shared by the scraper functions.
var (
	// Run configuration, filled in by main.
	dir     string // absolute directory of the executable
	name    string // base name of the output files
	verbose bool   // enables progress logging

	// Scraping progress.
	counter  int            // number of result pages requested so far
	contacts []contact      // every contact collected (or loaded) so far
	jar      http.CookieJar // cookie jar used by the proxy client; replaced per search

	// Resume point read from current.txt; each value is cleared once reached.
	savedPlace     string
	savedFirstName string
	savedLastName  string

	// Search currently in progress; written to current.txt on interruption.
	currentPlace     string
	currentFirstName string
	currentLastName  string

	// Wait groups that are not used by any live code in the visible source.
	wgp sync.WaitGroup
	wgb sync.WaitGroup
	wgw sync.WaitGroup
)
// defaultUserAgent is used when user-agents.txt is missing or contains no
// usable line, so that picking a random agent can never index an empty list.
const defaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"

// rnd is the number of page requests between two identity renewals; it is
// re-drawn from [15, 25) on every renewal.
var rnd int = randInt(15, 25)

// uas holds the candidate User-Agent strings, read from user-agents.txt in
// the current working directory. It is guaranteed to be non-empty.
var uas []string = loadUserAgents("user-agents.txt")

// userAgent is the User-Agent header sent with every request. It is picked at
// package initialisation, i.e. before main seeds math/rand.
var userAgent string = uas[rand.Intn(len(uas))]

// loadUserAgents reads one User-Agent per line from path, dropping blank
// lines, and falls back to defaultUserAgent when nothing usable is found.
func loadUserAgents(path string) []string {
	var agents []string
	for _, line := range readLines(path) {
		if line = strings.TrimSpace(line); line != "" {
			agents = append(agents, line)
		}
	}
	if len(agents) == 0 {
		return []string{defaultUserAgent}
	}
	return agents
}
func prepareProxyClient() *http.Client {
dialSocksProxy := socks.DialSocksProxy(socks.SOCKS5, "127.0.0.1:9050")
transport := &http.Transport{
Dial: dialSocksProxy,
}
return &http.Client{
Jar: jar,
Transport: transport,
}
}
// torControlTimeout bounds the whole exchange with the control port so that a
// stalled controller cannot block the scraper forever.
const torControlTimeout = 10 * time.Second

// renewIP asks the Tor controller on 127.0.0.1:9051 for a new identity by
// sending AUTHENTICATE followed by SIGNAL NEWNYM. It also re-draws rnd, the
// number of page requests until the next renewal. Failures are logged and
// never abort the program.
func renewIP() {
	rnd = randInt(15, 25)

	conn, err := net.DialTimeout("tcp", "127.0.0.1:9051", torControlTimeout)
	if err != nil {
		// The connection is nil here, so it must not be used or closed.
		log.Printf("Error renewIP: %v\n", err.Error())
		return
	}
	defer conn.Close()

	if err := conn.SetDeadline(time.Now().Add(torControlTimeout)); err != nil {
		log.Printf("Error renewIP: %v\n", err.Error())
	}

	reply, err := torControlCommand(conn, "AUTHENTICATE")
	if err != nil {
		log.Printf("Error renewIP: %v\n", err.Error())
		return
	}
	// The code only proceeds when the reply starts with "250".
	if !strings.HasPrefix(reply, "250") {
		log.Printf("Error renewIP: %s\n", reply)
		return
	}

	reply, err = torControlCommand(conn, "SIGNAL NEWNYM")
	if err != nil {
		log.Printf("Error renewIP: %v\n", err.Error())
		return
	}
	if !strings.HasPrefix(reply, "250") {
		log.Printf("Error renewIP: %s\n", reply)
	}
	if verbose {
		log.Printf("renewIP: %s", reply)
	}
}

// torControlCommand writes one CRLF-terminated command to conn and returns
// whatever a single read of up to 1024 bytes yields as the reply.
func torControlCommand(conn net.Conn, command string) (string, error) {
	if _, err := conn.Write([]byte(command + "\r\n")); err != nil {
		return "", err
	}
	buff := make([]byte, 1024)
	n, err := conn.Read(buff)
	if n == 0 && err != nil {
		return "", err
	}
	return string(buff[:n]), nil
}
// startTOR launches a Tor process in the background with its data directory
// and GeoIP files located under dir and its control port set to 9051.
//
// On Windows the bundled dir/tor.exe is used; on Linux the "tor" binary found
// on PATH is run with "--User tor". Other operating systems are not handled.
// The process is started without waiting for it; start errors are logged.
//
// The binary is executed directly with an argument vector rather than through
// "sh -c", so directories containing spaces or shell metacharacters are
// passed through unchanged.
func startTOR() {
	var (
		bin  string
		args []string
	)
	switch runtime.GOOS {
	case "windows":
		bin = filepath.Join(dir, "tor.exe")
	case "linux":
		bin = "tor"
		args = append(args, "--User", "tor")
	default:
		log.Printf("Error exec: starting tor is not supported on %s\n", runtime.GOOS)
		return
	}
	args = append(args,
		"--DataDirectory", filepath.Join(dir, "tmp"),
		"--ControlPort", "9051",
		"--GeoIPFile", filepath.Join(dir, "geoip"),
		"--GeoIPv6File", filepath.Join(dir, "geoip6"),
	)

	cmd := exec.Command(bin, args...)
	if verbose {
		log.Printf("Executing: %s\n", strings.Join(cmd.Args, " "))
	}
	if err := cmd.Start(); err != nil {
		log.Printf("Error exec: %v\n", err)
	}
}
func httpRequest(uri string, method string) (*http.Response, error) {
httpClient := prepareProxyClient()
req, err := http.NewRequest(method, uri, nil)
if err != nil {
return nil, err
}
req.Close = true
req.Header.Set("Connection", "close")
req.Header.Set("User-Agent", userAgent)
res, err := httpClient.Do(req)
if err != nil || res == nil {
return nil, err
}
if res.StatusCode != 200 {
return nil, nil
}
return res, nil
}
// getDocument fetches uri with GET and parses the response into a goquery
// document. Errors are logged here and returned.
//
// A nil document is always accompanied by a non-nil error, so callers that
// check the error can use the document without a further nil test.
func getDocument(uri string) (*goquery.Document, error) {
	res, err := httpRequest(uri, "GET")
	if err != nil {
		log.Printf("Error httpRequest %s: %v\n", uri, err.Error())
		return nil, err
	}
	if res == nil {
		err = fmt.Errorf("no usable response")
		log.Printf("Error httpRequest %s: %v\n", uri, err.Error())
		return nil, err
	}

	doc, err := goquery.NewDocumentFromResponse(res)
	if err != nil {
		log.Printf("Error NewDocumentFromResponse %s: %v\n", uri, err.Error())
		return nil, err
	}
	if doc == nil {
		err = fmt.Errorf("empty document")
		log.Printf("Error NewDocumentFromResponse %s: %v\n", uri, err.Error())
		return nil, err
	}
	return doc, nil
}
// getCode requests codeUrl with POST and returns the string stored under the
// "code" key of the JSON object in the response body.
//
// It returns an error when the request fails or yields no response, when the
// body cannot be read or decoded, or when "code" is missing or not a string.
func getCode() (string, error) {
	res, err := httpRequest(codeUrl, "POST")
	if err != nil {
		log.Printf("Error httpRequest %s: %v\n", codeUrl, err.Error())
		return "", err
	}
	if res == nil {
		return "", fmt.Errorf("no response from %s", codeUrl)
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return "", err
	}

	var data map[string]interface{}
	if err := json.Unmarshal(body, &data); err != nil {
		return "", err
	}

	// Checked assertion: an unexpected payload must not panic the scraper.
	code, ok := data["code"].(string)
	if !ok {
		return "", fmt.Errorf("response from %s has no string \"code\" field", codeUrl)
	}
	return code, nil
}
// resultsRe captures the number that follows "od " in the result page text.
// It accepts at most three digits, so a reported count never exceeds 999.
var resultsRe = regexp.MustCompile(`od (\d{1,3})`)

// getResultsNumber loads the first result page for the given search and
// returns the number captured by the first "od N" occurrence in its text.
//
// It returns -1 when the page cannot be fetched (after one retry a second
// later), when no document is available, or when the text has no match.
func getResultsNumber(firstName string, lastName string, place string) int {
	// A failed code lookup is not fatal: the search is still attempted with
	// an empty code and falls through to -1 if the site rejects it.
	code, _ := getCode()
	uri := fmt.Sprintf(searchUrl, firstName, place, lastName, "1", code)

	doc, err := getDocument(uri)
	if err != nil {
		time.Sleep(1 * time.Second)
		doc, err = getDocument(uri)
		if err != nil {
			return -1
		}
	}
	if doc == nil {
		return -1
	}

	match := resultsRe.FindStringSubmatch(doc.Text())
	if match == nil {
		return -1
	}
	results, err := strconv.Atoi(match[1])
	if err != nil {
		return -1
	}
	return results
}
// isTORRunning reports whether a TCP connection to 127.0.0.1:9050 can be
// established.
func isTORRunning() bool {
	return canDial("127.0.0.1:9050")
}

// isTORControlRunning reports whether a TCP connection to 127.0.0.1:9051 can
// be established.
func isTORControlRunning() bool {
	return canDial("127.0.0.1:9051")
}

// canDial reports whether a TCP connection to addr can be opened within three
// seconds. The probe connection is closed again immediately so that repeated
// checks do not leak sockets.
func canDial(addr string) bool {
	conn, err := net.DialTimeout("tcp", addr, 3*time.Second)
	if err != nil {
		return false
	}
	conn.Close()
	return true
}
// isPhoneInContacts reports whether some collected contact has phone as its
// first phone number. Contacts without any phone entry are skipped instead of
// being indexed, which would panic.
func isPhoneInContacts(phone string) bool {
	for i := range contacts {
		if numbers := contacts[i].Phone; len(numbers) > 0 && numbers[0] == phone {
			return true
		}
	}
	return false
}
// readLines returns the lines of the file at path without their line
// terminators. It returns nil when the file cannot be opened.
//
// If scanning stops early (for example on a line longer than the scanner's
// buffer), the error is logged and the lines read so far are returned.
func readLines(path string) []string {
	file, err := os.Open(path)
	if err != nil {
		return nil
	}
	defer file.Close()

	var lines []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		log.Printf("Error readLines %s: %v\n", path, err)
	}
	return lines
}
// parseHTML turns one search result into a contact and appends it to contacts
// unless a contact with the same first phone number is already present.
//
// s is a "div.plaviLink" element (see belestrane); i is its index and is not
// used. The code reads three pieces of text relative to s:
//   - the first link inside s: the full name, whose first token becomes the
//     last name and whose last token becomes the first name;
//   - the element following s's parent: its first line is split at commas
//     into "street number" and "placeNumber place", and its third line is
//     the community in parentheses;
//   - the element after that: its text lines after the first are the phones.
//
// Parts that are missing from the markup are left empty instead of causing an
// index-out-of-range panic, and at most ten phone lines are kept.
func parseHTML(i int, s *goquery.Selection) {
	fullName := s.Find(`a`).First().Text()
	nameParts := strings.Split(fullName, " ")
	lastName := nameParts[0]
	firstName := nameParts[len(nameParts)-1]

	next := s.Parent().Next()
	lines := strings.Split(strings.TrimSpace(next.Text()), "\n")

	// strings.Split always yields at least one element, so index 0 and the
	// last index are safe; anything beyond that has to be checked.
	addrParts := strings.Split(strings.TrimSpace(lines[0]), ",")
	street := strings.Split(addrParts[0], " ")
	address := strings.Join(street[:len(street)-1], " ")
	addressNumber := street[len(street)-1]

	var placeNumber, place string
	if len(addrParts) > 1 {
		placeParts := strings.Split(strings.TrimSpace(addrParts[1]), " ")
		placeNumber = placeParts[0]
		place = strings.Join(placeParts[1:], " ")
	}

	var community string
	if len(lines) > 2 {
		community = strings.TrimSpace(lines[2])
		community = strings.Replace(community, "(", "", -1)
		community = strings.Replace(community, ")", "", -1)
	}

	// The slice always has ten slots; a phone keeps the position of its line,
	// so blank lines leave empty slots in between.
	phoneLines := strings.Split(strings.Trim(next.Next().Text(), " "), "\n")
	phone := make([]string, 10)
	for n, v := range phoneLines[1:] {
		if n >= len(phone) {
			break
		}
		v = strings.Trim(v, " ")
		if v != "" {
			phone[n] = v
		}
	}

	if !isPhoneInContacts(phone[0]) {
		contacts = append(contacts, contact{
			FullName:      fullName,
			FirstName:     firstName,
			LastName:      lastName,
			Address:       address,
			AddressNumber: addressNumber,
			PlaceNumber:   placeNumber,
			Place:         place,
			Community:     community,
			Phone:         phone,
		})
	}
}
// belestrane scrapes every result page for one (first name, last name, place)
// search and adds the contacts it finds to the package-level list.
//
// It starts with a fresh cookie jar, records the search as the current one
// (for the resume file written on interruption), asks getResultsNumber for
// the number of hits and then fetches the pages one after another. Per page
// it increments counter, saves progress every 50th request, and every rnd-th
// request renews the Tor identity and picks another User-Agent.
func belestrane(firstName string, lastName string, place string) {
	// The page count is derived assuming ten results per page.
	const resultsPerPage = 10

	// cookiejar.New cannot fail when called with nil options.
	jar, _ = cookiejar.New(nil)
	currentPlace = place
	currentFirstName = firstName
	currentLastName = lastName

	// An unknown (-1) or zero count still fetches the first page. The count
	// is rounded up so an exact multiple of the page size does not trigger a
	// request for an empty trailing page.
	pages := 1
	if results := getResultsNumber(firstName, lastName, place); results > 0 {
		pages = (results + resultsPerPage - 1) / resultsPerPage
	}

	getPage := func(page string) {
		counter++

		if len(contacts) > 0 && counter%50 == 0 {
			saveCSV(name)
			saveJSON(name)
		}

		if counter > 0 && rnd > 0 && counter%rnd == 0 {
			renewIP()
			if len(uas) > 0 {
				userAgent = uas[rand.Intn(len(uas))]
			}
			if verbose {
				log.Printf("UserAgent changed to %s\n", userAgent)
			}
			time.Sleep(1 * time.Second)
		}

		code, err := getCode()
		if err != nil || code == "" {
			time.Sleep(1 * time.Second)
			// Second and last attempt; the search goes ahead even if the
			// code is still empty.
			code, _ = getCode()
		}

		uri := fmt.Sprintf(searchUrl, firstName, place, lastName, page, code)
		doc, err := getDocument(uri)
		if err != nil {
			time.Sleep(1 * time.Second)
			doc, err = getDocument(uri)
			if err != nil {
				return
			}
		}
		if doc == nil {
			// No error but no document either: nothing to parse.
			return
		}

		divs := doc.Find(`div.plaviLink`)
		divs.Each(parseHTML)

		if verbose {
			log.Printf("%s, found: %d, total: %d\n", uri, divs.Length(), len(contacts))
		}
	}

	// Pages are fetched sequentially.
	for i := 1; i <= pages; i++ {
		getPage(strconv.Itoa(i))
	}
}
// saveJSON writes the collected contacts as JSON to dir/kontakti/<name>.json,
// creating the kontakti directory when it does not exist yet. Errors are
// logged and otherwise ignored.
func saveJSON(name string) {
	js, err := json.Marshal(contacts)
	if err != nil {
		log.Printf("Error saveJSON: %v\n", err.Error())
		return
	}

	outDir := filepath.Join(dir, "kontakti")
	if err := os.MkdirAll(outDir, 0755); err != nil {
		log.Printf("Error saveJSON: %v\n", err.Error())
		return
	}

	file := filepath.Join(outDir, name+".json")
	if err := ioutil.WriteFile(file, js, 0644); err != nil {
		log.Printf("Error saveJSON: %v\n", err.Error())
	}
}
// csvPhoneColumns is the fixed number of phone columns in every CSV record.
const csvPhoneColumns = 10

// saveCSV writes the collected contacts to dir/kontakti/<name>.csv, creating
// the kontakti directory when it does not exist yet.
//
// Every record has the columns last name, first name, full name, address,
// address number, place number, place, community, followed by exactly
// csvPhoneColumns phone columns; missing phones are written as empty fields
// and surplus ones are dropped. Errors are logged and otherwise ignored.
func saveCSV(name string) {
	outDir := filepath.Join(dir, "kontakti")
	if err := os.MkdirAll(outDir, 0755); err != nil {
		log.Printf("Error saveCSV: %v\n", err.Error())
		return
	}

	csvfile, err := os.Create(filepath.Join(outDir, name+".csv"))
	if err != nil {
		log.Printf("Error saveCSV: %v\n", err.Error())
		return
	}
	defer csvfile.Close()

	writer := csv.NewWriter(csvfile)
	for _, c := range contacts {
		record := make([]string, 0, 8+csvPhoneColumns)
		record = append(record,
			c.LastName, c.FirstName, c.FullName,
			c.Address, c.AddressNumber,
			c.PlaceNumber, c.Place, c.Community,
		)
		for i := 0; i < csvPhoneColumns; i++ {
			if i < len(c.Phone) {
				record = append(record, c.Phone[i])
			} else {
				record = append(record, "")
			}
		}
		if err := writer.Write(record); err != nil {
			log.Printf("Error saveCSV: %v\n", err.Error())
			return
		}
	}

	// Write buffers its output, so errors may only surface once flushed.
	writer.Flush()
	if err := writer.Error(); err != nil {
		log.Printf("Error saveCSV: %v\n", err.Error())
	}
}
// loadJSON replaces the collected contacts with the list stored in filename.
//
// When the file cannot be read or decoded the error is logged and contacts is
// left as an empty list; a partially decoded list is never kept.
func loadJSON(filename string) {
	if verbose {
		log.Printf("Loading file %s\n", filename)
	}

	contacts = make([]contact, 0)

	data, err := ioutil.ReadFile(filename)
	if err != nil {
		log.Printf("Error loadJSON: %v\n", err.Error())
		return
	}

	// Decode into a local slice so a failure half-way through the file does
	// not leave incomplete entries behind.
	var loaded []contact
	if err := json.Unmarshal(data, &loaded); err != nil {
		log.Printf("Error loadJSON: %v\n", err.Error())
		return
	}
	contacts = loaded
}
// randInt returns a pseudo-random integer in the half-open range [min, max).
// For an empty range (max <= min) it returns min instead of panicking.
func randInt(min int, max int) int {
	if max <= min {
		return min
	}
	return min + rand.Intn(max-min)
}
// main runs the scraper.
//
// Flags:
//
//	-verbose  enable progress logging
//	-mesto    path of a file with one place per line, or, when no such file
//	          exists, a comma-separated list of places
//
// It resolves the program directory, installs a signal handler that saves the
// collected contacts and the current search position to current.txt before
// exiting, makes sure Tor is reachable (starting it if needed), loads earlier
// results for the same place list, and then searches every combination of
// place, first-name initial and last-name initial. When current.txt exists
// the enumeration is fast-forwarded to the position stored in it.
func main() {
	verb := flag.Bool("verbose", false, "Verbose output")
	mesta := flag.String("mesto", "sva-mesta.txt", "Fajl sa mestima")
	flag.Parse()
	verbose = *verb

	rand.Seed(time.Now().UTC().UnixNano())

	var err error
	dir, err = filepath.Abs(filepath.Dir(os.Args[0]))
	if err != nil {
		log.Printf("Error resolving program directory: %v\n", err)
	}
	currentFile := filepath.Join(dir, "current.txt")

	c := make(chan os.Signal, 3)
	signal.Notify(c, os.Interrupt, syscall.SIGHUP, syscall.SIGTERM)
	go func() {
		for sig := range c {
			log.Printf("Captured %v, saving progress and exiting...", sig)
			if len(contacts) > 0 {
				saveCSV(name)
				saveJSON(name)
			}
			if currentPlace != "" {
				cur := fmt.Sprintf("%s,%s,%s\n", currentPlace, currentFirstName, currentLastName)
				if err := ioutil.WriteFile(currentFile, []byte(cur), 0644); err != nil {
					log.Printf("Error saving %s: %v\n", currentFile, err)
				}
			}
			os.Exit(1)
		}
	}()

	if !isTORRunning() {
		startTOR()
		time.Sleep(3 * time.Second)
		if !isTORRunning() {
			log.Fatal("TOR is not listening on port 9050.\n")
		}
		if !isTORControlRunning() {
			log.Fatal("TOR is not listening on control port 9051.\n")
		}
	}

	// TrimSuffix removes exactly the extension. TrimRight would treat it as a
	// set of characters and also eat matching letters of the name itself
	// ("test.txt" would become "tes").
	filename := filepath.Base(*mesta)
	name = strings.TrimSuffix(filename, filepath.Ext(filename))

	jsonfile := filepath.Join(dir, "kontakti", name+".json")
	if _, err := os.Stat(jsonfile); err == nil {
		loadJSON(jsonfile)
	}

	var places []string
	if _, err := os.Stat(*mesta); err == nil {
		places = readLines(*mesta)
	} else {
		places = strings.Split(*mesta, ",")
	}

	if _, err := os.Stat(currentFile); err == nil {
		lines := readLines(currentFile)
		var split []string
		if len(lines) > 0 {
			split = strings.Split(lines[0], ",")
		}
		if n := len(split); n >= 3 {
			// The two initials are the last two fields; everything before
			// them is the place, which keeps a place containing a comma
			// intact.
			savedPlace = strings.Join(split[:n-2], ",")
			savedFirstName = split[n-2]
			savedLastName = split[n-1]
			log.Printf("current.txt file found, continuing from %s, %s, %s\n", savedPlace, savedFirstName, savedLastName)
		} else {
			log.Printf("Ignoring malformed %s\n", currentFile)
		}
	}

	for _, place := range places {
		if strings.TrimSpace(place) == "" {
			// Blank lines in the place list are not searches.
			continue
		}
		// Each saved value is skipped to once and then cleared, so the
		// enumeration continues normally from the resume point onwards.
		if savedPlace != "" {
			if savedPlace != place {
				continue
			}
			savedPlace = ""
		}
		for _, firstName := range letters {
			if savedFirstName != "" {
				if savedFirstName != firstName {
					continue
				}
				savedFirstName = ""
			}
			for _, lastName := range letters {
				if savedLastName != "" {
					if savedLastName != lastName {
						continue
					}
					savedLastName = ""
				}
				belestrane(firstName, lastName, place)
			}
		}
	}

	saveCSV(name)
	saveJSON(name)
	log.Printf("Total: %d", len(contacts))
	// The run completed, so the resume file is no longer needed; it may not
	// exist at all, hence the error is deliberately ignored.
	os.Remove(currentFile)
	os.Exit(0)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment