Skip to content

Instantly share code, notes, and snippets.

@AmyrAhmady
Last active December 30, 2019 08:10
Show Gist options
  • Save AmyrAhmady/9ee8dd0e8acf3f2963dd9fdbcbcc88f7 to your computer and use it in GitHub Desktop.
Save AmyrAhmady/9ee8dd0e8acf3f2963dd9fdbcbcc88f7 to your computer and use it in GitHub Desktop.
aaaaa
package main
import (
"fmt"
"io/ioutil"
"github.com/anaskhan96/soup"
"strings"
"regexp"
"os"
)
func main() {
b, err := ioutil.ReadFile("file.html")
if err != nil {
fmt.Print(err)
}
str := string(b)
r := regexp.MustCompile(`https:[^\s"><]*\bdetail\/[0-9]*`)
matches := r.FindAllString(str, -1)
file, err := os.Create("links.json")
if err != nil {
fmt.Println(err)
return
}
file.WriteString("{\n")
file.WriteString("\t" + `"ads": [` + "\n")
for index, each := range matches {
if (index % 2) == 1 {
fmt.Println(index -1)
file.WriteString("\t\t{\n")
resp, err := soup.Get(each)
if err != nil {
continue
}
doc := soup.HTMLParse(resp)
if doc.Error != nil {
continue
}
itemsTemp := doc.Find("div", "class", "detail_part3")
if itemsTemp.Error != nil {
fmt.Println(itemsTemp.Error)
continue
}
items := doc.Find("div", "class", "detail_part3").FindAll("li")
if len(items) < 1 {
continue
}
file.WriteString("\t\t\t" + `"link": ` + `"` + each + `"` + ", \n")
file.WriteString("\t\t\t" + `"title": ` + `"` + doc.Find("div", "class", "title").Find("h1").Text() + `"` + ", \n")
for itemIndex, item := range items {
hasUsefulSpan := false
if item.Find("span", "class", "name").Text() == "گروه آگهی :: " {
catsAndSubCats := item.Find("span", "class", "val").FindAll("a")
file.WriteString("\t\t\t" + `"category": ` + `"` + catsAndSubCats[0].Text() + `"` + ", \n")
if len(catsAndSubCats) > 1 {
file.WriteString("\t\t\t" + `"subcategory": ` + `"` + catsAndSubCats[1].Text() + `"`)
}
hasUsefulSpan = true
} else if item.Find("span", "class", "name").Text() == "نام و نام خانوادگی :: " {
file.WriteString("\t\t\t" + `"name": ` + `"` + item.Find("span", "class", "val").Text() + `"`)
hasUsefulSpan = true
} else if item.Find("span", "class", "name").Text() == "تلفن تماس :: " {
file.WriteString("\t\t\t" + `"phone": ` + `"` + item.Find("span", "class", "val").Text() + `"`)
hasUsefulSpan = true
} else if item.Find("span", "class", "name").Text() == "لینک :: " {
file.WriteString("\t\t\t" + `"website": ` + `"` + item.Find("span", "class", "val").Find("a").Text() + `"`)
hasUsefulSpan = true
} else if item.Find("span", "class", "name").Text() == "موقعیت :: " {
file.WriteString("\t\t\t" + `"location": ` + `"` + item.Find("span", "class", "val").Text() + `"`)
hasUsefulSpan = true
} else if item.Find("span", "class", "name").Text() == "نشانی :: " {
file.WriteString("\t\t\t" + `"address": ` + `"` + strings.Replace(item.Find("span", "class", "val").Text(), "\n", " ", -1) + `"`)
hasUsefulSpan = true
}
if (len(items) - 1) == itemIndex {
file.WriteString("\n")
} else {
if hasUsefulSpan == true {
file.WriteString(",\n")
}
}
}
if (len(matches) - 1) == index {
file.WriteString("\t\t}\n")
} else {
file.WriteString("\t\t},\n")
}
}
}
file.WriteString("\t]\n")
file.WriteString("}")
err = file.Close()
if err != nil {
fmt.Println(err)
return
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment