Skip to content

Instantly share code, notes, and snippets.

@kitz99
Created July 1, 2018 08:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kitz99/5d3aafe56bd484e218b0c753c48e4b58 to your computer and use it in GitHub Desktop.
Save kitz99/5d3aafe56bd484e218b0c753c48e4b58 to your computer and use it in GitHub Desktop.
package main
import (
"encoding/json"
"fmt"
"strings"
"github.com/antchfx/htmlquery"
)
// Product holds the three text fields scraped for a single product entry.
// All fields, including Price, are kept as the raw (trimmed) text found on
// the page and are emitted under lower-case JSON keys.
type Product struct {
	Title       string `json:"title"`
	Description string `json:"description"`
	Price       string `json:"price"`
}
// ProductList is a slice of Product values; main collects the scraped
// products in it and marshals it to JSON.
type ProductList []Product
// errHandler panics with err when err is non-nil and does nothing otherwise.
func errHandler(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// main downloads the demo product page, extracts every <li> found under
// <ul class="products">, and prints the collected products to stdout as a
// JSON array.
//
// A failure to load the page or to encode the result is handed to
// errHandler, which panics.
func main() {
	doc, err := htmlquery.LoadURL("https://kernel-panic.me/scraping.html")
	errHandler(err)

	productNodes := htmlquery.Find(doc, "//ul[@class='products']/li")

	// Use an empty, non-nil slice so a page without products is encoded as
	// [] rather than null; the capacity is known up front.
	result := make(ProductList, 0, len(productNodes))

	for _, p := range productNodes {
		// text returns the trimmed inner text of the first node below p that
		// matches expr, or "" when nothing matches. FindOne yields nil for a
		// missing node, and passing nil on to InnerText would crash the whole
		// run with a nil-pointer dereference.
		text := func(expr string) string {
			node := htmlquery.FindOne(p, expr)
			if node == nil {
				return ""
			}
			return strings.TrimSpace(htmlquery.InnerText(node))
		}

		result = append(result, Product{
			Title:       text("div/div[@class='product-header']"),
			Description: text("div/div[@class='product-body']/p[contains(@class, 'description')]"),
			Price:       text("div/div[@class='product-footer']/p[contains(@class, 'price')]"),
		})
	}

	outputBytes, err := json.Marshal(result)
	errHandler(err)
	fmt.Println(string(outputBytes))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment