Created
July 1, 2018 08:53
-
-
Save kitz99/5d3aafe56bd484e218b0c753c48e4b58 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/json" | |
"fmt" | |
"strings" | |
"github.com/antchfx/htmlquery" | |
) | |
// Product is a single item scraped from the product listing page.
// Every field holds the text of the matching page element and is encoded
// to JSON under the lowercase key given in its struct tag.
type Product struct {
	// Title is the text of the product header element.
	Title string `json:"title"`
	// Description is the text of the product description paragraph.
	Description string `json:"description"`
	// Price is the text of the product price paragraph, kept as a string.
	Price string `json:"price"`
}
type ProductList []Product | |
// errHandler stops the program by panicking with err when err is non-nil.
// A nil err is a no-op, so it can be called unconditionally after any
// operation that returns an error.
func errHandler(err error) {
	if err == nil {
		return
	}
	panic(err)
}
func main() { | |
doc, err := htmlquery.LoadURL("https://kernel-panic.me/scraping.html") | |
errHandler(err) | |
result := ProductList{} | |
productNodes := htmlquery.Find(doc, "//ul[@class='products']/li") | |
for _, p := range productNodes { | |
var currentProduct Product | |
titleNode := htmlquery.FindOne(p, "div/div[@class='product-header']") | |
currentProduct.Title = strings.TrimSpace(htmlquery.InnerText(titleNode)) | |
descriptionNode := htmlquery.FindOne(p, "div/div[@class='product-body']/p[contains(@class, 'description')]") | |
currentProduct.Description = strings.TrimSpace(htmlquery.InnerText(descriptionNode)) | |
priceNode := htmlquery.FindOne(p, "div/div[@class='product-footer']/p[contains(@class, 'price')]") | |
currentProduct.Price = strings.TrimSpace(htmlquery.InnerText(priceNode)) | |
result = append(result, currentProduct) | |
} | |
outputBytes, err := json.Marshal(result) | |
errHandler(err) | |
fmt.Println(string(outputBytes)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment