Skip to content

Instantly share code, notes, and snippets.

@inotnako
Last active August 29, 2023 11:38
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save inotnako/c4a82f6723f6ccea5d83c5d3689373dd to your computer and use it in GitHub Desktop.
Save inotnako/c4a82f6723f6ccea5d83c5d3689373dd to your computer and use it in GitHub Desktop.
Get metadata from an HTML page
package main
import (
"encoding/json"
"net/http"
"net/url"
"golang.org/x/net/html"
"io"
)
// main starts an HTTP server on :4567 with a single endpoint, /read.
//
// The endpoint takes a form value named "link", fetches that URL, and
// responds with the metadata returned by extract, encoded as JSON. Failures
// are reported as a JSON object of the form {"error": "..."}:
//   - 400 when the form cannot be parsed, link is empty, or link is not an
//     absolute http/https URL;
//   - 502 when fetching the link fails.
func main() {
	http.HandleFunc(`/read`, func(rw http.ResponseWriter, req *http.Request) {
		rw.Header().Set(`Content-Type`, `application/json`)

		// writeError sends status together with a JSON error body.
		writeError := func(status int, msg string) {
			rw.WriteHeader(status)
			// The status line is already written, so an encoding failure
			// cannot be reported to the client anymore; ignore it.
			_ = json.NewEncoder(rw).Encode(map[string]string{"error": msg})
		}

		if err := req.ParseForm(); err != nil {
			writeError(http.StatusBadRequest, err.Error())
			return
		}

		link := req.FormValue(`link`)
		if link == "" {
			writeError(http.StatusBadRequest, `empty value of link`)
			return
		}

		target, err := url.Parse(link)
		if err != nil {
			writeError(http.StatusBadRequest, err.Error())
			return
		}
		// url.Parse accepts relative references and arbitrary schemes, none
		// of which http.Get can fetch; reject them as a client error.
		if (target.Scheme != "http" && target.Scheme != "https") || target.Host == "" {
			writeError(http.StatusBadRequest, `link must be an absolute http or https URL`)
			return
		}

		resp, err := http.Get(link)
		if err != nil {
			// resp may be nil when err is non-nil, so there is no upstream
			// status that can safely be proxied; report a gateway failure.
			writeError(http.StatusBadGateway, err.Error())
			return
		}
		defer resp.Body.Close()

		meta := extract(resp.Body)
		rw.WriteHeader(http.StatusOK)
		// Best effort: the status line has already been sent.
		_ = json.NewEncoder(rw).Encode(meta)
	})

	// Print a usage example and a sample response.
	println("call like: \n$ curl -XPOST 'http://localhost:4567/read' -d link='https://github.com/golang/go'")
	println(`{"title":"golang/go","description":"go - The Go programming language","image":"https://avatars1.githubusercontent.com/u/4314092?v=3\u0026s=400","site_name":"GitHub"}`)

	if err := http.ListenAndServe(`:4567`, nil); err != nil {
		panic(err)
	}
}
// HTMLMeta holds the metadata collected from an HTML page. It is serialized
// to JSON using the keys given in the field tags.
type HTMLMeta struct {
	// Title is the page title.
	Title string `json:"title"`
	// Description is the page description.
	Description string `json:"description"`
	// Image is the page image reference.
	Image string `json:"image"`
	// SiteName is the name of the site the page belongs to.
	SiteName string `json:"site_name"`
}
// extract tokenizes the HTML read from resp and collects page metadata.
//
// The title comes from the text inside <title> or from an og:title meta tag.
// The description comes from a description or og:description meta tag, the
// image from og:image and the site name from og:site_name. When several
// sources set the same field, the one that appears later in the document
// wins.
//
// Scanning stops at the first <body> tag or as soon as the tokenizer reports
// an error token; whatever has been collected up to that point is returned.
// The result is never nil.
func extract(resp io.Reader) *HTMLMeta {
	z := html.NewTokenizer(resp)
	hm := new(HTMLMeta)
	// inTitle is true between an opening <title> and its text or closing tag.
	inTitle := false

	for {
		switch z.Next() {
		case html.ErrorToken:
			return hm

		case html.StartTagToken, html.SelfClosingTagToken:
			t := z.Token()
			switch t.Data {
			case `body`:
				// Metadata lives in <head>; nothing useful follows.
				return hm
			case "title":
				inTitle = true
			case "meta":
				if desc, ok := extractMetaProperty(t, "description"); ok {
					hm.Description = desc
				}
				if ogTitle, ok := extractMetaProperty(t, "og:title"); ok {
					hm.Title = ogTitle
				}
				if ogDesc, ok := extractMetaProperty(t, "og:description"); ok {
					hm.Description = ogDesc
				}
				if ogImage, ok := extractMetaProperty(t, "og:image"); ok {
					hm.Image = ogImage
				}
				if ogSiteName, ok := extractMetaProperty(t, "og:site_name"); ok {
					hm.SiteName = ogSiteName
				}
			}

		case html.EndTagToken:
			// An empty <title></title> produces no text token. Clear the
			// flag here so that a later, unrelated text token is not
			// mistaken for the title.
			if t := z.Token(); t.Data == "title" {
				inTitle = false
			}

		case html.TextToken:
			if inTitle {
				hm.Title = z.Token().Data
				inTitle = false
			}
		}
	}
}
// extractMetaProperty reports whether the tag t identifies itself as prop and
// returns the value of its content attribute.
//
// A tag matches when its property attribute or its name attribute equals
// prop. Open Graph tags use property (for example property="og:title"),
// while standard metadata such as the page description uses name (for
// example name="description"), so both are accepted.
//
// content is the value of the tag's content attribute, or "" if it has none.
// It is filled in regardless of ok; callers should use it only when ok is
// true.
func extractMetaProperty(t html.Token, prop string) (content string, ok bool) {
	for _, attr := range t.Attr {
		switch attr.Key {
		case "property", "name":
			if attr.Val == prop {
				ok = true
			}
		case "content":
			content = attr.Val
		}
	}
	return content, ok
}
@konjoot
Copy link

konjoot commented Dec 16, 2016

Very clean and impressive 👍

@id6827
Copy link

id6827 commented Nov 9, 2022

👍

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment