Skip to content

Instantly share code, notes, and snippets.

@guitarrapc
Last active January 5, 2019 18:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guitarrapc/113c668ed98db5767f9db6db8be55782 to your computer and use it in GitHub Desktop.
Save guitarrapc/113c668ed98db5767f9db6db8be55782 to your computer and use it in GitHub Desktop.
retrieve sitemap
package main
import (
"encoding/xml"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
)
// SitemapIndex godoc
// @summary sitemapindex
type SitemapIndex struct {
XMLName xml.Name `xml:"sitemapindex"`
SitemapLocations []SitemapLocation `xml:"sitemap"`
}
// SitemapLocation godoc
// @summary element within sitemapindex
type SitemapLocation struct {
Loc string `xml:"loc"`
Lastmod string `xml:"lastmod"`
Changefreq string `xml:"changefreq"`
Priority string `xml:"priority"`
}
// Sitemap godoc
// @summary urlsets
type Sitemap struct {
XMLName xml.Name `xml:"urlset"`
Urls []Url `xml:"url"`
}
// Url godoc
// @summary url element within urlset
type Url struct {
Loc string `xml:"loc"`
Lastmod string `xml:"lastmod"`
}
func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: %s URL\n", os.Args[0])
os.Exit(1)
}
// sitemapindex
sitemapURL := os.Args[1] + "/sitemap.xml"
index, err := FetchSitemapIndex(sitemapURL)
if err != nil {
fmt.Println(err)
return
}
// sitemaps
urls := make([]Url, 0)
for _, loc := range index.SitemapLocations {
r, err := FetchSitemap(loc.Loc)
if err != nil {
fmt.Println(err)
return
}
urls = append(urls, r.Urls...)
}
// output
for _, url := range urls {
fmt.Println(url.Loc)
}
}
func fetchXML(url string) (data []byte, err error) {
response, err := http.Get(url)
if err != nil {
log.Fatal(err)
} else {
defer response.Body.Close()
data, err = ioutil.ReadAll(response.Body)
if err != nil {
log.Fatal(err)
}
}
return
}
// FetchSitemapIndex godoc
// @summary : Fetch a remote sitemap index
func FetchSitemapIndex(url string) (sitemapIndex *SitemapIndex, err error) {
xmlData, err := fetchXML(url)
if err != nil {
return
}
sitemapIndex = &SitemapIndex{}
err = xml.Unmarshal(xmlData, sitemapIndex)
return
}
// FetchSitemap godoc
// @summary : Fetch a sitemap
func FetchSitemap(url string) (siteMap *Sitemap, err error) {
xmlData, err := fetchXML(url)
if err != nil {
return
}
siteMap = &Sitemap{}
err = xml.Unmarshal(xmlData, siteMap)
return
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment