Skip to content

Instantly share code, notes, and snippets.

Created September 1, 2022 23:53
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Simple web scraper to get a list of English words
package main
import (
	"fmt"
	"io"
	"net/http"
	"os"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// Scraper configuration.
const (
	// urlBase is the format string that main hands to fmt.Sprintf together
	// with the page number in order to build each page URL.
	// NOTE(review): the value is empty, so no usable URL can be produced from
	// it — presumably the real URL template is missing; confirm and restore it.
	urlBase = ""

	// fileName is the path of the output file that main creates or truncates.
	fileName = "allwords.txt"
)

// startPage is the page number main begins with; main counts down from it
// to page 1.
var startPage = 2066
func main() {
file, err := os.OpenFile(fileName, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
if err != nil {
fmt.Printf("unable to open %s: %v\n", fileName, err)
fmt.Printf("starting page scrape...\n")
for i := startPage; i >= 1; i-- {
url := fmt.Sprintf(urlBase, i)
fmt.Printf("page: %v - %s\n", i, url)
if err := scrapePage(file, url); err != nil {
fmt.Printf("unable to scrape '%v': %v\n", url, err)
fmt.Printf("finished scraping, closing file...\n")
if err := file.Close(); err != nil {
fmt.Printf("unable to close output file: %v\n", err)
func scrapePage(wr io.Writer, url string) error {
resp, err := http.Get(url)
if err != nil {
return fmt.Errorf("unable to fetch page '%v', got error: %w", url, err)
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("got status code %v instead of 200", resp.StatusCode)
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return fmt.Errorf("unable to parse document using goquery: %w", err)
doc.Find(" > a").Each(func(i int, s *goquery.Selection) {
if _, err := fmt.Fprintf(wr, "%v\n", s.Text()); err != nil {
fmt.Printf("unable to write '%s' (item '%v' from url '%v') to output: %v\n", s.Text(), i, url, err)
return nil
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment