// Source: GitHub Gist iphelix/0xpoland4.go (secret gist by @iphelix),
// created Dec 22, 2020.
package main
import (
"fmt"
"log"
"math/rand"
"os"
"regexp"
"strings"
"sync"
"time"
"github.com/gocolly/colly"
)
var (
	// searchString is the fmt template for an album page URL; its single %d
	// verb receives the numeric album ID.
	searchString = "http://www.metalstorm.net/bands/album.php?album_id=%d"

	// contentRegexp matches a numbered entry prefix ("12. ") followed by one
	// captured character (the trailing group is lazy, so it captures exactly
	// one character). It is not referenced by the other code in this file.
	contentRegexp = regexp.MustCompile(`[0-9]+\. (.+?)`)

	// tracksRegexp matches the "N. " prefix in front of each track and is used
	// to split a track listing into individual entries. The dot is escaped so
	// that only a literal period counts; an unescaped dot would also split on
	// any "digits + any character + space" run such as a year or a duration.
	tracksRegexp = regexp.MustCompile(`[0-9]+\. `)
)
// generate starts the producer goroutine. It formats searchString with every
// album ID in [77544, 125000) and sends each resulting URL on out.
//
// The goroutine stops early if done is closed, so it cannot stay blocked on a
// send when nobody is receiving any more. wg is incremented before the
// goroutine starts and released when it exits; generate itself returns
// immediately. The caller owns out and is responsible for closing it.
func generate(out chan<- string, done <-chan struct{}, wg *sync.WaitGroup) {
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := 77544; i < 125000; i++ {
			select {
			case out <- fmt.Sprintf(searchString, i):
			case <-done:
				return
			}
		}
	}()
}
// scrape starts one worker goroutine that reads album URLs from in, fetches
// each page and sends the extracted text on out.
//
// Before every request the worker pauses for a random 100-999 ms so the
// server is not overloaded. The pause and the send on out are both abandoned
// as soon as done is closed. A URL whose visit fails is logged and skipped
// instead of emitting an empty record. The worker exits when in is closed or
// done is closed; wg is incremented before the goroutine starts and released
// when it exits.
func scrape(in <-chan string, out chan<- string, done chan struct{}, wg *sync.WaitGroup) {
	// Bounds, in milliseconds, of the random pause taken before each request.
	const (
		minDelayMs = 100
		maxDelayMs = 1000
	)

	wg.Add(1)
	go func() {
		defer wg.Done()
		for url := range in {
			// Sleep a bit not to overload the server, but wake up on cancel.
			delay := time.Duration(rand.Intn(maxDelayMs-minDelayMs)+minDelayMs) * time.Millisecond
			select {
			case <-done:
				return
			case <-time.After(delay):
			}

			album, err := fetchAlbum(url)
			if err != nil {
				log.Println("Visit failed:", url, err)
				continue
			}

			select {
			case out <- album:
			case <-done:
				return
			}
		}
	}()
}

// fetchAlbum visits url with a fresh collector restricted to
// www.metalstorm.net and returns the collected entries joined by newlines,
// with a trailing newline. Entries are appended in whatever order colly
// invokes the callbacks below. The error returned by Visit is passed through.
func fetchAlbum(url string) (string, error) {
	c := colly.NewCollector(
		colly.AllowedDomains("www.metalstorm.net"),
	)

	var album []string

	// Split the element text on the "N. " track prefixes and strip the
	// lyrics-link and bracket decorations from every piece.
	c.OnHTML("div.col-xs-12", func(e *colly.HTMLElement) {
		for _, track := range tracksRegexp.Split(e.Text, -1) {
			track = strings.ReplaceAll(track, " Lyrics (", "")
			track = strings.ReplaceAll(track, " [", "\n")
			track = strings.ReplaceAll(track, "[", "\n")
			track = strings.ReplaceAll(track, "]", "")
			album = append(album, track)
		}
	})

	// The page title is used only when it splits into exactly two parts
	// around " - ", which are taken as artist and title.
	c.OnHTML("div.page_title", func(e *colly.HTMLElement) {
		artistTitle := strings.Split(strings.TrimSpace(e.Text), " - ")
		if len(artistTitle) == 2 {
			artist, title := artistTitle[0], artistTitle[1]
			fmt.Printf("%s - %s\n", artist, title)
			album = append(album, artist, title)
		}
	})

	// Before making a request print "Visiting ..."
	c.OnRequest(func(r *colly.Request) {
		log.Println("Visiting", r.URL.String())
	})

	if err := c.Visit(url); err != nil {
		return "", err
	}
	return strings.Join(album, "\n") + "\n", nil
}
// store starts the writer goroutine. It appends every string received on in
// to songs.log (created with mode 0644 if missing) until in is closed or done
// is closed. wg is incremented before the goroutine starts and released when
// it exits.
//
// If songs.log cannot be opened, the error is logged once and incoming
// records are still received and discarded, so senders are not left blocked
// on a full channel. Write and close errors are logged, not returned.
func store(in <-chan string, done chan struct{}, wg *sync.WaitGroup) {
	wg.Add(1)
	go func() {
		defer wg.Done()

		f, err := os.OpenFile("songs.log",
			os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		opened := err == nil
		if !opened {
			log.Println(err)
		} else {
			// A failed Close on a written file can mean lost data, so report it.
			defer func() {
				if err := f.Close(); err != nil {
					log.Println(err)
				}
			}()
		}

		for {
			select {
			case <-done:
				return
			case text, ok := <-in:
				if !ok {
					return
				}
				if !opened {
					// Nothing to write to; keep draining the channel.
					continue
				}
				if _, err := f.WriteString(text); err != nil {
					log.Println(err)
				}
			}
		}
	}()
}
// main wires up the pipeline: one URL producer, a pool of scraper workers and
// one storage writer, connected by buffered channels. It then shuts the
// stages down in order: wait for the producer, close work, wait for the
// scrapers, close output, wait for the writer.
//
// done is handed to every stage as a cancellation signal but is never closed
// here, so the run always ends by draining the channels.
func main() {
	// Number of concurrent scraper goroutines.
	const scrapers = 15

	var prodWg, consWg, storeWg sync.WaitGroup
	done := make(chan struct{})
	work := make(chan string, 500)
	output := make(chan string, 10000)

	log.Println("Starting producer...")
	generate(work, done, &prodWg)
	log.Println("Producers started!")

	log.Println("Starting scrapers...")
	for i := 0; i < scrapers; i++ {
		scrape(work, output, done, &consWg)
	}
	log.Println("Consumers started!")

	log.Println("Starting storage...")
	store(output, done, &storeWg)

	log.Println("Scraping...")
	prodWg.Wait()
	close(work)
	consWg.Wait()
	// Every sender on output has finished. Closing it lets the storage
	// goroutine leave its receive loop; without this storeWg.Wait would
	// block forever.
	close(output)
	storeWg.Wait()
	log.Println("Scraping is complete")
}
// End of gist. The GitHub sign-in prompt that followed on the page is not part
// of the program.