Skip to content

Instantly share code, notes, and snippets.

@DefaultPerson
Created February 4, 2025 16:38
Show Gist options
  • Save DefaultPerson/ab7740a433cea07014522b892e77da5a to your computer and use it in GitHub Desktop.
Save DefaultPerson/ab7740a433cea07014522b892e77da5a to your computer and use it in GitHub Desktop.
package scrappers
import (
"context"
"github.com/rs/zerolog/log"
"net/http"
"test/utils"
"time"
)
const RequestTimeout = 5
type Scrapper interface {
Run(
ctx context.Context,
channels []utils.Channel,
wsHub *utils.Hub,
pm utils.ProxyManager,
telegramRetryDelay int,
telegramRetryCount int,
statsReportPeriod int,
proxyCooldown int,
announcesCount int,
delay float64,
domainCount int,
)
MakeRequest(
client *http.Client,
pm utils.ProxyManager,
ctx context.Context,
channels []utils.Channel,
wsHub *utils.Hub,
telegramRetryDelay int,
telegramRetryCount int,
announcesCount int,
stats *utils.Statistic,
)
IsProxyError(
transport *http.Transport,
body []byte,
cfg *utils.Statistic,
)
DeserializeData(
body []byte,
schema struct{},
)
}
type BaseScrappy struct {
exchange string
targetUrl string
}
func (bs *BaseScrappy) run(
s Scrapper,
ctx context.Context,
channels []utils.Channel,
wsHub *utils.Hub,
pm utils.ProxyManager,
telegramRetryDelay int,
telegramRetryCount int,
statsReportPeriod int,
proxyCooldown int,
announcesCount int,
delay float64,
domainCount int,
) {
calcDelay := time.Duration(delay) * time.Second
time.Sleep(calcDelay)
log.Info().Msgf("Scrapper started with delay %s", calcDelay)
if s == nil {
panic("self is nil! Ensure you assign self before calling methods")
}
stats := utils.Statistic{}
statsTicker := time.Second * time.Duration(statsReportPeriod)
go utils.StatsReporter(statsTicker, bs.exchange, &stats)
go pm.CheckRestoreTransports()
client := &http.Client{
Timeout: RequestTimeout * time.Second,
}
for {
go s.MakeRequest(
client, pm, ctx, channels, wsHub,
telegramRetryDelay, telegramRetryCount, announcesCount, &stats,
)
stats.BadProxies = pm.CountBad()
stats.ActiveProxies = pm.Count()
interval := time.Duration(
float64(time.Second) / (float64(pm.Count()) / float64(proxyCooldown)) * float64(domainCount),
)
stats.CurrentDuration = interval
time.Sleep(interval)
}
}
package scrappers
import (
"bytes"
"context"
"encoding/json"
"github.com/rs/zerolog/log"
"io"
"net/http"
"test/utils"
"time"
)
type binanceAnnounce struct {
ID int `json:"id"`
Code string `json:"code"`
Title string `json:"title"`
ReleaseDate int64 `json:"releaseDate"`
}
type binanceAnnouncesResponse struct {
Data struct {
Catalogs []struct {
Articles []binanceAnnounce `json:"articles"`
} `json:"catalogs"`
} `json:"data"`
}
type BinanceExtended interface {
Scrapper
FilterAnnouncement(text string)
}
type BinanceScrappy struct {
BaseScrappy
}
func (bs BinanceScrappy) IsProxyError(
transport *http.Transport,
body []byte,
cfg *utils.Statistic,
) {
if bytes.Contains(body, []byte("Cloudflare")) ||
bytes.Contains(body, []byte("cf-error-details")) {
cfg.BadProxies++
}
}
func (bs BinanceScrappy) FilterAnnouncement(
text string,
) {
}
func (bs BinanceScrappy) DeserializeData(
body []byte,
schema struct{},
) {
// Если хотим что-то вернуть, придётся менять интерфейс,
// но сейчас просто обрабатываем внутри
if bytes.Contains(body, []byte("Cloudflare")) ||
bytes.Contains(body, []byte("cf-error-details")) {
log.Info().Msg("Cloudflare error inside!")
}
}
func (bs *BinanceScrappy) Run(
ctx context.Context,
channels []utils.Channel,
wsHub *utils.Hub,
pm utils.ProxyManager,
telegramRetryDelay int,
telegramRetryCount int,
statsReportPeriod int,
proxyCooldown int,
announcesCount int,
delay float64,
domainCount int,
) {
bs.run(
bs,
ctx,
channels,
wsHub,
pm,
telegramRetryDelay,
telegramRetryCount,
statsReportPeriod,
proxyCooldown,
announcesCount,
delay,
domainCount,
)
}
func (bs BinanceScrappy) MakeRequest(
//b BinanceScrapper,
client *http.Client,
pm utils.ProxyManager,
ctx context.Context,
channels []utils.Channel,
wsHub *utils.Hub,
telegramRetryDelay int,
telegramRetryCount int,
announcesCount int,
stats *utils.Statistic,
) {
for {
transport := pm.GetTransport()
if transport == nil {
log.Warn().Msg("No available proxies, waiting...")
time.Sleep(5 * time.Second)
break
}
client.Transport = transport
resp, err := client.Get(bs.targetUrl)
stats.TotalRequests++
if err != nil {
//if proxyChecker, ok := b.(BinanceScrappy); ok {
log.Info().Msg("Detected IsProxyError method, executing...")
bs.IsProxyError(nil, nil, stats)
//}
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return
//return nil, fmt.Errorf("read body failed: %w", err)
}
var response binanceAnnouncesResponse
json.Unmarshal(body, &response)
log.Info().Msgf("Response: %v", response)
break
}
}
func NewBinanceScrapper() BinanceExtended {
bs := &BinanceScrappy{
BaseScrappy: BaseScrappy{
exchange: "binance",
targetUrl: "https://www.binance.com/bapi/apex/v1/public/apex/cms/article/list/query?type=1&pageNo=1&pageSize=10&catalogId=48",
},
}
var extended BinanceExtended = bs
return extended
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment