Skip to content

Instantly share code, notes, and snippets.

@imfht
Created June 15, 2017 07:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save imfht/a3457a10ac3000a233a4c63967b77869 to your computer and use it in GitHub Desktop.
Save imfht/a3457a10ac3000a233a4c63967b77869 to your computer and use it in GitHub Desktop.
Fetch URLs and insert some of the results into Redis.
// package xiaoxuanfeng_v2
package main
import (
"bufio"
//"flag"
"fmt"
"log"
"net/http"
"os"
"strings"
"sync"
//"encoding/hex"
"time"
//"encoding/json"
"io/ioutil"
"crypto/md5"
)
import (
"flag"
"gopkg.in/redis.v4"
"encoding/hex"
"strconv"
"encoding/json"
)
const (
// NumWorkers is the number of worker goroutines that Run starts.
NumWorkers = 24
)
// Blank reference to fmt.Printf: it uses the fmt import without calling anything.
var _ = fmt.Printf;
// Work is one unit of work handed to a worker: a single URL to fetch.
type Work struct {
url string // target URL; sendLotsOfWork fills it with a whitespace-trimmed input line
}
// Message groups a URL with its fingerprint and the time it was added.
// It is not referenced by any live code in this file.
type Message struct {
	url, finger, addtime string
}
// Foo is the record that fetch JSON-encodes and pushes to Redis for a page
// that answered 200 OK.
//
// The JSON key names do not match the field names: URL is emitted as
// "number" and Fingre as "title". They are kept as-is so existing consumers
// of the stored records keep working.
type Foo struct {
	URL       string `json:"number"`
	Fingre    string `json:"title"`
	TimeSharp string `json:"addtime"`
	// body is unexported, so encoding/json would skip it regardless of any
	// struct tag. MarshalJSON emits it explicitly under the "body" key.
	body string
}

// MarshalJSON encodes f with the keys "number", "title", "addtime" and
// "body". It exists because the unexported body field is invisible to the
// default struct encoder, which previously dropped the page body from every
// stored record.
func (f Foo) MarshalJSON() ([]byte, error) {
	// The anonymous struct has no MarshalJSON method, so this does not recurse.
	return json.Marshal(struct {
		URL       string `json:"number"`
		Fingre    string `json:"title"`
		TimeSharp string `json:"addtime"`
		Body      string `json:"body"`
	}{
		URL:       f.URL,
		Fingre:    f.Fingre,
		TimeSharp: f.TimeSharp,
		Body:      f.body,
	})
}
// createClient builds a Redis client for localhost:6379 (no password, DB 0),
// pings the server once and prints the reply together with any error.
// The client is returned whether or not the ping succeeded.
func createClient() *redis.Client {
	opts := &redis.Options{
		Addr:     "localhost:6379",
		Password: "",
		DB:       0,
	}
	rdb := redis.NewClient(opts)

	// Ping to check whether the connection to the Redis server works.
	reply, pingErr := rdb.Ping().Result()
	fmt.Println(reply, pingErr)

	return rdb
}
// client is the package-wide Redis client. It is initialised by calling
// createClient during package initialisation, so the ping in createClient
// runs before main.
var client = createClient()
// CaseInsensitiveContains reports whether substr occurs within s when both
// are compared after upper-casing.
func CaseInsensitiveContains(s, substr string) bool {
	haystack := strings.ToUpper(s)
	needle := strings.ToUpper(substr)
	return strings.Contains(haystack, needle)
}
// fetch downloads url with http.Get and reports on c whether the server
// answered 200 OK.
//
// For a 200 response the body is read in full, fingerprinted with MD5 and
// pushed as a JSON-encoded Foo onto the Redis list "content". A line is
// printed when the body contains "hack" (case-insensitively).
//
// Exactly one value is sent on c per call: true for a readable 200 response,
// false for a request error, a non-200 status or a body read error. The send
// blocks until it is received unless c is buffered. Failures to encode or
// store the record are printed but do not change the reported result, since
// the URL itself did answer 200.
//
// NOTE(review): http.Get uses the default client, which has no timeout, so a
// stalled server keeps this goroutine alive indefinitely — consider a client
// with an explicit Timeout.
func fetch(url string, c chan bool) {
	res, err := http.Get(url)
	if err != nil {
		fmt.Println("Status_Error", "000", "\t", err)
		c <- false
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		fmt.Println("StatusFailed", res.StatusCode, "\t", url)
		c <- false
		return
	}

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		// A broken connection on one URL must not take down every worker,
		// so report the failure instead of panicking.
		fmt.Println("Read___Error", res.StatusCode, "\t", url, "\t", err)
		c <- false
		return
	}

	page := string(body) // converted once, reused for the record and the scan
	sum := md5.Sum(body)
	fingre := hex.EncodeToString(sum[:])
	addtime := strconv.FormatInt(time.Now().UTC().UnixNano(), 10)

	record, err := json.Marshal(Foo{TimeSharp: addtime, URL: url, Fingre: fingre, body: page})
	if err != nil {
		fmt.Println("Marshal_Error", "\t", url, "\t", err)
	} else if err := client.RPush("content", string(record)).Err(); err != nil {
		fmt.Println("Redis__Error", "\t", url, "\t", err)
	}

	if CaseInsensitiveContains(page, "hack") {
		fmt.Println("hacked \t ", url)
	}
	c <- true
}
// worker consumes Work items from in until in is closed, fetching each URL
// in a separate goroutine and waiting up to five seconds for the verdict.
// Items whose fetch reported success in time are forwarded on out; failed or
// timed-out items are dropped. wg.Done is called when the worker returns.
func worker(in <-chan *Work, out chan<- *Work, wg *sync.WaitGroup) {
	defer wg.Done()

	const fetchTimeout = 5 * time.Second

	for w := range in {
		// Buffered with capacity 1 so that fetch can always deliver its
		// single result and exit, even when this worker has already given up
		// on the timeout. With an unbuffered channel the fetch goroutine
		// would block on its send forever after a timeout.
		c := make(chan bool, 1)
		go fetch(w.url, c)

		timer := time.NewTimer(fetchTimeout)
		select {
		case is200 := <-c:
			timer.Stop() // release the timer early; its channel is not reused
			if is200 {
				out <- w
			}
		case <-timer.C:
			// Too slow: drop the item and move on to the next one.
		}
	}
}
// sendLotsOfWork reads inputfile line by line and sends one Work per
// non-blank line on in, with surrounding whitespace trimmed from the URL.
//
// in is closed when the function returns on every path, including when the
// file cannot be opened or read, so receivers ranging over in always
// terminate. Open and read errors are logged rather than treated as fatal.
func sendLotsOfWork(in chan<- *Work, inputfile string) {
	defer close(in)

	file, err := os.Open(inputfile)
	if err != nil {
		log.Printf("opening input file: %v", err)
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		url := strings.TrimSpace(scanner.Text())
		if url == "" {
			// A blank line can never be fetched; skip it instead of
			// spending a worker on a guaranteed request error.
			continue
		}
		in <- &Work{url: url}
	}
	// Scan returns false on both EOF and failure (e.g. an over-long line);
	// only Err tells the two apart.
	if err := scanner.Err(); err != nil {
		log.Printf("reading input file %q: %v", inputfile, err)
	}
}
// receiveLotsOfResults creates (or truncates) outputfile and appends the URL
// of every Work received on out, one per line, until out is closed.
// Failure to create the file or to write a line ends the program through
// log.Fatal.
func receiveLotsOfResults(out <-chan *Work, outputfile string) {
	dst, createErr := os.Create(outputfile)
	if createErr != nil {
		log.Fatal(createErr)
	}
	defer dst.Close()

	for {
		item, open := <-out
		if !open {
			return
		}
		if _, writeErr := dst.WriteString(item.url + "\n"); writeErr != nil {
			log.Fatal(writeErr)
		}
	}
}
// Run wires the pipeline together: NumWorkers workers read URLs produced
// from inputfile by sendLotsOfWork and forward the successful ones, which
// receiveLotsOfResults writes to outputfile. Run returns once every worker
// has finished and all results have been written.
func Run(inputfile, outputfile string) {
	var wg sync.WaitGroup
	jobs := make(chan *Work)
	results := make(chan *Work)

	wg.Add(NumWorkers)
	for n := NumWorkers; n > 0; n-- {
		go worker(jobs, results, &wg)
	}

	go sendLotsOfWork(jobs, inputfile)

	// Close results only after the last worker is done, so the receiver's
	// loop ends exactly when no more results can arrive.
	go func() {
		wg.Wait()
		close(results)
	}()

	receiveLotsOfResults(results, outputfile)
}
// main expects exactly two arguments, the input file and the output file.
// With any other argument count it prints a usage line to stderr and exits
// with status 2; otherwise it runs the pipeline and prints the elapsed time.
func main() {
	args := os.Args
	if len(args) != 3 {
		fmt.Fprintf(os.Stderr, "usage: %s [inputfile] [outputfile]\n", args[0])
		flag.PrintDefaults()
		os.Exit(2)
	}

	began := time.Now()
	Run(args[1], args[2])
	fmt.Println(time.Since(began))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment