Created
June 15, 2017 07:10
-
-
Save imfht/a3457a10ac3000a233a4c63967b77869 to your computer and use it in GitHub Desktop.
Fetch URLs and insert some results into Redis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// package xiaoxuanfeng_v2 | |
package main | |
import ( | |
"bufio" | |
//"flag" | |
"fmt" | |
"log" | |
"net/http" | |
"os" | |
"strings" | |
"sync" | |
//"encoding/hex" | |
"time" | |
//"encoding/json" | |
"io/ioutil" | |
"crypto/md5" | |
) | |
import ( | |
"flag" | |
"gopkg.in/redis.v4" | |
"encoding/hex" | |
"strconv" | |
"encoding/json" | |
) | |
// NumWorkers is the number of worker goroutines that Run starts.
const NumWorkers = 24

// Blank reference that keeps the fmt import counted as used.
var _ = fmt.Printf
// Work is one unit of work handed to a worker goroutine: a single URL to
// fetch.
type Work struct {
	// url is the trimmed line read from the input file.
	url string
}
// Message groups a URL with a fingerprint and an add-time string.
//
// NOTE(review): nothing in the visible code constructs a Message (its only
// use is commented out) — confirm whether the type is still needed.
type Message struct {
	url, finger, addtime string
}
// Foo is the record that gets JSON-encoded and pushed to Redis for a fetched
// URL.
//
// Its JSON form uses the keys "number" (URL), "title" (Fingre), "addtime"
// (TimeSharp) and "body" (body).
//
// NOTE(review): the keys "number" and "title" do not match the field names
// and look like leftovers; they are kept because consumers of the Redis list
// may depend on them.
type Foo struct {
	URL       string `json:"number"`
	Fingre    string `json:"title"`
	TimeSharp string `json:"addtime"`

	// body stays unexported so existing composite literals keep compiling.
	// encoding/json ignores unexported fields (a struct tag here has no
	// effect), so the field is encoded explicitly by MarshalJSON below.
	body string
}

// fooJSON is the wire representation of Foo with every field exported so
// that encoding/json can see all of them, including the body.
type fooJSON struct {
	URL       string `json:"number"`
	Fingre    string `json:"title"`
	TimeSharp string `json:"addtime"`
	Body      string `json:"body"`
}

// MarshalJSON implements json.Marshaler. It has a value receiver so that it
// is also used when a Foo value (not a pointer) is passed to json.Marshal.
func (f Foo) MarshalJSON() ([]byte, error) {
	return json.Marshal(fooJSON{
		URL:       f.URL,
		Fingre:    f.Fingre,
		TimeSharp: f.TimeSharp,
		Body:      f.body,
	})
}

// UnmarshalJSON implements json.Unmarshaler as the inverse of MarshalJSON.
// Keys that are absent from data leave the corresponding field unchanged,
// matching the default decoding behaviour for structs.
func (f *Foo) UnmarshalJSON(data []byte) error {
	wire := fooJSON{
		URL:       f.URL,
		Fingre:    f.Fingre,
		TimeSharp: f.TimeSharp,
		Body:      f.body,
	}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	f.URL = wire.URL
	f.Fingre = wire.Fingre
	f.TimeSharp = wire.TimeSharp
	f.body = wire.Body
	return nil
}
func createClient() *redis.Client { | |
client := redis.NewClient(&redis.Options{ | |
Addr: "localhost:6379", | |
Password: "", | |
DB: 0, | |
}) | |
// 通过 cient.Ping() 来检查是否成功连接到了 redis 服务器 | |
pong, err := client.Ping().Result() | |
fmt.Println(pong, err) | |
return client | |
} | |
var client = createClient() | |
// CaseInsensitiveContains reports whether substr occurs in s when letter
// case is ignored. Both arguments are upper-cased before the search.
func CaseInsensitiveContains(s, substr string) bool {
	haystack := strings.ToUpper(s)
	needle := strings.ToUpper(substr)
	return strings.Contains(haystack, needle)
}
func fetch(url string, c chan bool) { | |
res, err := http.Get(url) | |
if err != nil { | |
c <- false | |
fmt.Println("Status_Error", "000", "\t", err) | |
return | |
} | |
defer res.Body.Close() | |
if res.StatusCode == http.StatusOK { | |
// fmt.Println("Status____OK", res.StatusCode, "\t", url) | |
hasher := md5.New() | |
body, err := ioutil.ReadAll(res.Body) | |
if err != nil { | |
panic(err) | |
} | |
hasher.Write([]byte(body)) | |
fingre := hex.EncodeToString(hasher.Sum(nil)) | |
// add_time := time.Now().Unix() | |
addtime := strconv.FormatInt(time.Now().UTC().UnixNano(), 10) | |
// fmt.Print(addtime) | |
foo_marshalled, err := json.Marshal(Foo{TimeSharp: addtime, URL: url, Fingre: fingre, body:string(body)}) | |
// fmt.Print(string(foo_marshalled)) | |
client.RPush("content", string(foo_marshalled)) | |
//fmt.Println(string(body)) | |
//if strings.Contains(string(body), "hacked") { | |
// fmt.Println("ok", "\t", url) | |
//} | |
if CaseInsensitiveContains(string(body),"hack"){ | |
fmt.Println("hacked \t ",url) | |
} | |
// m := &Message{url:url, finger:fingre, addtime:addtime} | |
// fmt.Println(err) | |
//fmt.Println(m) | |
c <- true | |
} else { | |
fmt.Println("StatusFailed", res.StatusCode, "\t", url) | |
c <- false | |
} | |
} | |
func worker(in <-chan *Work, out chan <- *Work, wg *sync.WaitGroup) { | |
defer wg.Done() | |
for w := range in { | |
c := make(chan bool) | |
// fmt.Println(time.Now(), w.url) | |
go fetch(w.url, c) | |
timeout := time.After(5 * time.Second) | |
select { | |
case is200 := <-c: | |
if is200 { | |
out <- w | |
} | |
case <-timeout: | |
} | |
} | |
} | |
func sendLotsOfWork(in chan <- *Work, inputfile string) { | |
file, _ := os.Open(inputfile) | |
defer file.Close() | |
scanner := bufio.NewScanner(file) | |
for scanner.Scan() { | |
w := new(Work) | |
w.url = strings.TrimSpace(scanner.Text()) | |
in <- w | |
} | |
close(in) | |
} | |
func receiveLotsOfResults(out <-chan *Work, outputfile string) { | |
file, err := os.Create(outputfile) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer file.Close() | |
for w := range out { | |
_, err = file.WriteString(w.url + "\n") | |
if err != nil { | |
log.Fatal(err) | |
} | |
} | |
} | |
func Run(inputfile, outputfile string) { | |
in, out := make(chan *Work), make(chan *Work) | |
wg := &sync.WaitGroup{} | |
for i := 0; i < NumWorkers; i++ { | |
wg.Add(1) | |
go worker(in, out, wg) | |
} | |
go sendLotsOfWork(in, inputfile) | |
go func() { | |
wg.Wait() | |
close(out) | |
}() | |
receiveLotsOfResults(out, outputfile) | |
} | |
func main() { | |
if len(os.Args) != 3 { | |
fmt.Fprintf(os.Stderr, "usage: %s [inputfile] [outputfile]\n", os.Args[0]) | |
flag.PrintDefaults() | |
os.Exit(2) | |
} | |
start := time.Now() | |
inputfile, outputfile := os.Args[1], os.Args[2] | |
// fetch("http://www.baidu.com/",nil) | |
Run(inputfile, outputfile) | |
fmt.Println(time.Since(start)) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment