Skip to content

Instantly share code, notes, and snippets.

@baishuai
Last active August 29, 2015 14:00
Show Gist options
  • Save baishuai/11518466 to your computer and use it in GitHub Desktop.
Save baishuai/11518466 to your computer and use it in GitHub Desktop.
搜索引擎PageRank作业代码
package main
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
)
var (
alpha *float64
times *int
graphFile *string
nodeMapFile *string
outFile *string
num int
pRank map[int]float64
nodeMap map[int]string
linkMap map[int][]int
)
func parseFlag() {
alpha = flag.Float64("alpha", 0.2, "参数α")
times = flag.Int("m", 25, "迭代次数")
graphFile = flag.String("wiki", "wiki.graph", "连接关系记录D") //wikigraphHead.txt
nodeMapFile = flag.String("node", "node.map.utf8", "node.map.utf8")
outFile = flag.String("output", "result.txt", "PageRank结果输出文件")
flag.Parse()
if flag.NFlag() == 0 {
flag.PrintDefaults()
}
}
func loadGraph() {
f, err := os.Open(*nodeMapFile)
if err != nil {
fmt.Println(err)
os.Exit(0)
}
defer f.Close()
br := bufio.NewReader(f)
nodeMap = make(map[int]string)
linkMap = make(map[int][]int)
var index, value int
var name string
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
} else {
index = strings.Index(line, "-->")
name = line[:index]
value, err = strconv.Atoi(line[index+3 : len(line)-1])
nodeMap[value] = name
linkMap[value] = make([]int, 0)
}
}
num = len(nodeMap)
fmt.Println(num)
f, err = os.Open(*graphFile)
if err != nil {
fmt.Println(err)
os.Exit(0)
}
defer f.Close()
br = bufio.NewReader(f)
var outs []string
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
} else {
index = strings.Index(line, ":")
value, err = strconv.Atoi(line[:index])
outs = strings.Split(line[index+1:len(line)-1], ",") //末尾换行符去掉
for i := 0; i < len(outs); i++ {
out, _ := strconv.Atoi(outs[i])
/*isnew := true
for j := 0; j < len(linkMap[value]); j++ {
if out == linkMap[value][j] {
isnew = false
break
}
}
if isnew {*/
linkMap[value] = append(linkMap[value], out)
//}
}
}
}
fmt.Println("links format over")
}
func rank() {
iTmp := make(map[int]float64)
pRank = make(map[int]float64)
s0Tmp, s1Tmp := 0.0, 0.0
f1 := 1.0 / float64(num)
f2 := *alpha / float64(num)
for k := range nodeMap {
iTmp[k] = f2
pRank[k] = f1
if len(linkMap[k]) == 0 { //outdegree
s1Tmp += pRank[k]
}
}
for k := 0; k < *times; k++ {
//遍历每条链接关系
for node, value := range linkMap {
outdegree := len(value)
for i := 0; i < outdegree; i++ {
iTmp[value[i]] += (1.0 - *alpha) * pRank[node] / float64(outdegree)
}
}
s0Tmp, s1Tmp = s1Tmp, 0.0
for n := range pRank {
pRank[n] = iTmp[n] + (1.0-*alpha)*s0Tmp/float64(num)
iTmp[n] = f2
if len(linkMap[n]) == 0 { //outdegree
s1Tmp += pRank[n]
}
}
}
fmt.Println("rank over")
}
func output() {
f, err := os.Create(*outFile)
if err != nil {
fmt.Println(err)
os.Exit(0)
}
defer f.Close()
bw := bufio.NewWriter(f)
for k, v := range pRank {
bw.WriteString(fmt.Sprintf("%.8d:%.20f:%s\n", k, v, nodeMap[k]))
}
bw.Flush()
}
func main() {
parseFlag()
loadGraph()
rank()
output()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment