Last active
August 29, 2015 14:00
-
-
Save baishuai/11518466 to your computer and use it in GitHub Desktop.
搜索引擎PageRank作业代码
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"flag" | |
"fmt" | |
"io" | |
"os" | |
"strconv" | |
"strings" | |
) | |
var ( | |
alpha *float64 | |
times *int | |
graphFile *string | |
nodeMapFile *string | |
outFile *string | |
num int | |
pRank map[int]float64 | |
nodeMap map[int]string | |
linkMap map[int][]int | |
) | |
func parseFlag() { | |
alpha = flag.Float64("alpha", 0.2, "参数α") | |
times = flag.Int("m", 25, "迭代次数") | |
graphFile = flag.String("wiki", "wiki.graph", "连接关系记录D") //wikigraphHead.txt | |
nodeMapFile = flag.String("node", "node.map.utf8", "node.map.utf8") | |
outFile = flag.String("output", "result.txt", "PageRank结果输出文件") | |
flag.Parse() | |
if flag.NFlag() == 0 { | |
flag.PrintDefaults() | |
} | |
} | |
func loadGraph() { | |
f, err := os.Open(*nodeMapFile) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(0) | |
} | |
defer f.Close() | |
br := bufio.NewReader(f) | |
nodeMap = make(map[int]string) | |
linkMap = make(map[int][]int) | |
var index, value int | |
var name string | |
for { | |
line, err := br.ReadString('\n') | |
if err == io.EOF { | |
break | |
} else { | |
index = strings.Index(line, "-->") | |
name = line[:index] | |
value, err = strconv.Atoi(line[index+3 : len(line)-1]) | |
nodeMap[value] = name | |
linkMap[value] = make([]int, 0) | |
} | |
} | |
num = len(nodeMap) | |
fmt.Println(num) | |
f, err = os.Open(*graphFile) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(0) | |
} | |
defer f.Close() | |
br = bufio.NewReader(f) | |
var outs []string | |
for { | |
line, err := br.ReadString('\n') | |
if err == io.EOF { | |
break | |
} else { | |
index = strings.Index(line, ":") | |
value, err = strconv.Atoi(line[:index]) | |
outs = strings.Split(line[index+1:len(line)-1], ",") //末尾换行符去掉 | |
for i := 0; i < len(outs); i++ { | |
out, _ := strconv.Atoi(outs[i]) | |
/*isnew := true | |
for j := 0; j < len(linkMap[value]); j++ { | |
if out == linkMap[value][j] { | |
isnew = false | |
break | |
} | |
} | |
if isnew {*/ | |
linkMap[value] = append(linkMap[value], out) | |
//} | |
} | |
} | |
} | |
fmt.Println("links format over") | |
} | |
func rank() { | |
iTmp := make(map[int]float64) | |
pRank = make(map[int]float64) | |
s0Tmp, s1Tmp := 0.0, 0.0 | |
f1 := 1.0 / float64(num) | |
f2 := *alpha / float64(num) | |
for k := range nodeMap { | |
iTmp[k] = f2 | |
pRank[k] = f1 | |
if len(linkMap[k]) == 0 { //outdegree | |
s1Tmp += pRank[k] | |
} | |
} | |
for k := 0; k < *times; k++ { | |
//遍历每条链接关系 | |
for node, value := range linkMap { | |
outdegree := len(value) | |
for i := 0; i < outdegree; i++ { | |
iTmp[value[i]] += (1.0 - *alpha) * pRank[node] / float64(outdegree) | |
} | |
} | |
s0Tmp, s1Tmp = s1Tmp, 0.0 | |
for n := range pRank { | |
pRank[n] = iTmp[n] + (1.0-*alpha)*s0Tmp/float64(num) | |
iTmp[n] = f2 | |
if len(linkMap[n]) == 0 { //outdegree | |
s1Tmp += pRank[n] | |
} | |
} | |
} | |
fmt.Println("rank over") | |
} | |
func output() { | |
f, err := os.Create(*outFile) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(0) | |
} | |
defer f.Close() | |
bw := bufio.NewWriter(f) | |
for k, v := range pRank { | |
bw.WriteString(fmt.Sprintf("%.8d:%.20f:%s\n", k, v, nodeMap[k])) | |
} | |
bw.Flush() | |
} | |
func main() { | |
parseFlag() | |
loadGraph() | |
rank() | |
output() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment