Created
November 11, 2021 07:51
-
-
Save Loner1024/9d9538974618daba8b339ddbc240a834 to your computer and use it in GitHub Desktop.
MapReduce
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"sort" | |
"strconv" | |
"strings" | |
"unicode" | |
) | |
// KeyValue is a single intermediate pair emitted by Map and later grouped by
// Key before being handed to Reduce.
type KeyValue struct {
	// Key is the grouping key; Map sets it to one word of the input.
	Key string
	// Value is the payload attached to Key; Map always sets it to "1".
	Value string
}
type ByKey []KeyValue | |
func (a ByKey) Len() int { return len(a) } | |
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key } | |
func main() { | |
files := []string{"pg-being_ernest.txt", "pg-dorian_gray.txt", "pg-frankenstein.txt", "pg-grimm.txt", "pg-huckleberry_finn.txt", "pg-metamorphosis.txt", "pg-sherlock_holmes.txt", "pg-tom_sawyer.txt"} | |
var intermediate []KeyValue | |
for _, filename := range files { | |
file, err := os.Open(filename) | |
if err != nil { | |
log.Fatalf("cannot open %v", filename) | |
} | |
content, err := io.ReadAll(file) | |
if err != nil { | |
log.Fatalf("cannot read file: %v", filename) | |
} | |
file.Close() | |
kva := Map(filename, string(content)) | |
intermediate = append(intermediate, kva...) | |
} | |
// 按 Key 排序 | |
sort.Sort(ByKey(intermediate)) | |
oname := "output" | |
ofile, err := os.Create(oname) | |
if err != nil { | |
log.Fatalln("cannot create output file") | |
} | |
i := 0 | |
for i < len(intermediate) { | |
j := i + 1 | |
for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key { | |
j++ | |
} | |
var values []string | |
for k := i; k < j; k++ { | |
values = append(values, intermediate[k].Value) | |
} | |
output := Reduce(intermediate[i].Key, values) | |
fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output) | |
i = j | |
} | |
ofile.Close() | |
} | |
// Map 函数对于每个输入文件,都会调用一次, 第一个参数是输入文件的名称,第二个参数是该文件的完整内容。 | |
func Map(filename, contents string) []KeyValue { | |
// 检测单单词分隔符, IsLetter 函数检测字符是否为字母 | |
ff := func(r rune) bool { return !unicode.IsLetter(r) } | |
// 使用自定义的函数分割字符串 | |
words := strings.FieldsFunc(contents, ff) | |
var kva []KeyValue | |
for _, w := range words { | |
kva = append(kva, KeyValue{ | |
Key: w, | |
Value: "1", | |
}) | |
} | |
return kva | |
} | |
// Reduce is called once for each distinct key produced by the Map tasks,
// together with every value that any Map task emitted for that key. For word
// count the result is simply how many values there are, formatted as a
// base-10 string; key and the contents of values are not inspected.
func Reduce(key string, values []string) string {
	count := int64(len(values))
	return strconv.FormatInt(count, 10)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment