Skip to content

Instantly share code, notes, and snippets.

@Loner1024
Created November 11, 2021 07:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Loner1024/9d9538974618daba8b339ddbc240a834 to your computer and use it in GitHub Desktop.
Save Loner1024/9d9538974618daba8b339ddbc240a834 to your computer and use it in GitHub Desktop.
MapReduce
package main
import (
"fmt"
"io"
"log"
"os"
"sort"
"strconv"
"strings"
"unicode"
)
// KeyValue is one key/value pair. Map returns a slice of these, and main
// groups them by Key before handing the Values to Reduce.
type KeyValue struct {
	Key, Value string
}
// ByKey implements sort.Interface over a []KeyValue, ordering the pairs by
// ascending Key (plain string comparison).
type ByKey []KeyValue

// Len reports how many pairs the slice holds.
func (s ByKey) Len() int {
	return len(s)
}

// Swap exchanges the pairs stored at indexes i and j.
func (s ByKey) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the Key at index i sorts strictly before the Key at
// index j.
func (s ByKey) Less(i, j int) bool {
	return s[j].Key > s[i].Key
}
// main runs a sequential, single-process MapReduce job. It applies Map to
// every hard-coded input file, sorts the collected pairs by key, calls Reduce
// once per distinct key and writes one "key result" line per key to the file
// named "output" in the working directory. Any I/O failure is fatal and is
// logged together with the underlying error.
func main() {
	files := []string{"pg-being_ernest.txt", "pg-dorian_gray.txt", "pg-frankenstein.txt", "pg-grimm.txt", "pg-huckleberry_finn.txt", "pg-metamorphosis.txt", "pg-sherlock_holmes.txt", "pg-tom_sawyer.txt"}

	var intermediate []KeyValue
	for _, filename := range files {
		content, err := readInput(filename)
		if err != nil {
			log.Fatal(err)
		}
		intermediate = append(intermediate, Map(filename, content)...)
	}

	// Sort by key so that all pairs sharing a key become adjacent; the
	// grouping in writeReduced relies on this.
	sort.Sort(ByKey(intermediate))

	oname := "output"
	ofile, err := os.Create(oname)
	if err != nil {
		log.Fatalf("cannot create output file: %v", err)
	}
	if err := writeReduced(ofile, intermediate); err != nil {
		// Best-effort close: the write error is the one worth reporting.
		ofile.Close()
		log.Fatalf("cannot write %v: %v", oname, err)
	}
	// Close can report a deferred write failure, so its error is checked
	// instead of being dropped.
	if err := ofile.Close(); err != nil {
		log.Fatalf("cannot close %v: %v", oname, err)
	}
}

// readInput returns the full contents of the named file as a string. The
// returned error wraps the underlying cause and names the file.
func readInput(filename string) (string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return "", fmt.Errorf("cannot open %v: %w", filename, err)
	}
	// The file is only read, so a Close error carries no data-loss risk and
	// is deliberately ignored.
	defer file.Close()

	content, err := io.ReadAll(file)
	if err != nil {
		return "", fmt.Errorf("cannot read file: %v: %w", filename, err)
	}
	return string(content), nil
}

// writeReduced walks intermediate, which must already be sorted by Key, calls
// Reduce once for each run of equal keys and writes "key result\n" to w. It
// stops at, and returns, the first write error.
func writeReduced(w io.Writer, intermediate []KeyValue) error {
	for i := 0; i < len(intermediate); {
		// Find the end (exclusive) of the run of pairs that share this key.
		j := i + 1
		for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
			j++
		}

		values := make([]string, 0, j-i)
		for k := i; k < j; k++ {
			values = append(values, intermediate[k].Value)
		}

		output := Reduce(intermediate[i].Key, values)
		if _, err := fmt.Fprintf(w, "%v %v\n", intermediate[i].Key, output); err != nil {
			return err
		}
		i = j
	}
	return nil
}
// Map is called once for each input file. The first argument is the name of
// the input file (not used by this implementation) and the second is the
// file's complete contents. It returns one KeyValue per word, in order of
// appearance, with the word as Key and "1" as Value. A word is a maximal run
// of runes for which unicode.IsLetter is true; every other rune acts as a
// separator. The result is nil when contents holds no letters.
func Map(filename, contents string) []KeyValue {
	// Any rune that is not a letter separates two words.
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r)
	}

	var pairs []KeyValue
	// Split the text with the custom separator predicate and emit one pair
	// per resulting word.
	for _, word := range strings.FieldsFunc(contents, isSeparator) {
		pairs = append(pairs, KeyValue{Key: word, Value: "1"})
	}
	return pairs
}
// Reduce is called once for each key produced by the Map tasks, together with
// every value that any Map task emitted for that key. It returns the number
// of values as a base-10 string; neither the key nor the contents of the
// values are inspected.
func Reduce(key string, values []string) string {
	count := int64(len(values))
	return strconv.FormatInt(count, 10)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment