Skip to content

Instantly share code, notes, and snippets.

@cipepser
Last active March 11, 2017 03:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cipepser/b73da44413a4ccb5c9e32bd95a7ae4a4 to your computer and use it in GitHub Desktop.
Save cipepser/b73da44413a4ccb5c9e32bd95a7ae4a4 to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os"
"regexp"
"strings"
)
// Article is one record decoded from a single JSON line of the input file.
type Article struct {
	// Text is the article body, read from the JSON "text" key.
	Text string `json:"text"`
	// Title is the article title, read from the JSON "title" key.
	Title string `json:"title"`
}
const (
	// dataPath is the file main reads; each non-blank line is decoded as one
	// JSON-encoded Article.
	dataPath = "../data/jawiki-country.json"
	// targetTitle is the title of the article whose infobox is printed.
	targetTitle = "イギリス"
	// infoboxHeader is the opening line of the infobox template; it is
	// stripped before the fields are parsed.
	infoboxHeader = "{{基礎情報 国\n"
)

var (
	// infoboxRe captures the infobox template, from its opening marker to a
	// line that starts with "}}". The match is greedy, so it runs to the last
	// such line in the text.
	infoboxRe = regexp.MustCompile(`{{基礎情報 国[\s\S]*\n}}`)
	// fieldRe matches a single field: a "|" at the start of a line up to and
	// including the next "\n|".
	fieldRe = regexp.MustCompile(`(?m)^\|[\s\S]*?\n\|`)
)

// main loads the articles from dataPath, extracts the infobox of the article
// titled targetTitle, parses it into a field-name -> value map, and prints a
// few selected fields separated by divider lines.
//
// Any failure (unreadable file, missing article, missing infobox) is reported
// on stderr and the process exits with status 1.
func main() {
	articles, err := readArticles(dataPath)
	if err != nil {
		exitWithError(err)
	}

	// If several articles share the title, the last one wins.
	var txt string
	found := false
	for _, article := range articles {
		if article.Title == targetTitle {
			txt = article.Text
			found = true
		}
	}
	if !found {
		exitWithError(fmt.Errorf("article %q not found in %s", targetTitle, dataPath))
	}

	body, ok := extractInfobox(txt)
	if !ok {
		exitWithError(fmt.Errorf("article %q has no infobox template", targetTitle))
	}
	m := parseFields(body)

	fmt.Println(m["略名"])
	fmt.Println("------------------")
	fmt.Println(m["公式国名"])
	fmt.Println("------------------")
	fmt.Println(m["確立形態4"])
	fmt.Println("------------------")
	fmt.Println(m["注記"])
}

// exitWithError prints err to stderr and terminates the process with status 1.
func exitWithError(err error) {
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}

// readArticles opens path and decodes every non-blank line as a JSON Article.
//
// Lines that fail to decode are reported on stderr and skipped. A final line
// without a trailing newline is still processed. Any read error other than
// io.EOF aborts the read and is returned.
func readArticles(path string) ([]Article, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Registered only after a successful Open, so f is never nil here.
	defer f.Close()

	var articles []Article
	r := bufio.NewReader(f)
	for lineNo := 1; ; lineNo++ {
		line, err := r.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, fmt.Errorf("reading %s: %v", path, err)
		}
		// ReadBytes may return data together with io.EOF (last line without a
		// newline), so the line is handled before the EOF check below.
		if strings.TrimSpace(string(line)) != "" {
			var a Article
			if jerr := json.Unmarshal(line, &a); jerr != nil {
				fmt.Fprintf(os.Stderr, "%s:%d: skipping malformed JSON: %v\n", path, lineNo, jerr)
			} else {
				articles = append(articles, a)
			}
		}
		if err == io.EOF {
			return articles, nil
		}
	}
}

// extractInfobox returns the first infobox template found in text with its
// header line removed. The boolean is false when text contains no infobox.
func extractInfobox(text string) (string, bool) {
	box := infoboxRe.FindString(text)
	if box == "" {
		return "", false
	}
	return strings.Replace(box, infoboxHeader, "", 1), true
}

// parseFields splits an infobox body (header already removed) into a map from
// field name to field value. Name and value are separated by the first " = "
// in a field; fields without that separator are skipped.
func parseFields(body string) map[string]string {
	// Preprocessing: each fieldRe match consumes the "\n|" that begins the
	// next field, so without help every second field would be missed.
	// Doubling the "|" gives every field its own leading "|".
	body = strings.Replace(body, "\n|", "\n|\n|", -1)
	// The last field is followed by "\n}}" rather than "\n|"; add a "|" so
	// fieldRe can terminate on it too.
	body = strings.Replace(body, "\n}}", "\n|}}", -1)

	fields := make(map[string]string)
	for _, match := range fieldRe.FindAllString(body, -1) {
		// Drop the leading "|" and the trailing "\n|". A match is always at
		// least 3 bytes long, so the slice bounds are valid.
		entry := match[1 : len(match)-2]
		// Split only on the first separator so values that themselves
		// contain " = " are kept whole.
		kv := strings.SplitN(entry, " = ", 2)
		if len(kv) != 2 {
			continue
		}
		fields[kv[0]] = kv[1]
	}
	return fields
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment