Skip to content

Instantly share code, notes, and snippets.

@yeelone
Last active December 29, 2018 16:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yeelone/5c857f4211a62c12d14ffb876363433e to your computer and use it in GitHub Desktop.
Save yeelone/5c857f4211a62c12d14ffb876363433e to your computer and use it in GitHub Desktop.
将stardict csv转存到 postgresql的一段golang代码
package main
import (
"bufio"
"database/sql"
"encoding/csv"
"fmt"
"io"
"log"
"os"
"strings"
_ "github.com/lib/pq"
)
type Dictionary struct {
Word string `json:"word"`
Definition string `json:"definition"`
Translation string `json:"translation"`
Pos string `json:"pos"`
Collins string `json:"collins"`
Oxford string `json:"oxford"`
Tag string `json:"tag"`
Bnc string `json:"bnc"`
Frq string `json:"frq"`
Exchange string `json:"exchange"`
Detail string `json:"detail"`
Audio string `json:"audio"`
Phonetic string `json:"phonetic"`
}
func main() {
connStr := "user=elone password=123456 dbname=db_elone_brap sslmode=disable"
db, err := sql.Open("postgres", connStr)
if err != nil {
log.Fatal(err)
}
err = db.Ping()
if err != nil {
fmt.Println("err", err)
}
dictionary := readCSV()
sql := `INSERT INTO dictionary(word,phonetic,definition,translation,pos,collins,oxford,tag ,bnc,frq,exchange,detail,audio) VALUES `
values := []string{}
limit := 10000
start := 0
n := 0
for i, d := range dictionary {
//总共几百万条记录,一次性插入会崩溃。所以要分批次
if start == limit || i == (len(dictionary)-1) {
n++
s := sql + strings.Join(values, ",")
_, err = db.Exec(s)
if err != nil {
fmt.Println("insert into table error occured", err)
} else {
fmt.Println("insert success :", n)
}
values = []string{}
start = 0
}
start++
values = append(values, `(`+d.Word+`,`+
d.Phonetic+`,`+d.Definition+`,`+d.Translation+`,`+d.Pos+`,`+d.Collins+`,`+d.Oxford+`,`+d.Tag+`,`+
d.Bnc+`,`+d.Frq+`,`+d.Exchange+`,`+d.Detail+`,`+d.Audio+`)`)
}
}
func readCSV() (dictionary []Dictionary) {
csvFile, _ := os.Open("stardict.csv")
reader := csv.NewReader(bufio.NewReader(csvFile))
reader.Comma = '|'
total := 0
count := 0
for {
line, error := reader.Read()
total++
if error == io.EOF {
break
} else if error != nil {
fmt.Println(error, line, len(line))
continue
}
dictionary = append(dictionary, Dictionary{
Word: handlerString(line[0]),
Phonetic: handlerString(line[1]),
Definition: handlerString(line[2]),
Translation: handlerString(line[3]),
Pos: handlerString(line[4]),
Collins: handlerString(line[5]),
Oxford: handlerString(line[6]),
Tag: handlerString(line[7]),
Bnc: handlerString(line[8]),
Frq: handlerString(line[9]),
Exchange: handlerString(line[10]),
Detail: handlerString(line[11]),
Audio: handlerString(line[12]),
})
count++
}
fmt.Println("total", total, "count", count, "dictionary", len(dictionary))
return dictionary
}
func handlerString(line string) string {
// 因为要插入的字符串可能包含单引号。在pg中处理这种情况,需要将单引号 ' 改为 ''
//
newLine := strings.Replace(line, `'`, `''`, -1)
newLine = `'` + newLine + `'` // 再用单引号包围起来
return newLine
}
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
blindside|ˈblaɪndsaɪd|"v. catch unawares| especially with harmful consequences\nv. attack or hit on or from the side where the attacked person's view is obstructed"|"vt. 攻其不备| 出其不意地袭击; 使遭受意外打击"|||||45560|0|s:blindsides||
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment