Last active
December 29, 2018 16:11
-
-
Save yeelone/5c857f4211a62c12d14ffb876363433e to your computer and use it in GitHub Desktop.
将stardict csv转存到 postgresql的一段golang代码
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"database/sql" | |
"encoding/csv" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"strings" | |
_ "github.com/lib/pq" | |
) | |
type Dictionary struct { | |
Word string `json:"word"` | |
Definition string `json:"definition"` | |
Translation string `json:"translation"` | |
Pos string `json:"pos"` | |
Collins string `json:"collins"` | |
Oxford string `json:"oxford"` | |
Tag string `json:"tag"` | |
Bnc string `json:"bnc"` | |
Frq string `json:"frq"` | |
Exchange string `json:"exchange"` | |
Detail string `json:"detail"` | |
Audio string `json:"audio"` | |
Phonetic string `json:"phonetic"` | |
} | |
func main() { | |
connStr := "user=elone password=123456 dbname=db_elone_brap sslmode=disable" | |
db, err := sql.Open("postgres", connStr) | |
if err != nil { | |
log.Fatal(err) | |
} | |
err = db.Ping() | |
if err != nil { | |
fmt.Println("err", err) | |
} | |
dictionary := readCSV() | |
sql := `INSERT INTO dictionary(word,phonetic,definition,translation,pos,collins,oxford,tag ,bnc,frq,exchange,detail,audio) VALUES ` | |
values := []string{} | |
limit := 10000 | |
start := 0 | |
n := 0 | |
for i, d := range dictionary { | |
//总共几百万条记录,一次性插入会崩溃。所以要分批次 | |
if start == limit || i == (len(dictionary)-1) { | |
n++ | |
s := sql + strings.Join(values, ",") | |
_, err = db.Exec(s) | |
if err != nil { | |
fmt.Println("insert into table error occured", err) | |
} else { | |
fmt.Println("insert success :", n) | |
} | |
values = []string{} | |
start = 0 | |
} | |
start++ | |
values = append(values, `(`+d.Word+`,`+ | |
d.Phonetic+`,`+d.Definition+`,`+d.Translation+`,`+d.Pos+`,`+d.Collins+`,`+d.Oxford+`,`+d.Tag+`,`+ | |
d.Bnc+`,`+d.Frq+`,`+d.Exchange+`,`+d.Detail+`,`+d.Audio+`)`) | |
} | |
} | |
func readCSV() (dictionary []Dictionary) { | |
csvFile, _ := os.Open("stardict.csv") | |
reader := csv.NewReader(bufio.NewReader(csvFile)) | |
reader.Comma = '|' | |
total := 0 | |
count := 0 | |
for { | |
line, error := reader.Read() | |
total++ | |
if error == io.EOF { | |
break | |
} else if error != nil { | |
fmt.Println(error, line, len(line)) | |
continue | |
} | |
dictionary = append(dictionary, Dictionary{ | |
Word: handlerString(line[0]), | |
Phonetic: handlerString(line[1]), | |
Definition: handlerString(line[2]), | |
Translation: handlerString(line[3]), | |
Pos: handlerString(line[4]), | |
Collins: handlerString(line[5]), | |
Oxford: handlerString(line[6]), | |
Tag: handlerString(line[7]), | |
Bnc: handlerString(line[8]), | |
Frq: handlerString(line[9]), | |
Exchange: handlerString(line[10]), | |
Detail: handlerString(line[11]), | |
Audio: handlerString(line[12]), | |
}) | |
count++ | |
} | |
fmt.Println("total", total, "count", count, "dictionary", len(dictionary)) | |
return dictionary | |
} | |
func handlerString(line string) string { | |
// 因为要插入的字符串可能包含单引号。在pg中处理这种情况,需要将单引号 ' 改为 '' | |
// | |
newLine := strings.Replace(line, `'`, `''`, -1) | |
newLine = `'` + newLine + `'` // 再用单引号包围起来 | |
return newLine | |
} |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
blindside|ˈblaɪndsaɪd|"v. catch unawares| especially with harmful consequences\nv. attack or hit on or from the side where the attacked person's view is obstructed"|"vt. 攻其不备| 出其不意地袭击; 使遭受意外打击"|||||45560|0|s:blindsides|| |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment