Created
June 27, 2017 10:11
-
-
Save appleshan/9b1e9c59408398477765a9512dda03c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
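// I originally wanted to use unoconv to convert all kinds of documents to PDF,
// but txt files turned out to be a real headache. This approach can basically
// unify the four txt formats. Note that on Linux you need to set:
//
//	LANG C.UTF-8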
package until | |
import (
	"bytes"
	"io/ioutil"
	"unicode/utf16"
	"unicode/utf8"

	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)
func Utf16toString(b []uint8) (string) { | |
if len(b)&1 != 0 { | |
return string(b) | |
} | |
var bom int | |
if len(b) >= 2 { | |
switch n := int(b[0])<<8 | int(b[1]); n { | |
case 0xfffe: | |
bom = 1 | |
fallthrough | |
case 0xfeff: | |
b = b[2:] | |
w := make([]uint16, len(b)/2) | |
for i := range w { | |
w[i] = uint16(b[2*i+bom&1])<<8 | uint16(b[2*i+(bom+1)&1]) | |
} | |
return string(utf16.Decode(w)) | |
default: | |
gbk, _ := GbkToUtf8(b) | |
return string(gbk) | |
} | |
} else { | |
return string(b) | |
} | |
} | |
func GbkToUtf8(s []byte) ([]byte, error) { | |
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder()) | |
d, e := ioutil.ReadAll(reader) | |
if e != nil { | |
return nil, e | |
} | |
return d, nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment