Created
January 24, 2018 09:43
-
-
Save Vic020/b5d3eb814e6e403ec4188e66417a3fed to your computer and use it in GitHub Desktop.
TTS 语音合成:调用百度 API,先按段落、再按每 500 字切分文本,逐段合成后合并为一个音频文件
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"net/http" | |
"fmt" | |
"io/ioutil" | |
"encoding/json" | |
"os" | |
"path/filepath" | |
"strings" | |
"bufio" | |
"github.com/dmulholland/mp3lib" | |
) | |
// tokenJson is the decoding target for the JSON body returned by the token
// endpoint queried in getToken. Only AccessToken is read by the rest of the
// program; the remaining fields are decoded but otherwise unused.
type tokenJson struct {
	AccessToken   string `json:"access_token"` // token later sent as the "tok" query parameter
	ExpiresIn     int    `json:"expires_in"`
	RefreshToken  string `json:"refresh_token"`
	Scope         string `json:"scope"`
	SessionKey    string `json:"session_key"`
	SessionSecret string `json:"session_secret"`
}
// appJson is the shape of the conf.json file loaded by main: the two
// credentials that main hands to getToken.
type appJson struct {
	AppKey    string `json:"AppKey"`    // sent as "client_id" when requesting a token
	SecretKey string `json:"SecretKey"` // sent as "client_secret" when requesting a token
}
// check aborts the program by panicking with e when e is non-nil; a nil
// error is a no-op.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// parseFile reads the text file at fpath and cuts it into pieces suitable for
// one speech request each.
//
// The file is read line by line (a trailing "\n" or "\r\n" is dropped). Every
// line is split into consecutive chunks of at most 500 characters; empty lines
// produce no chunk. Splitting is done on runes rather than bytes so that a
// multi-byte UTF-8 character (e.g. a Chinese character) is never cut in half.
// Bytes that are not valid UTF-8 come out as U+FFFD.
//
// It returns:
//   - lines: the chunks, in file order;
//   - fileAbsPath: the directory part of the absolute path of fpath, as
//     produced by filepath.Split (i.e. including the trailing separator);
//   - filePureName: the file name with its extension removed.
//
// parseFile panics if the path cannot be made absolute, the file cannot be
// opened, or reading fails.
func parseFile(fpath string) (lines []string, fileAbsPath string, filePureName string) {
	const (
		// chunkRunes is the maximum number of characters per chunk.
		chunkRunes = 500
		// maxLineBytes caps the length of a single input line. The scanner's
		// default limit (64 KiB) is raised so long paragraphs are not rejected.
		maxLineBytes = 1 << 30
	)

	absPath, err := filepath.Abs(fpath)
	if err != nil {
		panic(err)
	}
	fileAbsPath, fileName := filepath.Split(absPath)
	filePureName = strings.TrimSuffix(fileName, filepath.Ext(fileName))

	f, err := os.Open(fpath)
	if err != nil {
		panic(err)
	}
	// Deferred only after the error check, once f is known to be valid.
	defer f.Close()

	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for scanner.Scan() {
		runes := []rune(scanner.Text())
		for start := 0; start < len(runes); start += chunkRunes {
			end := start + chunkRunes
			if end > len(runes) {
				end = len(runes)
			}
			lines = append(lines, string(runes[start:end]))
		}
	}
	// Scan stops silently on failure; surface anything other than a clean EOF.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return lines, fileAbsPath, filePureName
}
func getToken(appKey, secretKey string) string { | |
url := "https://openapi.baidu.com/oauth/2.0/token" | |
client := &http.Client{} | |
req, err := http.NewRequest("GET", url, nil) | |
q := req.URL.Query() | |
q.Add("grant_type", "client_credentials") | |
q.Add("client_id", appKey) | |
q.Add("client_secret", secretKey) | |
req.URL.RawQuery = q.Encode() | |
if err != nil { | |
panic(err) | |
} | |
resp, err := client.Do(req) | |
if err != nil { | |
fmt.Println("Network Error") | |
panic(err) | |
} | |
defer resp.Body.Close() | |
body, err := ioutil.ReadAll(resp.Body) | |
if err != nil { | |
fmt.Println("Service Error") | |
panic(err) | |
} | |
resJson := tokenJson{} | |
json.Unmarshal(body, &resJson) | |
return resJson.AccessToken | |
} | |
func speech(token string, text string) (singpleMP3 []byte) { | |
url := "http://tsn.baidu.com/text2audio" | |
client := &http.Client{} | |
req, err := http.NewRequest("GET", url, nil) | |
check(err) | |
q := req.URL.Query() | |
q.Add("lan", "zh") | |
q.Add("ctp", "1") | |
q.Add("tex", text) | |
q.Add("tok", token) | |
q.Add("cuid", "golang_script") | |
req.URL.RawQuery = q.Encode() | |
resp, err := client.Do(req) | |
defer resp.Body.Close() | |
check(err) | |
singpleMP3, err = ioutil.ReadAll(resp.Body) | |
if err != nil { | |
fmt.Println("Service Error") | |
panic(err) | |
} | |
return | |
} | |
func synthesis(inpaths []string, outpath string) { | |
var firstBitRate int | |
var totalFrames uint32 | |
var totalBytes uint32 | |
var totalFiles int | |
//var isVBR bool | |
outfile, err := os.Create(outpath) | |
if err != nil { | |
fmt.Fprintln(os.Stderr, err) | |
os.Exit(1) | |
} | |
for _, inpath := range inpaths { | |
fmt.Println("+", inpath) | |
infile, err := os.Open(inpath) | |
if err != nil { | |
fmt.Fprintln(os.Stderr, err) | |
os.Exit(1) | |
} | |
isFirstFrame := true | |
for { | |
// Read the next frame from the input file. | |
frame := mp3lib.NextFrame(infile) | |
if frame == nil { | |
break | |
} | |
// Skip the first frame if it's a VBR header. | |
if isFirstFrame { | |
isFirstFrame = false | |
if mp3lib.IsXingHeader(frame) || mp3lib.IsVbriHeader(frame) { | |
continue | |
} | |
} | |
// If we detect more than one bitrate we'll need to add a VBR | |
// header to the output file. | |
if firstBitRate == 0 { | |
firstBitRate = frame.BitRate | |
} else if frame.BitRate != firstBitRate { | |
//isVBR = true | |
} | |
// Write the frame to the output file. | |
_, err := outfile.Write(frame.RawBytes) | |
if err != nil { | |
fmt.Fprintln(os.Stderr, err) | |
os.Exit(1) | |
} | |
totalFrames += 1 | |
totalBytes += uint32(len(frame.RawBytes)) | |
} | |
infile.Close() | |
totalFiles += 1 | |
} | |
outfile.Close() | |
} | |
func main() { | |
var wordPath string | |
var app appJson | |
data, err := ioutil.ReadFile(filepath.Join(filepath.Dir(os.Args[0]), "./conf.json")) | |
if err != nil { | |
fmt.Println("缺少配置文件") | |
fmt.Scanln() | |
os.Exit(1) | |
} | |
json.Unmarshal(data, &app) | |
fmt.Printf("请把文件拖入到这个框中\n") | |
fmt.Scanln(&wordPath) | |
// Get token | |
token := getToken(app.AppKey, app.SecretKey) | |
// Parse file to each line with 512 length limit | |
words, path, filename := parseFile(wordPath) | |
fmt.Printf("获取到%d段文字,转换中\n", len(words)) | |
var ins []string | |
for i, word := range words { | |
fpathname := filepath.Join(path, fmt.Sprintf("%s_%d.mp3", filename, i)) | |
fmt.Printf("进行到%d/%d, %s 保存到%s\n", i+1, len(words), word, fpathname) | |
ins = append(ins, fpathname) | |
mp3File := speech(token, word) | |
ioutil.WriteFile(fpathname, mp3File, 0644) | |
} | |
// synthesis | |
fmt.Printf("开始合成\n") | |
synthesis(ins, filepath.Join(path, fmt.Sprintf("%s.mp3", filename))) | |
fmt.Printf("完成") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment