前言说明
火山引擎语音合成是由字节跳动推出的一项AI技术服务,可将文字内容转化为高质量、自然流畅的音频内容。它采用了深度学习、语音信号处理等技术,通过机器生成的方式实现语音合成。
火山引擎语音合成提供了多种发音角色和音色,覆盖普通话、英语、粤语等常见语言和方言,并支持调节语速、音调等参数,以满足不同场景下的需求。
通过使用火山引擎语音合成,用户可以快速生成自然流畅、高质量的语音内容,广泛应用于在线教育、智能客服、语音导航、语音广告等领域。
脚本代码
Python环境
#coding=utf-8
'''
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
# Fill in the appid, access_token and cluster issued by the platform.
appid = "xxxx"
access_token = "xxxx"
cluster = "xxxx"
# Voice type code sent in the "audio" section of the request.
voice_type = "xxxx"

# Endpoint of the TTS HTTP API.
host = "openspeech.bytedance.com"
api_url = "https://{}/api/v1/tts".format(host)

# The Authorization value is "Bearer", a semicolon, then the access token.
header = dict(Authorization="Bearer;" + access_token)

# Request payload, grouped into the app / user / audio / request sections.
request_json = dict(
    app=dict(
        appid=appid,
        token="access_token",
        cluster=cluster,
    ),
    user=dict(
        uid="388808087185088",
    ),
    audio=dict(
        voice="other",
        voice_type=voice_type,
        encoding="mp3",
        speed=10,
        volume=10,
        pitch=10,
    ),
    request=dict(
        # A fresh random id is generated every time the script is loaded.
        reqid=str(uuid.uuid4()),
        text="字节跳动语音合成",
        text_type="plain",
        operation="query",
        with_frontend=1,
        frontend_type="unitTson",
    ),
)
if __name__ == '__main__':
    # Imported here so this block does not depend on the file's import list.
    import traceback

    try:
        # POST the JSON-encoded payload; the timeout keeps the script from
        # hanging indefinitely when the service does not answer.
        resp = requests.post(api_url, json.dumps(request_json), headers=header, timeout=30)
        # Decode the response body once and reuse the result.
        resp_body = resp.json()
        print(f"resp body: \n{resp_body}")
        if "data" in resp_body:
            # "data" is base64 text; decode it and store the bytes as an mp3.
            # The with-statement closes the file even if the write fails.
            with open("test_submit.mp3", "wb") as file_to_save:
                file_to_save.write(base64.b64decode(resp_body["data"]))
    except Exception:
        # BaseException.with_traceback() requires a traceback argument, so the
        # bare call used previously raised TypeError inside the handler.
        # Print the active exception's traceback instead.
        traceback.print_exc()
Go环境
package main
import (
"fmt"
"time"
"bytes"
"errors"
"io/ioutil"
"net/http"
"encoding/json"
"encoding/base64"
"github.com/satori/go.uuid"
)
// TTSServResponse is the JSON response returned by the TTS backend service.
type TTSServResponse struct {
	// ReqID is the "reqid" field of the response.
	ReqID string `json:"reqid"`
	// Code is the status code; synthesis in this file treats 3000 as success.
	Code int `json:"code"`
	// Message is the "message" field of the response. The tag is lowercase
	// like every other field so the struct marshals with a consistent key.
	Message string `json:"message"`
	// Operation is the "operation" field of the response.
	Operation string `json:"operation"`
	// Sequence is the "sequence" field of the response.
	Sequence int `json:"sequence"`
	// Data carries the audio as base64 text; synthesis decodes it.
	Data string `json:"data"`
}
// httpPost sends body to url as an HTTP POST request carrying the given
// headers and returns the raw response body.
//
// timeout is applied as the http.Client timeout for the request. A response
// whose status code is outside the 2xx range is returned as an error that
// includes the status line and the response body, so callers never mistake
// an error page for a successful payload.
func httpPost(url string, headers map[string]string, body []byte,
	timeout time.Duration) ([]byte, error) {
	client := &http.Client{Timeout: timeout}

	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	for key, value := range headers {
		req.Header.Set(key, value)
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Read the body before checking the status so it can be included in the
	// error message below.
	retBody, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("unexpected http status %q: %s", resp.Status, retBody)
	}
	return retBody, nil
}
// BearerToken is the access token sent in the Authorization header. Replace
// the placeholder with the token shown in the platform's access
// authentication settings.
const BearerToken = "xxxx"

// synthesis sends text to the TTS HTTP API and returns the decoded audio.
//
// It builds the request payload, posts it with httpPost, decodes the JSON
// response into TTSServResponse and base64-decodes its Data field. An error
// is returned when the request cannot be encoded or sent, when the response
// cannot be parsed, when the response code is not 3000, or when the audio
// data is not valid base64.
func synthesis(text string) ([]byte, error) {
	reqID := uuid.Must(uuid.NewV4()).String()

	params := map[string]map[string]interface{}{
		"app": {
			// appid applied for on the platform.
			"appid": "xxxx",
			// This token field does not take effect; keep the default value.
			"token": "access_token",
			// Cluster name shown on the platform.
			"cluster": "xxxx",
		},
		"user": {
			// Optionally pass the real user ID here to make troubleshooting easier.
			"uid": "uid",
		},
		"audio": {
			"voice": "other",
			// Code of the selected voice.
			"voice_type": "xxxx",
			"encoding":   "wav",
			"speed":      10,
			"volume":     10,
			"pitch":      10,
		},
		"request": {
			"reqid":     reqID,
			"text":      text,
			"text_type": "plain",
			"operation": "query",
		},
	}

	headers := map[string]string{
		"Content-Type":  "application/json",
		"Authorization": fmt.Sprintf("Bearer;%s", BearerToken),
	}

	// Placeholder: replace with the service's HTTP POST endpoint. The Python
	// sample in this document posts to
	// https://openspeech.bytedance.com/api/v1/tts.
	url := "https://xxxxxxxx"
	timeo := 30 * time.Second

	bodyStr, err := json.Marshal(params)
	if err != nil {
		fmt.Printf("marshal request fail [err:%s]\n", err.Error())
		return nil, err
	}

	synResp, err := httpPost(url, headers, bodyStr, timeo)
	if err != nil {
		fmt.Printf("http post fail [err:%s]\n", err.Error())
		return nil, err
	}
	fmt.Printf("resp body:%s\n", synResp)

	var respJSON TTSServResponse
	if err = json.Unmarshal(synResp, &respJSON); err != nil {
		fmt.Printf("unmarshal response fail [err:%s]\n", err.Error())
		return nil, err
	}
	if code := respJSON.Code; code != 3000 {
		fmt.Printf("code fail [code:%d]\n", code)
		return nil, errors.New("resp code fail")
	}

	// Report a decode failure instead of returning truncated audio with a
	// nil error.
	audio, err := base64.StdEncoding.DecodeString(respJSON.Data)
	if err != nil {
		fmt.Printf("decode audio fail [err:%s]\n", err.Error())
		return nil, err
	}
	return audio, nil
}
// main synthesizes a fixed sample sentence and prints the length of the
// returned audio, or the error when synthesis fails.
func main() {
	const sample = "字节跳动语音合成"

	wave, failure := synthesis(sample)
	if failure == nil {
		fmt.Printf("get audio succ len[%d]\n", len(wave))
		return
	}
	fmt.Printf("synthesis fail [err:%s]\n", failure.Error())
}
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END