火山引擎语音合成 HTTP接口调用代码及方法

火山引擎语音合成 HTTP接口调用代码及方法

前言说明

火山引擎语音合成是由字节跳动推出的一项AI技术服务,可将文字内容转化为高质量、自然流畅的音频内容。它采用了深度学习、语音信号处理等技术,通过机器生成的方式实现语音合成。

火山引擎语音合成提供了多种发音角色和音色,覆盖普通话、粤语、英语等多种常见语言和方言,并支持调节语速、音调等参数,以满足不同场景下的需求。

火山引擎语音合成 HTTP接口调用代码及方法插图
火山引擎语音合成

通过使用火山引擎语音合成,用户可以快速生成自然流畅、高质量的语音内容,广泛应用于在线教育、智能客服、语音导航、语音广告等领域。

脚本代码

Python环境

#coding=utf-8

'''
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests

# Credentials issued by the platform: fill in your appid, access_token and cluster.
appid = "xxxx"
access_token = "xxxx"
cluster = "xxxx"

# Voice identifier to synthesize with, and the service endpoint.
voice_type = "xxxx"
host = "openspeech.bytedance.com"
api_url = "https://" + host + "/api/v1/tts"

# The access token travels in the Authorization header as "Bearer;<token>".
header = {"Authorization": "Bearer;" + access_token}

# JSON payload for a single synthesis request, grouped into the
# app / user / audio / request sections.
request_json = dict(
    app=dict(
        appid=appid,
        # Sent as this literal string, not the access_token variable.
        token="access_token",
        cluster=cluster,
    ),
    user=dict(
        uid="388808087185088",
    ),
    audio=dict(
        voice="other",
        voice_type=voice_type,
        encoding="mp3",
        speed=10,
        volume=10,
        pitch=10,
    ),
    request=dict(
        # A fresh random request id is generated every time the module loads.
        reqid=str(uuid.uuid4()),
        text="字节跳动语音合成",
        text_type="plain",
        operation="query",
        with_frontend=1,
        frontend_type="unitTson",
    ),
)

if __name__ == '__main__':
    # Imported here so this entry block is self-contained.
    import traceback

    # POST the JSON payload, print the decoded response, and when the response
    # carries a "data" field, base64-decode it and save it as test_submit.mp3.
    try:
        resp = requests.post(api_url, json.dumps(request_json), headers=header)
        # Parse the body once and reuse the result.
        resp_body = resp.json()
        print(f"resp body: \n{resp_body}")
        if "data" in resp_body:
            # The context manager guarantees the file is flushed and closed.
            with open("test_submit.mp3", "wb") as file_to_save:
                file_to_save.write(base64.b64decode(resp_body["data"]))
    except Exception:
        # with_traceback() requires a traceback argument and would itself raise
        # TypeError here; print the active exception's traceback instead.
        traceback.print_exc()

Go环境

package main
import (
    "fmt"
    "time"
    "bytes"
    "errors"
    "io/ioutil"
    "net/http"
    "encoding/json"
    "encoding/base64"
    "github.com/satori/go.uuid"
)
// TTSServResponse is the JSON response body returned by the TTS backend
// service. All JSON keys are lowercase.
type TTSServResponse struct {
	// ReqID is the value of the "reqid" key.
	ReqID string `json:"reqid"`
	// Code is the service status code; synthesis treats 3000 as success.
	Code int `json:"code"`
	// Message is the value of the "message" key.
	Message string `json:"message"`
	// Operation is the value of the "operation" key.
	Operation string `json:"operation"`
	// Sequence is the value of the "sequence" key.
	Sequence int `json:"sequence"`
	// Data is the payload that synthesis base64-decodes into audio bytes.
	Data string `json:"data"`
}
// httpPost sends body to url with the POST method and returns the raw
// response body.
//
// Every entry of headers is set on the request. timeout is applied to the
// HTTP client used for this call. The response status code is not inspected:
// the body is returned for any status. A non-nil error is returned when the
// request cannot be built, the round trip fails, or the body cannot be read.
func httpPost(url string, headers map[string]string, body []byte,
	timeout time.Duration) ([]byte, error) {
	request, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	for name := range headers {
		request.Header.Set(name, headers[name])
	}

	httpClient := http.Client{Timeout: timeout}
	response, err := httpClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	payload, readErr := ioutil.ReadAll(response.Body)
	if readErr != nil {
		return nil, readErr
	}
	return payload, nil
}
// synthesis submits text to the TTS HTTP endpoint and returns the decoded
// audio bytes.
//
// It builds a JSON request with "app", "user", "audio" and "request"
// sections, posts it with a 30-second timeout, and decodes the response into
// a TTSServResponse. The "xxxx" placeholders and the endpoint URL must be
// replaced with real values before use.
//
// An error is returned when the request cannot be encoded, the HTTP call
// fails, the response is not valid JSON, the response code is not 3000, or
// the returned data is not valid base64.
func synthesis(text string) ([]byte, error) {
	reqID := uuid.Must(uuid.NewV4()).String()
	params := map[string]map[string]interface{}{
		"app": {
			// Fill in the appid issued by the platform.
			"appid": "xxxx",
			// This token is not used by the service; keep the default value.
			"token": "access_token",
			// Fill in the cluster name shown on the platform.
			"cluster": "xxxx",
		},
		"user": {
			// Optionally pass the real user ID here to help troubleshooting.
			"uid": "uid",
		},
		"audio": {
			"voice": "other",
			// Fill in the identifier of the selected voice.
			"voice_type": "xxxx",
			"encoding":   "wav",
			"speed":      10,
			"volume":     10,
			"pitch":      10,
		},
		"request": {
			"reqid":     reqID,
			"text":      text,
			"text_type": "plain",
			"operation": "query",
		},
	}

	headers := map[string]string{
		"Content-Type": "application/json",
		// BearerToken is the token from the platform's access-authentication
		// settings.
		// NOTE(review): BearerToken is not declared in the visible part of
		// this file; it must be defined at package level.
		"Authorization": fmt.Sprintf("Bearer;%s", BearerToken),
	}

	// Replace with the endpoint URL; the original note points to item 4 of
	// the API documentation, "concurrent synthesis interface (POST)".
	url := "https://xxxxxxxx"
	timeout := 30 * time.Second

	body, err := json.Marshal(params)
	if err != nil {
		fmt.Printf("marshal request fail [err:%s]\n", err.Error())
		return nil, err
	}
	synResp, err := httpPost(url, headers, body, timeout)
	if err != nil {
		fmt.Printf("http post fail [err:%s]\n", err.Error())
		return nil, err
	}
	fmt.Printf("resp body:%s\n", synResp)

	var respJSON TTSServResponse
	if err := json.Unmarshal(synResp, &respJSON); err != nil {
		fmt.Printf("unmarshal response fail [err:%s]\n", err.Error())
		return nil, err
	}
	// Any code other than 3000 is treated as a failed synthesis.
	if code := respJSON.Code; code != 3000 {
		fmt.Printf("code fail [code:%d]\n", code)
		return nil, errors.New("resp code fail")
	}

	// Surface malformed base64 instead of returning truncated audio as success.
	audio, err := base64.StdEncoding.DecodeString(respJSON.Data)
	if err != nil {
		fmt.Printf("decode audio fail [err:%s]\n", err.Error())
		return nil, err
	}
	return audio, nil
}
// main synthesizes a fixed sample sentence and prints either the failure
// reason or the number of audio bytes received.
func main() {
	const sample = "字节跳动语音合成"

	data, synErr := synthesis(sample)
	if synErr != nil {
		fmt.Printf("synthesis fail [err:%s]\n", synErr)
		return
	}

	size := len(data)
	fmt.Printf("get audio succ len[%d]\n", size)
}

 

© 版权声明
THE END
喜欢就支持一下吧
点赞10 分享