189fdf5ed6
- 新增 ViduProvider: TTS同步、声音复刻、对口型、任务查询 - 新增 ViduTTSService: 业务封装,6个精选中文预设音色 - Voice API 路由全面切换至 Vidu - 新增 /voice/lip-sync 对口型异步接口 - 前端适配: 16个音色→6个、slider范围更新、音量默认0 - 添加 vidu-tts-api.md 开发文档 - docker-compose 新增 VIDU_API_KEY 环境变量映射
242 lines
7.2 KiB
Python
242 lines
7.2 KiB
Python
"""
|
||
Vidu TTS 服务封装
|
||
=================
|
||
|
||
业务层封装:
|
||
- 同步 TTS
|
||
- 声音复刻
|
||
- 对口型(异步,需轮询)
|
||
- 预设音色列表
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from typing import Any
|
||
|
||
from app.ai.providers.vidu_provider import ViduProvider
|
||
|
||
logger = logging.getLogger(__name__)

# Vidu preset voices (backed by MiniMax, so MiniMax voice IDs are accepted).
# Every preview clip lives under the same CDN folder.
_PREVIEW_URL_PREFIX = "https://media.liche.cn/meijiaka-zj/voice/"

# One row per preset: (voice_id, display name, description, recommended, preview file).
_PRESET_VOICE_ROWS = (
    ("tianxin_xiaoling", "甜心小玲", "甜美可爱,活泼俏皮", True, "tianxin_xiaoling.mp3"),
    ("danya_xuejie", "淡雅学姐", "淡雅知性,温婉柔和", False, "danya_xuejie.mp3"),
    ("Chinese (Mandarin)_Warm_Girl", "温暖少女", "温暖亲切,清新自然", False, "Warm_Girl.mp3"),
    ("Chinese (Mandarin)_Radio_Host", "电台男主播", "专业播报,沉稳有力", False, "Radio_Host.mp3"),
    (
        "Chinese (Mandarin)_Straightforward_Boy",
        "率真弟弟",
        "率真爽朗,青春阳光",
        False,
        "Straightforward_Boy.mp3",
    ),
    ("Chinese (Mandarin)_Gentleman", "温润男声", "温润如玉,低沉磁性", False, "Gentleman.mp3"),
)

# Expanded catalogue; all presets are Mandarin ("zh") voices.
VIDU_PRESET_VOICES = [
    {
        "voice_id": preset_id,
        "name": display_name,
        "language": "zh",
        "description": blurb,
        "recommended": is_recommended,
        "previewUrl": _PREVIEW_URL_PREFIX + preview_file,
    }
    for preset_id, display_name, blurb, is_recommended, preview_file in _PRESET_VOICE_ROWS
]

# Voice used when a caller does not specify one.
DEFAULT_VOICE_ID = "tianxin_xiaoling"
|
||
|
||
|
||
class ViduTTSService:
    """Business-level wrapper around ``ViduProvider`` for Vidu voice features.

    Exposes the preset voice catalogue, synchronous TTS, voice cloning and
    lip-sync task creation/query. All remote work is delegated to
    ``self.provider``.
    """

    def __init__(self) -> None:
        self.provider = ViduProvider()

    # ==================== Preset voices ====================

    @staticmethod
    def get_preset_voices() -> list[dict]:
        """Return the preset voice catalogue.

        Each entry is a copy, so callers may freely modify the result without
        corrupting the module-level ``VIDU_PRESET_VOICES`` list.
        """
        return [dict(voice) for voice in VIDU_PRESET_VOICES]

    @staticmethod
    def get_voice_by_id(voice_id: str) -> dict | None:
        """Return a copy of the preset whose ``voice_id`` matches, else ``None``."""
        return next(
            (dict(voice) for voice in VIDU_PRESET_VOICES if voice["voice_id"] == voice_id),
            None,
        )

    # ==================== Synchronous TTS ====================

    async def synthesize_sync(
        self,
        text: str,
        voice_id: str | None = None,
        speed: float = 1.0,
        volume: int = 0,
        pitch: int = 0,
        **kwargs: Any,
    ) -> str:
        """Synthesize speech synchronously and return the audio URL.

        The numeric limits below are the documented ones; they are not
        validated in this method.

        Args:
            text: Text to synthesize (up to 10000 characters).
            voice_id: Voice ID; falls back to ``DEFAULT_VOICE_ID`` when empty.
            speed: Speaking rate (0.5-2.0).
            volume: Volume (0-10, 0 = normal).
            pitch: Pitch (-12 to 12).
            **kwargs: Extra options forwarded unchanged to ``provider.tts_sync``.

        Returns:
            URL of the generated audio.

        Raises:
            ValueError: If ``text`` is empty or whitespace-only, or the
                provider response carries no ``file_url``.
        """
        if not text or not text.strip():
            raise ValueError("text 不能为空")

        voice = voice_id or DEFAULT_VOICE_ID

        result = await self.provider.tts_sync(
            text=text,
            voice_id=voice,
            speed=speed,
            volume=volume,
            pitch=pitch,
            **kwargs,
        )

        audio_url = result.get("file_url")
        if not audio_url:
            raise ValueError("TTS 合成失败: 未返回音频 URL")

        # Only the first 60 characters of the URL are logged to keep lines short.
        logger.info("[Vidu TTS] 合成成功: voice_id=%s, url=%s...", voice, audio_url[:60])
        return audio_url

    # ==================== Voice cloning ====================

    async def clone_voice(
        self,
        audio_url: str,
        voice_id: str,
        text: str | None = None,
        prompt_audio_url: str | None = None,
        prompt_text: str | None = None,
    ) -> dict[str, Any]:
        """Clone a voice from a source recording (synchronous upstream call).

        The limits below are the documented ones; they are not validated in
        this method.

        Args:
            audio_url: URL of the source audio.
            voice_id: Custom voice ID (8-256 characters, starting with a letter).
            text: Trial text (up to 1000 characters). When omitted or empty, a
                built-in default sentence is sent instead, so the provider
                always receives a trial text.
            prompt_audio_url: URL of a sample audio clip (under 8 seconds).
            prompt_text: Transcript matching the sample audio.

        Returns:
            The provider's result dict (expected to contain ``voice_id``,
            ``demo_audio`` and related fields).
        """
        trial_text = text or "你好,欢迎使用vidu开放平台"

        result = await self.provider.clone_voice(
            audio_url=audio_url,
            voice_id=voice_id,
            text=trial_text,
            prompt_audio_url=prompt_audio_url,
            prompt_text=prompt_text,
        )

        logger.info("[Vidu Clone] 复刻成功: voice_id=%s", result.get("voice_id"))
        return result

    async def query_clone_task(self, voice_id: str) -> dict[str, Any]:
        """Compatibility shim for callers that poll a clone task.

        Cloning goes through a synchronous interface with no separate status
        query, so this simply echoes ``voice_id`` with a ``"succeeded"``
        status and performs no remote call.
        """
        return {"voice_id": voice_id, "status": "succeeded"}

    # ==================== Lip sync ====================

    async def lip_sync_create(
        self,
        video_url: str,
        audio_url: str | None = None,
        text: str | None = None,
        voice_id: str | None = None,
        speed: float = 1.0,
        volume: int = 0,
        ref_photo_url: str | None = None,
        callback_url: str | None = None,
    ) -> str:
        """Create an asynchronous lip-sync task and return its ``task_id``.

        Args:
            video_url: URL of the source video.
            audio_url: Driving audio URL (provide this or ``text``).
            text: Driving text (provide this or ``audio_url``).
            voice_id: Voice ID (used for text-driven tasks).
            speed: Speaking rate (used for text-driven tasks).
            volume: Volume (used for text-driven tasks).
            ref_photo_url: URL of a face reference image.
            callback_url: Callback address for task notifications.

        Returns:
            The ``task_id`` reported by the provider.

        Raises:
            ValueError: If neither ``audio_url`` nor non-blank ``text`` is
                given, or the provider response carries no ``task_id``.
        """
        # Fail fast instead of submitting a task that has nothing to drive it.
        if not audio_url and not (text and text.strip()):
            raise ValueError("audio_url 和 text 必须提供其中之一")

        result = await self.provider.lip_sync(
            video_url=video_url,
            audio_url=audio_url,
            text=text,
            voice_id=voice_id,
            speed=speed,
            volume=volume,
            ref_photo_url=ref_photo_url,
            callback_url=callback_url,
        )

        task_id = result.get("task_id")
        if not task_id:
            raise ValueError("对口型任务创建失败: 未返回 task_id")

        logger.info("[Vidu LipSync] 任务创建成功: task_id=%s", task_id)
        return task_id

    async def lip_sync_query(self, task_id: str) -> dict[str, Any]:
        """Query a lip-sync task's status and outputs.

        Returns:
            The provider's task dict (expected to contain ``state``,
            ``creations`` and related fields).
        """
        result = await self.provider.query_task(task_id)
        logger.info(
            "[Vidu LipSync] 查询状态: task_id=%s, state=%s", task_id, result.get("state")
        )
        return result
|