Files
meijiaka-zy/python-api/app/services/vidu_tts_service.py
T
小鱼开发 189fdf5ed6 feat: 接入 Vidu TTS/复刻/对口型,替换 MiniMax 语音能力
- 新增 ViduProvider: TTS同步、声音复刻、对口型、任务查询
- 新增 ViduTTSService: 业务封装,6个精选中文预设音色
- Voice API 路由全面切换至 Vidu
- 新增 /voice/lip-sync 对口型异步接口
- 前端适配: 16个音色→6个、slider范围更新、音量默认0
- 添加 vidu-tts-api.md 开发文档
- docker-compose 新增 VIDU_API_KEY 环境变量映射
2026-04-21 23:26:24 +08:00

242 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Vidu TTS 服务封装
=================
业务层封装:
- 同步 TTS
- 声音复刻
- 对口型(异步,需轮询)
- 预设音色列表
"""
from __future__ import annotations
import logging
from typing import Any
from app.ai.providers.vidu_provider import ViduProvider
logger = logging.getLogger(__name__)
# Vidu preset voices. The backend is MiniMax, so MiniMax voice IDs are accepted.

# Common URL prefix under which every preset voice's preview clip is hosted.
_PREVIEW_BASE_URL = "https://media.liche.cn/meijiaka-zj/voice"

# One row per preset voice:
# (voice_id, display name, description, recommended flag, preview file stem)
_PRESET_VOICE_ROWS = (
    ("tianxin_xiaoling", "甜心小玲", "甜美可爱,活泼俏皮", True, "tianxin_xiaoling"),
    ("danya_xuejie", "淡雅学姐", "淡雅知性,温婉柔和", False, "danya_xuejie"),
    ("Chinese (Mandarin)_Warm_Girl", "温暖少女", "温暖亲切,清新自然", False, "Warm_Girl"),
    ("Chinese (Mandarin)_Radio_Host", "电台男主播", "专业播报,沉稳有力", False, "Radio_Host"),
    (
        "Chinese (Mandarin)_Straightforward_Boy",
        "率真弟弟",
        "率真爽朗,青春阳光",
        False,
        "Straightforward_Boy",
    ),
    ("Chinese (Mandarin)_Gentleman", "温润男声", "温润如玉,低沉磁性", False, "Gentleman"),
)

# Expand the rows into the dict shape consumed by the API layer. Key order is
# kept stable (voice_id, name, language, description, recommended, previewUrl)
# so serialized output does not change. All presets are Chinese ("zh").
VIDU_PRESET_VOICES = [
    {
        "voice_id": row_voice_id,
        "name": row_name,
        "language": "zh",
        "description": row_description,
        "recommended": row_recommended,
        "previewUrl": f"{_PREVIEW_BASE_URL}/{row_preview_stem}.mp3",
    }
    for (
        row_voice_id,
        row_name,
        row_description,
        row_recommended,
        row_preview_stem,
    ) in _PRESET_VOICE_ROWS
]

# Voice used when the caller does not specify one.
DEFAULT_VOICE_ID = "tianxin_xiaoling"
class ViduTTSService:
"""Vidu TTS 服务封装"""
def __init__(self):
self.provider = ViduProvider()
# ==================== 预设音色 ====================
@staticmethod
def get_preset_voices() -> list[dict]:
"""获取预设音色列表"""
return VIDU_PRESET_VOICES
@staticmethod
def get_voice_by_id(voice_id: str) -> dict | None:
"""根据 ID 获取音色信息"""
for voice in VIDU_PRESET_VOICES:
if voice["voice_id"] == voice_id:
return voice
return None
# ==================== 同步 TTS ====================
async def synthesize_sync(
self,
text: str,
voice_id: str | None = None,
speed: float = 1.0,
volume: int = 0,
pitch: int = 0,
**kwargs,
) -> str:
"""
同步语音合成,返回音频 URL。
Args:
text: 待合成文本(≤10000 字符)
voice_id: 音色 ID(默认:甜心小玲)
speed: 语速(0.5-2.0
volume: 音量(0-100=正常)
pitch: 语调(-12~12
Returns:
音频 URL
"""
if not text or not text.strip():
raise ValueError("text 不能为空")
voice = voice_id or DEFAULT_VOICE_ID
result = await self.provider.tts_sync(
text=text,
voice_id=voice,
speed=speed,
volume=volume,
pitch=pitch,
**kwargs,
)
audio_url = result.get("file_url")
if not audio_url:
raise ValueError("TTS 合成失败: 未返回音频 URL")
logger.info(f"[Vidu TTS] 合成成功: voice_id={voice}, url={audio_url[:60]}...")
return audio_url
# ==================== 声音复刻 ====================
async def clone_voice(
self,
audio_url: str,
voice_id: str,
text: str | None = None,
prompt_audio_url: str | None = None,
prompt_text: str | None = None,
) -> dict[str, Any]:
"""
声音复刻(同步接口)。
Args:
audio_url: 原音频 URL
voice_id: 自定义 voice_id8~256字符,首字符字母)
text: 试听文本(≤1000字符,不传则不会生成试听音频)
prompt_audio_url: 示例音频 URL<8秒)
prompt_text: 示例音频对应文本
Returns:
复刻结果 dict,包含 voice_id、demo_audio 等
"""
trial_text = text or "你好,欢迎使用vidu开放平台"
result = await self.provider.clone_voice(
audio_url=audio_url,
voice_id=voice_id,
text=trial_text,
prompt_audio_url=prompt_audio_url,
prompt_text=prompt_text,
)
logger.info(f"[Vidu Clone] 复刻成功: voice_id={result.get('voice_id')}")
return result
async def query_clone_task(self, voice_id: str) -> dict[str, Any]:
"""
Vidu 声音复刻是同步接口,无独立查询。
此方法仅做兼容,返回已知的 voice_id 信息。
"""
return {"voice_id": voice_id, "status": "succeeded"}
# ==================== 对口型 ====================
async def lip_sync_create(
self,
video_url: str,
audio_url: str | None = None,
text: str | None = None,
voice_id: str | None = None,
speed: float = 1.0,
volume: int = 0,
ref_photo_url: str | None = None,
callback_url: str | None = None,
) -> str:
"""
创建对口型任务(异步接口),返回 task_id。
Args:
video_url: 原视频 URL
audio_url: 音频 URL(与 text 二选一)
text: 文本内容(与 audio_url 二选一)
voice_id: 音色 ID(文字驱动时生效)
speed: 语速(文字驱动时生效)
volume: 音量(文字驱动时生效)
ref_photo_url: 人脸参考图 URL
callback_url: 回调地址
Returns:
task_id
"""
result = await self.provider.lip_sync(
video_url=video_url,
audio_url=audio_url,
text=text,
voice_id=voice_id,
speed=speed,
volume=volume,
ref_photo_url=ref_photo_url,
callback_url=callback_url,
)
task_id = result.get("task_id")
if not task_id:
raise ValueError("对口型任务创建失败: 未返回 task_id")
logger.info(f"[Vidu LipSync] 任务创建成功: task_id={task_id}")
return task_id
async def lip_sync_query(self, task_id: str) -> dict[str, Any]:
"""
查询对口型任务状态及生成物。
Returns:
任务状态 dict,包含 state、creations 等
"""
result = await self.provider.query_task(task_id)
logger.info(f"[Vidu LipSync] 查询状态: task_id={task_id}, state={result.get('state')}")
return result