67e73b5a51
- 素材库: VoiceMaterialLibrary 支持音频/视频分类、Modal弹窗、进度弹窗 - 列表布局: 紧凑单行、灰色图标按钮、重命名功能、删除ConfirmModal - 生成配音: toast替换为ProgressModal - 私有音色显示: 描述改为createdAt日期 - 七牛上传: 修复upload_stream参数、修正put_stream参数名 - MiniMax后端: 新增Provider+Service,TTS/克隆/音色列表切到MiniMax - 前端默认音色: tianxin_xiaoling - Rust: 新增voice命令、本地音频存储、配音生成功能 - 新增shot统计组件、脚本编辑器优化
314 lines
9.7 KiB
Python
314 lines
9.7 KiB
Python
"""
|
|
TTS 服务层
|
|
==========
|
|
|
|
封装 Kling AI TTS API,提供语音合成能力。
|
|
API 文档:https://klingai.com/document-api
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from app.ai.providers.klingai_provider import KlingAIProvider
|
|
from app.config import get_settings
|
|
from app.services.qiniu_service import get_qiniu_service
|
|
|
|
logger = logging.getLogger(__name__)

# Polling settings for Kling TTS tasks.
# Upper bound, in seconds, on how long to wait for one TTS task.
TTS_TASK_TIMEOUT = 120
# Pause, in seconds, between two consecutive status polls.
TTS_POLL_INTERVAL = 2.0
|
|
|
|
|
|
def _get_kling_provider() -> KlingAIProvider:
    """Build a KlingAIProvider from the application settings.

    The access and secret keys are read from ``get_settings()``; a falsy
    value is replaced with an empty string before being handed to the
    provider.
    """
    app_settings = get_settings()
    return KlingAIProvider(
        {
            "access_key": app_settings.KLINGAI_ACCESS_KEY or "",
            "secret_key": app_settings.KLINGAI_SECRET_KEY or "",
        }
    )
|
|
|
|
|
|
class TTSService:
|
|
"""
|
|
Kling AI TTS 服务客户端
|
|
|
|
⚠️ 已废弃:语音合成功能已迁移至 MiniMaxTTSService
|
|
保留此文件仅用于历史兼容,新代码请使用 MiniMaxTTSService
|
|
"""
|
|
|
|
# Kling 官方预设音色(已废弃,仅视频生成场景仍可能使用)
|
|
|
|
# Kling 官方预设音色
|
|
PRESET_VOICES = [
|
|
{
|
|
"voice_id": "ai_shatang",
|
|
"name": "钓系女友",
|
|
"language": "zh",
|
|
"description": "甜美撒娇",
|
|
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/ai_shatang.mp3",
|
|
"recommended": False,
|
|
},
|
|
{
|
|
"voice_id": "chat1_female_new-3",
|
|
"name": "温柔女声",
|
|
"language": "zh",
|
|
"description": "温柔细腻",
|
|
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/chat1_female_new-3.mp3",
|
|
"recommended": True,
|
|
},
|
|
{
|
|
"voice_id": "yizhipiannan-v1",
|
|
"name": "播报男声",
|
|
"language": "zh",
|
|
"description": "沉稳播报",
|
|
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/yizhipiannan-v1.mp3",
|
|
"recommended": False,
|
|
},
|
|
{
|
|
"voice_id": "tiexin_nanyou",
|
|
"name": "盐系少年",
|
|
"language": "zh",
|
|
"description": "清新少年",
|
|
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/tiexin_nanyou.mp3",
|
|
"recommended": False,
|
|
},
|
|
{
|
|
"voice_id": "girlfriend_1_speech02",
|
|
"name": "撒娇女友",
|
|
"language": "zh",
|
|
"description": "可爱撒娇",
|
|
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/girlfriend_1_speech02.mp3",
|
|
"recommended": False,
|
|
},
|
|
]
|
|
|
|
|
|
def __init__(self) -> None:
|
|
self.provider = _get_kling_provider()
|
|
self.default_voice_id = "829826751244537879" # 温柔女声
|
|
|
|
async def synthesize_sync(
|
|
self,
|
|
text: str,
|
|
voice_id: str | None = None,
|
|
speed: float = 1.0,
|
|
voice_language: str = "zh",
|
|
volume: float = 1.0,
|
|
pitch: int = 0,
|
|
) -> str:
|
|
"""
|
|
同步合成语音(提交任务并等待完成),返回音频 URL。
|
|
|
|
Args:
|
|
text: 待合成文本(≤1000字符)
|
|
voice_id: 音色 ID(默认使用温柔女声)
|
|
speed: 语速 (0.8-2.0)
|
|
voice_language: 语言 (zh/en)
|
|
volume: 音量 (0.5-10.0)
|
|
pitch: 音调 (-10 到 10)
|
|
|
|
Returns:
|
|
音频 URL
|
|
|
|
Raises:
|
|
ValueError: 参数校验失败
|
|
TimeoutError: 等待超时
|
|
"""
|
|
if not text or not text.strip():
|
|
raise ValueError("text 不能为空")
|
|
|
|
if len(text) > 1000:
|
|
raise ValueError("text 不能超过 1000 字符")
|
|
|
|
voice = voice_id or self.default_voice_id
|
|
|
|
# 提交 TTS 任务
|
|
result = await self.provider.generate_tts(
|
|
text=text,
|
|
voice_id=voice,
|
|
voice_language=voice_language,
|
|
voice_speed=speed,
|
|
voice_volume=volume,
|
|
voice_pitch=pitch,
|
|
)
|
|
|
|
task_id = result.get("task_id")
|
|
if not task_id:
|
|
raise ValueError("TTS 任务提交失败: 未返回 task_id")
|
|
|
|
logger.info(f"[TTS] 任务已提交: task_id={task_id}")
|
|
|
|
# 先检查提交返回的结果,如果已完成直接返回
|
|
submit_status = result.get("task_status", "")
|
|
if submit_status == "succeed":
|
|
audio_url = self._extract_audio_url(result)
|
|
if audio_url:
|
|
return audio_url
|
|
|
|
# 等待任务完成
|
|
audio_url = await self._wait_for_task(task_id)
|
|
|
|
return audio_url
|
|
|
|
def _extract_audio_url(self, result: dict) -> str | None:
|
|
"""从 Kling TTS 响应中提取音频 URL"""
|
|
task_result = result.get("task_result", {})
|
|
if isinstance(task_result, dict):
|
|
audios = task_result.get("audios", [])
|
|
if audios and isinstance(audios, list):
|
|
return audios[0].get("url")
|
|
# 兜底:某些响应格式直接放在顶层
|
|
return result.get("audio_url")
|
|
|
|
async def _wait_for_task(self, task_id: str) -> str:
|
|
"""等待 TTS 任务完成并返回音频 URL"""
|
|
elapsed = 0.0
|
|
while elapsed < TTS_TASK_TIMEOUT:
|
|
await asyncio.sleep(TTS_POLL_INTERVAL)
|
|
elapsed += TTS_POLL_INTERVAL
|
|
|
|
result = await self.provider.get_tts_task(task_id)
|
|
status = result.get("task_status", "")
|
|
|
|
logger.debug(f"[TTS] task_id={task_id}, status={status}, elapsed={elapsed}s")
|
|
|
|
if status == "succeed":
|
|
audio_url = self._extract_audio_url(result)
|
|
if audio_url:
|
|
return audio_url
|
|
raise ValueError("TTS 任务成功但未返回音频 URL")
|
|
|
|
if status in ("failed", "error"):
|
|
raise ValueError(f"TTS 任务失败: {result.get('task_status_msg', '未知错误')}")
|
|
|
|
raise TimeoutError(f"TTS 任务等待超时({TTS_TASK_TIMEOUT}秒)")
|
|
|
|
async def synthesize_to_file(
|
|
self,
|
|
text: str,
|
|
output_path: str | Path,
|
|
voice_id: str | None = None,
|
|
speed: float = 1.0,
|
|
voice_language: str = "zh",
|
|
volume: float = 1.0,
|
|
pitch: int = 0,
|
|
) -> Path:
|
|
"""
|
|
合成语音并保存到文件。
|
|
|
|
Args:
|
|
text: 待合成文本
|
|
output_path: 输出文件路径
|
|
voice_id: 音色 ID
|
|
speed: 语速
|
|
voice_language: 语言
|
|
volume: 音量 (0.5-10.0)
|
|
pitch: 音调 (-10 到 10)
|
|
|
|
Returns:
|
|
输出文件路径
|
|
"""
|
|
import httpx
|
|
|
|
output_path = Path(output_path)
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
# 获取音频 URL
|
|
audio_url = await self.synthesize_sync(
|
|
text=text,
|
|
voice_id=voice_id,
|
|
speed=speed,
|
|
voice_language=voice_language,
|
|
volume=volume,
|
|
pitch=pitch,
|
|
)
|
|
|
|
# 下载音频并保存
|
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
|
response = await client.get(audio_url)
|
|
response.raise_for_status()
|
|
audio_bytes = response.content
|
|
|
|
output_path.write_bytes(audio_bytes)
|
|
logger.info(f"[TTS] 语音合成完成: {output_path}")
|
|
return output_path
|
|
|
|
async def batch_synthesize(
|
|
self,
|
|
segments: list[dict],
|
|
output_dir: str | Path,
|
|
voice_id: str | None = None,
|
|
speed: float = 1.0,
|
|
volume: float = 1.0,
|
|
pitch: int = 0,
|
|
) -> list[dict]:
|
|
"""
|
|
批量合成多段语音。
|
|
|
|
Args:
|
|
segments: 分段列表,每项包含 text, index(可选), filename(可选)
|
|
output_dir: 输出目录
|
|
voice_id: 音色 ID
|
|
speed: 语速
|
|
volume: 音量 (0.5-10.0)
|
|
pitch: 音调 (-10 到 10)
|
|
|
|
Returns:
|
|
结果列表,每项包含 input(原始输入)和 output(输出文件路径或错误信息)
|
|
"""
|
|
output_dir = Path(output_dir)
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
results = []
|
|
for seg in segments:
|
|
text = seg.get("text", "")
|
|
index = seg.get("index", len(results))
|
|
filename = seg.get("filename", f"audio_{index:04d}.mp3")
|
|
|
|
try:
|
|
output_path = await self.synthesize_to_file(
|
|
text=text,
|
|
output_path=output_dir / filename,
|
|
voice_id=voice_id,
|
|
speed=speed,
|
|
volume=volume,
|
|
pitch=pitch,
|
|
)
|
|
results.append({
|
|
"index": index,
|
|
"text": text,
|
|
"output_path": str(output_path),
|
|
"success": True,
|
|
"error": None,
|
|
})
|
|
except Exception as e:
|
|
logger.error(f"[TTS] 分段 {index} 合成失败: {e}")
|
|
results.append({
|
|
"index": index,
|
|
"text": text,
|
|
"output_path": None,
|
|
"success": False,
|
|
"error": str(e),
|
|
})
|
|
|
|
return results
|
|
|
|
@staticmethod
|
|
def get_preset_voices() -> list[dict]:
|
|
"""获取预设音色列表
|
|
|
|
返回预先生成并上传到七牛云的试听音频 URL
|
|
"""
|
|
return TTSService.PRESET_VOICES
|
|
|
|
@staticmethod
|
|
def get_voice_by_id(voice_id: str) -> dict | None:
|
|
"""根据 ID 获取音色信息"""
|
|
for voice in TTSService.PRESET_VOICES:
|
|
if voice["voice_id"] == voice_id:
|
|
return voice
|
|
return None
|