Files
meijiaka-zy/python-api/app/services/tts_service.py
T
小鱼开发 67e73b5a51 feat: 素材库重构、七牛上传修复、配音页面优化、MiniMax后端接入
- 素材库: VoiceMaterialLibrary 支持音频/视频分类、Modal弹窗、进度弹窗
- 列表布局: 紧凑单行、灰色图标按钮、重命名功能、删除ConfirmModal
- 生成配音: toast替换为ProgressModal
- 私有音色显示: 描述改为createdAt日期
- 七牛上传: 修复upload_stream参数、修正put_stream参数名
- MiniMax后端: 新增Provider+Service,TTS/克隆/音色列表切到MiniMax
- 前端默认音色: tianxin_xiaoling
- Rust: 新增voice命令、本地音频存储、配音生成功能
- 新增shot统计组件、脚本编辑器优化
2026-04-21 23:27:08 +08:00

314 lines
9.7 KiB
Python

"""
TTS service layer
=================
Wraps the Kling AI TTS API to provide speech synthesis.
API documentation: https://klingai.com/document-api
"""
import asyncio
import logging
from pathlib import Path
from app.ai.providers.klingai_provider import KlingAIProvider
from app.config import get_settings
from app.services.qiniu_service import get_qiniu_service
logger = logging.getLogger(__name__)
# Kling TTS API settings
TTS_TASK_TIMEOUT = 120 # Maximum time to wait for a TTS task (seconds)
TTS_POLL_INTERVAL = 2.0 # Interval between task status polls (seconds)
def _get_kling_provider() -> KlingAIProvider:
    """Build a KlingAIProvider from the application settings.

    Falsy credential values (e.g. unset) are replaced with empty strings
    before being handed to the provider.
    """
    app_settings = get_settings()
    return KlingAIProvider(
        {
            "access_key": app_settings.KLINGAI_ACCESS_KEY or "",
            "secret_key": app_settings.KLINGAI_SECRET_KEY or "",
        }
    )
class TTSService:
"""
Kling AI TTS 服务客户端
⚠️ 已废弃:语音合成功能已迁移至 MiniMaxTTSService
保留此文件仅用于历史兼容,新代码请使用 MiniMaxTTSService
"""
# Kling 官方预设音色(已废弃,仅视频生成场景仍可能使用)
# Kling 官方预设音色
PRESET_VOICES = [
{
"voice_id": "ai_shatang",
"name": "钓系女友",
"language": "zh",
"description": "甜美撒娇",
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/ai_shatang.mp3",
"recommended": False,
},
{
"voice_id": "chat1_female_new-3",
"name": "温柔女声",
"language": "zh",
"description": "温柔细腻",
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/chat1_female_new-3.mp3",
"recommended": True,
},
{
"voice_id": "yizhipiannan-v1",
"name": "播报男声",
"language": "zh",
"description": "沉稳播报",
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/yizhipiannan-v1.mp3",
"recommended": False,
},
{
"voice_id": "tiexin_nanyou",
"name": "盐系少年",
"language": "zh",
"description": "清新少年",
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/tiexin_nanyou.mp3",
"recommended": False,
},
{
"voice_id": "girlfriend_1_speech02",
"name": "撒娇女友",
"language": "zh",
"description": "可爱撒娇",
"previewUrl": "https://media.liche.cn/meijiaka-zj/audios/girlfriend_1_speech02.mp3",
"recommended": False,
},
]
def __init__(self) -> None:
self.provider = _get_kling_provider()
self.default_voice_id = "829826751244537879" # 温柔女声
async def synthesize_sync(
self,
text: str,
voice_id: str | None = None,
speed: float = 1.0,
voice_language: str = "zh",
volume: float = 1.0,
pitch: int = 0,
) -> str:
"""
同步合成语音(提交任务并等待完成),返回音频 URL。
Args:
text: 待合成文本(≤1000字符)
voice_id: 音色 ID(默认使用温柔女声)
speed: 语速 (0.8-2.0)
voice_language: 语言 (zh/en)
volume: 音量 (0.5-10.0)
pitch: 音调 (-10 到 10)
Returns:
音频 URL
Raises:
ValueError: 参数校验失败
TimeoutError: 等待超时
"""
if not text or not text.strip():
raise ValueError("text 不能为空")
if len(text) > 1000:
raise ValueError("text 不能超过 1000 字符")
voice = voice_id or self.default_voice_id
# 提交 TTS 任务
result = await self.provider.generate_tts(
text=text,
voice_id=voice,
voice_language=voice_language,
voice_speed=speed,
voice_volume=volume,
voice_pitch=pitch,
)
task_id = result.get("task_id")
if not task_id:
raise ValueError("TTS 任务提交失败: 未返回 task_id")
logger.info(f"[TTS] 任务已提交: task_id={task_id}")
# 先检查提交返回的结果,如果已完成直接返回
submit_status = result.get("task_status", "")
if submit_status == "succeed":
audio_url = self._extract_audio_url(result)
if audio_url:
return audio_url
# 等待任务完成
audio_url = await self._wait_for_task(task_id)
return audio_url
def _extract_audio_url(self, result: dict) -> str | None:
"""从 Kling TTS 响应中提取音频 URL"""
task_result = result.get("task_result", {})
if isinstance(task_result, dict):
audios = task_result.get("audios", [])
if audios and isinstance(audios, list):
return audios[0].get("url")
# 兜底:某些响应格式直接放在顶层
return result.get("audio_url")
async def _wait_for_task(self, task_id: str) -> str:
"""等待 TTS 任务完成并返回音频 URL"""
elapsed = 0.0
while elapsed < TTS_TASK_TIMEOUT:
await asyncio.sleep(TTS_POLL_INTERVAL)
elapsed += TTS_POLL_INTERVAL
result = await self.provider.get_tts_task(task_id)
status = result.get("task_status", "")
logger.debug(f"[TTS] task_id={task_id}, status={status}, elapsed={elapsed}s")
if status == "succeed":
audio_url = self._extract_audio_url(result)
if audio_url:
return audio_url
raise ValueError("TTS 任务成功但未返回音频 URL")
if status in ("failed", "error"):
raise ValueError(f"TTS 任务失败: {result.get('task_status_msg', '未知错误')}")
raise TimeoutError(f"TTS 任务等待超时({TTS_TASK_TIMEOUT}秒)")
async def synthesize_to_file(
self,
text: str,
output_path: str | Path,
voice_id: str | None = None,
speed: float = 1.0,
voice_language: str = "zh",
volume: float = 1.0,
pitch: int = 0,
) -> Path:
"""
合成语音并保存到文件。
Args:
text: 待合成文本
output_path: 输出文件路径
voice_id: 音色 ID
speed: 语速
voice_language: 语言
volume: 音量 (0.5-10.0)
pitch: 音调 (-10 到 10)
Returns:
输出文件路径
"""
import httpx
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
# 获取音频 URL
audio_url = await self.synthesize_sync(
text=text,
voice_id=voice_id,
speed=speed,
voice_language=voice_language,
volume=volume,
pitch=pitch,
)
# 下载音频并保存
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.get(audio_url)
response.raise_for_status()
audio_bytes = response.content
output_path.write_bytes(audio_bytes)
logger.info(f"[TTS] 语音合成完成: {output_path}")
return output_path
async def batch_synthesize(
self,
segments: list[dict],
output_dir: str | Path,
voice_id: str | None = None,
speed: float = 1.0,
volume: float = 1.0,
pitch: int = 0,
) -> list[dict]:
"""
批量合成多段语音。
Args:
segments: 分段列表,每项包含 text, index(可选), filename(可选)
output_dir: 输出目录
voice_id: 音色 ID
speed: 语速
volume: 音量 (0.5-10.0)
pitch: 音调 (-10 到 10)
Returns:
结果列表,每项包含 input(原始输入)和 output(输出文件路径或错误信息)
"""
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
results = []
for seg in segments:
text = seg.get("text", "")
index = seg.get("index", len(results))
filename = seg.get("filename", f"audio_{index:04d}.mp3")
try:
output_path = await self.synthesize_to_file(
text=text,
output_path=output_dir / filename,
voice_id=voice_id,
speed=speed,
volume=volume,
pitch=pitch,
)
results.append({
"index": index,
"text": text,
"output_path": str(output_path),
"success": True,
"error": None,
})
except Exception as e:
logger.error(f"[TTS] 分段 {index} 合成失败: {e}")
results.append({
"index": index,
"text": text,
"output_path": None,
"success": False,
"error": str(e),
})
return results
@staticmethod
def get_preset_voices() -> list[dict]:
"""获取预设音色列表
返回预先生成并上传到七牛云的试听音频 URL
"""
return TTSService.PRESET_VOICES
@staticmethod
def get_voice_by_id(voice_id: str) -> dict | None:
"""根据 ID 获取音色信息"""
for voice in TTSService.PRESET_VOICES:
if voice["voice_id"] == voice_id:
return voice
return None