67e73b5a51
- 素材库: VoiceMaterialLibrary 支持音频/视频分类、Modal弹窗、进度弹窗 - 列表布局: 紧凑单行、灰色图标按钮、重命名功能、删除ConfirmModal - 生成配音: toast替换为ProgressModal - 私有音色显示: 描述改为createdAt日期 - 七牛上传: 修复upload_stream参数、修正put_stream参数名 - MiniMax后端: 新增Provider+Service,TTS/克隆/音色列表切到MiniMax - 前端默认音色: tianxin_xiaoling - Rust: 新增voice命令、本地音频存储、配音生成功能 - 新增shot统计组件、脚本编辑器优化
272 lines
8.0 KiB
Python
272 lines
8.0 KiB
Python
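"""
MiniMax TTS speech synthesis service
====================================

Business-layer wrapper for speech synthesis and voice cloning, aligned with
the interface of the existing TTSService.

Features:
1. Synchronous TTS (short text, ≤10000 characters)
2. Asynchronous long-text TTS (large text, ≤1,000,000 characters)
3. Voice cloning (upload audio → obtain a voice_id)
"""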
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
|
||
from app.ai.providers.minimax_provider import MiniMaxProvider
|
||
from app.config import get_settings
|
||
|
||
logger = logging.getLogger(__name__)


# MiniMax built-in preset voices (commonly used Chinese voices), kept as
# compact rows: (voice_id, name, description, recommended, previewUrl).
_PRESET_VOICE_ROWS = (
    (
        "junlang_nanyou",
        "俊朗男友",
        "成熟稳重,温暖亲切",
        False,
        "https://media.liche.cn/meijiaka-zj/audios/junlang_nanyou.mp3",
    ),
    (
        "Chinese (Mandarin)_Radio_Host",
        "电台男主播",
        "专业播报,清晰有力",
        False,
        "https://media.liche.cn/meijiaka-zj/audios/Radio_Host.mp3",
    ),
    (
        "Chinese (Mandarin)_Lyrical_Voice",
        "抒情男声",
        "深情款款,富有感染力",
        False,
        "https://media.liche.cn/meijiaka-zj/audios/Lyrical_Voice.mp3",
    ),
    (
        "tianxin_xiaoling",
        "甜心小玲",
        "甜美可爱,活泼俏皮",
        True,
        "https://media.liche.cn/meijiaka-zj/audios/tianxin_xiaoling.mp3",
    ),
    (
        "Chinese (Mandarin)_Gentle_Senior",
        "温柔学姐",
        "温柔知性,娓娓道来",
        False,
        "https://media.liche.cn/meijiaka-zj/audios/Gentle_Senior.mp3",
    ),
    (
        "Chinese (Mandarin)_Warm_Girl",
        "温暖少女",
        "轻柔细腻,清新自然",
        False,
        "https://media.liche.cn/meijiaka-zj/audios/Warm_Girl.mp3",
    ),
)

# Expand each row into the dict shape consumed by the rest of the module.
# Every preset is a Chinese ("zh") voice; key order is kept stable so that
# serialized output does not change.
MINIMAX_PRESET_VOICES = [
    {
        "voice_id": voice_id,
        "name": name,
        "language": "zh",
        "description": description,
        "recommended": recommended,
        "previewUrl": preview_url,
    }
    for voice_id, name, description, recommended, preview_url in _PRESET_VOICE_ROWS
]

# Default voice: the "甜心小玲" preset (the only one flagged as recommended).
DEFAULT_VOICE_ID = "tianxin_xiaoling"
|
||
|
||
|
||
class MiniMaxTTSService:
    """Business-layer wrapper around ``MiniMaxProvider`` for TTS and voice cloning.

    Covers three groups of operations:

    * synchronous TTS (``synthesize_sync``),
    * asynchronous long-text TTS (``synthesize_async_create`` /
      ``query_async_task``),
    * voice cloning (``clone_voice`` / ``query_clone_task``),

    plus static accessors for the built-in preset voices.
    """

    # Voice used whenever a caller does not pass ``voice_id``.
    default_voice_id: str = DEFAULT_VOICE_ID

    def __init__(self) -> None:
        """Create the provider from ``MINIMAX_API_KEY`` / ``MINIMAX_BASE_URL`` settings."""
        settings = get_settings()
        self.provider = MiniMaxProvider(
            api_key=settings.MINIMAX_API_KEY,
            base_url=settings.MINIMAX_BASE_URL,
        )

    # ==================== Internal helpers ====================

    @staticmethod
    def _require_non_blank(value: str | None, field: str) -> None:
        """Raise ``ValueError`` if *value* is empty or a whitespace-only string."""
        # The isinstance guard keeps truthy non-str values (e.g. URL objects)
        # from failing on a missing ``.strip()``.
        if not value or (isinstance(value, str) and not value.strip()):
            raise ValueError(f"{field} 不能为空")

    @staticmethod
    def _summarize_task(
        task_id: str,
        result: dict,
        success_fields: tuple[str, ...],
        default_error: str,
    ) -> dict:
        """Reduce a provider task response to the status dict returned to callers."""
        # A response without "status" is reported as still queued.
        status = result.get("status", "Queueing")
        summary = {"status": status, "task_id": task_id}

        if status == "Success":
            for key in success_fields:
                summary[key] = result.get(key)
        elif status == "Fail":
            summary["error_msg"] = result.get("error_msg", default_error)

        return summary

    # ==================== Synchronous TTS ====================

    async def synthesize_sync(
        self,
        text: str,
        voice_id: str | None = None,
        speed: float = 1.0,
        **kwargs,
    ) -> str:
        """Synthesize speech synchronously and return the audio URL.

        Args:
            text: Text to synthesize (documented limit: ≤10000 characters;
                not enforced here).
            voice_id: Voice ID; falls back to ``default_voice_id`` when falsy.
            speed: Speech rate (documented range 0.8-2.0; not validated here).
            **kwargs: Extra options forwarded to ``provider.tts_sync``. Any
                ``output_format`` supplied here is overridden with ``"url"``.

        Returns:
            The audio URL (per the original note, valid for 24 hours).

        Raises:
            ValueError: If ``text`` is empty/blank or the provider response
                contains no audio URL.
        """
        self._require_non_blank(text, "text")
        voice = voice_id or self.default_voice_id

        # This method's contract is "return a URL", so the output format is
        # pinned. Merging it into the kwargs (instead of passing it next to
        # **kwargs) avoids a "got multiple values for keyword argument"
        # TypeError when a caller also supplies output_format.
        call_kwargs = {**kwargs, "output_format": "url"}
        result = await self.provider.tts_sync(
            text=text,
            voice_id=voice,
            speed=speed,
            **call_kwargs,
        )

        # The URL is looked up under "audio" first, then "audio_url".
        audio_url = result.get("audio") or result.get("audio_url")
        if not audio_url:
            raise ValueError("TTS 合成失败: 未返回音频 URL")

        logger.info(
            "[MiniMax TTS] 合成成功: voice_id=%s, url=%s...", voice, audio_url[:60]
        )
        return audio_url

    # ==================== Asynchronous long-text TTS ====================

    async def synthesize_async_create(
        self,
        text: str,
        voice_id: str | None = None,
        speed: float = 1.0,
        **kwargs,
    ) -> str:
        """Create an asynchronous long-text TTS task and return its ``task_id``.

        Args:
            text: Text to synthesize (documented limit: ≤1,000,000 characters;
                not enforced here).
            voice_id: Voice ID; falls back to ``default_voice_id`` when falsy.
            speed: Speech rate.
            **kwargs: Extra options forwarded to ``provider.tts_async_create``.

        Returns:
            The ``task_id`` reported by the provider.

        Raises:
            ValueError: If ``text`` is empty/blank or no ``task_id`` is returned.
        """
        self._require_non_blank(text, "text")
        voice = voice_id or self.default_voice_id

        result = await self.provider.tts_async_create(
            text=text,
            voice_id=voice,
            speed=speed,
            **kwargs,
        )

        task_id = result.get("task_id")
        if not task_id:
            raise ValueError("异步 TTS 任务创建失败: 未返回 task_id")

        logger.info("[MiniMax TTS Async] 任务创建成功: task_id=%s", task_id)
        return task_id

    async def query_async_task(self, task_id: str) -> dict:
        """Query the status of an asynchronous TTS task.

        Returns:
            A dict of the form::

                {
                    "status": "Queueing" | "Processing" | "Success" | "Fail",
                    "task_id": "...",      # always present (echoes the argument)
                    "audio_url": "...",    # only when status is "Success"
                    "file_id": "...",      # only when status is "Success"
                    "duration": 123.45,    # only when status is "Success" (seconds)
                    "error_msg": "...",    # only when status is "Fail"
                }
        """
        result = await self.provider.tts_async_query(task_id)
        return self._summarize_task(
            task_id,
            result,
            success_fields=("audio_url", "file_id", "duration"),
            default_error="任务失败",
        )

    # ==================== Voice cloning ====================

    async def clone_voice(
        self,
        audio_url: str,
        voice_name: str,
        sample_audio_url: str | None = None,
    ) -> str:
        """Submit a voice-cloning task and return its ``task_id``.

        Args:
            audio_url: URL of the audio to clone (documented requirement:
                5-30 seconds, publicly accessible; not checked here).
            voice_name: Name for the cloned voice (documented limit:
                ≤20 characters; not checked here).
            sample_audio_url: Optional sample audio URL intended to improve
                cloning quality.

        Returns:
            The ``task_id`` reported by the provider.

        Raises:
            ValueError: If ``audio_url`` or ``voice_name`` is empty/blank, or
                no ``task_id`` is returned.
        """
        # Fail fast on missing required inputs, mirroring the text validation
        # done by the TTS methods, instead of sending a doomed request.
        self._require_non_blank(audio_url, "audio_url")
        self._require_non_blank(voice_name, "voice_name")

        result = await self.provider.clone_voice(
            audio_url=audio_url,
            voice_name=voice_name,
            sample_audio_url=sample_audio_url,
        )

        task_id = result.get("task_id")
        if not task_id:
            raise ValueError("克隆任务提交失败: 未返回 task_id")

        logger.info("[MiniMax Clone] 提交成功: task_id=%s", task_id)
        return task_id

    async def query_clone_task(self, task_id: str) -> dict:
        """Query the status of a voice-cloning task.

        Returns:
            A dict of the form::

                {
                    "status": "Queueing" | "Processing" | "Success" | "Fail",
                    "task_id": "...",     # always present (echoes the argument)
                    "voice_id": "...",    # only when status is "Success"
                    "trial_url": "...",   # only when status is "Success"
                    "error_msg": "...",   # only when status is "Fail"
                }
        """
        result = await self.provider.query_clone_task(task_id)
        return self._summarize_task(
            task_id,
            result,
            success_fields=("voice_id", "trial_url"),
            default_error="克隆失败",
        )

    # ==================== Preset voices ====================

    @staticmethod
    def get_preset_voices() -> list[dict]:
        """Return the preset voices as a fresh list of copied dicts.

        Copies are returned so that callers who extend or edit the result
        cannot corrupt the shared module-level ``MINIMAX_PRESET_VOICES`` table.
        """
        return [dict(voice) for voice in MINIMAX_PRESET_VOICES]

    @staticmethod
    def get_voice_by_id(voice_id: str) -> dict | None:
        """Return a copy of the preset voice whose ``voice_id`` matches, else ``None``."""
        match = next(
            (voice for voice in MINIMAX_PRESET_VOICES if voice["voice_id"] == voice_id),
            None,
        )
        # Copy for the same reason as get_preset_voices(): protect the shared table.
        return dict(match) if match is not None else None
|