Files
meijiaka-zy/python-api/app/services/minimax_tts_service.py
T
小鱼开发 67e73b5a51 feat: 素材库重构、七牛上传修复、配音页面优化、MiniMax后端接入
- 素材库: VoiceMaterialLibrary 支持音频/视频分类、Modal弹窗、进度弹窗
- 列表布局: 紧凑单行、灰色图标按钮、重命名功能、删除ConfirmModal
- 生成配音: toast替换为ProgressModal
- 私有音色显示: 描述改为createdAt日期
- 七牛上传: 修复upload_stream参数、修正put_stream参数名
- MiniMax后端: 新增Provider+Service,TTS/克隆/音色列表切到MiniMax
- 前端默认音色: tianxin_xiaoling
- Rust: 新增voice命令、本地音频存储、配音生成功能
- 新增shot统计组件、脚本编辑器优化
2026-04-21 23:27:08 +08:00

272 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
MiniMax TTS 语音合成服务
==========================
提供语音合成、克隆的业务层封装,与现有 TTSService 接口对齐。
功能:
1. 同步 TTS(短文本 ≤10000 字符)
2. 异步长文本 TTS(大文本 ≤100万字符)
3. 语音克隆(上传音频 → 获取 voice_id)
"""
from __future__ import annotations
import logging
from app.ai.providers.minimax_provider import MiniMaxProvider
from app.config import get_settings
logger = logging.getLogger(__name__)
# Built-in MiniMax preset voices (commonly used Mandarin Chinese voices).
# Every entry is a dict with the keys, in this order:
#   voice_id, name, language, description, recommended, previewUrl
# The rows below are (voice_id, name, description, recommended, previewUrl);
# "language" is "zh" for every preset.
MINIMAX_PRESET_VOICES = [
    {
        "voice_id": voice_id,
        "name": name,
        "language": "zh",
        "description": description,
        "recommended": recommended,
        "previewUrl": preview_url,
    }
    for voice_id, name, description, recommended, preview_url in (
        (
            "junlang_nanyou",
            "俊朗男友",
            "成熟稳重,温暖亲切",
            False,
            "https://media.liche.cn/meijiaka-zj/audios/junlang_nanyou.mp3",
        ),
        (
            "Chinese (Mandarin)_Radio_Host",
            "电台男主播",
            "专业播报,清晰有力",
            False,
            "https://media.liche.cn/meijiaka-zj/audios/Radio_Host.mp3",
        ),
        (
            "Chinese (Mandarin)_Lyrical_Voice",
            "抒情男声",
            "深情款款,富有感染力",
            False,
            "https://media.liche.cn/meijiaka-zj/audios/Lyrical_Voice.mp3",
        ),
        (
            "tianxin_xiaoling",
            "甜心小玲",
            "甜美可爱,活泼俏皮",
            True,
            "https://media.liche.cn/meijiaka-zj/audios/tianxin_xiaoling.mp3",
        ),
        (
            "Chinese (Mandarin)_Gentle_Senior",
            "温柔学姐",
            "温柔知性,娓娓道来",
            False,
            "https://media.liche.cn/meijiaka-zj/audios/Gentle_Senior.mp3",
        ),
        (
            "Chinese (Mandarin)_Warm_Girl",
            "温暖少女",
            "轻柔细腻,清新自然",
            False,
            "https://media.liche.cn/meijiaka-zj/audios/Warm_Girl.mp3",
        ),
    )
]

# Default voice: the "tianxin_xiaoling" preset, the only entry flagged as
# recommended in the table above.
DEFAULT_VOICE_ID = "tianxin_xiaoling"
class MiniMaxTTSService:
    """Service-layer wrapper around ``MiniMaxProvider`` for TTS and voice cloning.

    Workflows exposed:

    1. Synchronous TTS that returns an audio URL directly.
    2. Asynchronous long-text TTS (create a task, then poll it).
    3. Voice cloning (submit a task, then poll it).
    4. Lookup of the built-in preset voices.

    Provider responses are read with ``.get`` and are therefore expected to
    be dict-like; their exact schema is defined by ``MiniMaxProvider``.
    """

    # Voice used whenever a caller does not pass ``voice_id``.
    default_voice_id: str = DEFAULT_VOICE_ID

    def __init__(self) -> None:
        """Create the provider from the ``MINIMAX_API_KEY`` / ``MINIMAX_BASE_URL`` settings."""
        settings = get_settings()
        self.provider = MiniMaxProvider(
            api_key=settings.MINIMAX_API_KEY,
            base_url=settings.MINIMAX_BASE_URL,
        )

    # ==================== Internal helpers ====================

    @staticmethod
    def _require_non_blank(value: str, field: str) -> None:
        """Raise ``ValueError`` when *value* is empty or whitespace-only."""
        if not value or not value.strip():
            raise ValueError(f"{field} 不能为空")

    @staticmethod
    def _summarize_task(
        result: dict,
        task_id: str,
        success_fields: tuple[str, ...],
        default_error: str,
    ) -> dict:
        """Reduce a provider task response to the keys this service exposes."""
        # ``or`` (rather than a ``.get`` default) also covers a key that is
        # present but None/empty, so callers never see a falsy status.
        status = result.get("status") or "Queueing"
        summary = {"status": status, "task_id": task_id}
        if status == "Success":
            summary.update({field: result.get(field) for field in success_fields})
        elif status == "Fail":
            # Same reasoning as above: never surface an empty error message.
            summary["error_msg"] = result.get("error_msg") or default_error
        return summary

    # ==================== Synchronous TTS ====================

    async def synthesize_sync(
        self,
        text: str,
        voice_id: str | None = None,
        speed: float = 1.0,
        **kwargs,
    ) -> str:
        """Synthesize speech synchronously and return the audio URL.

        Args:
            text: Text to synthesize. Documented limit is 10000 characters;
                the limit is not enforced here.
            voice_id: Voice ID; falls back to ``default_voice_id`` when falsy.
            speed: Speech rate. Documented range is 0.8-2.0; the range is not
                enforced here.
            **kwargs: Forwarded unchanged to ``provider.tts_sync``.

        Returns:
            The audio URL taken from the provider response (``"audio"`` first,
            then ``"audio_url"``). The original notes state the URL is valid
            for 24 hours.

        Raises:
            ValueError: If ``text`` is blank or the response carries no URL.
        """
        self._require_non_blank(text, "text")
        voice = voice_id or self.default_voice_id
        result = await self.provider.tts_sync(
            text=text,
            voice_id=voice,
            speed=speed,
            output_format="url",
            **kwargs,
        )
        audio_url = result.get("audio") or result.get("audio_url")
        if not audio_url:
            raise ValueError("TTS 合成失败: 未返回音频 URL")
        # Only a prefix of the URL is logged to keep log lines short.
        logger.info(
            "[MiniMax TTS] 合成成功: voice_id=%s, url=%s...", voice, audio_url[:60]
        )
        return audio_url

    # ==================== Asynchronous long-text TTS ====================

    async def synthesize_async_create(
        self,
        text: str,
        voice_id: str | None = None,
        speed: float = 1.0,
        **kwargs,
    ) -> str:
        """Create an asynchronous long-text TTS task and return its task ID.

        Args:
            text: Text to synthesize. Documented limit is 1,000,000
                characters; the limit is not enforced here.
            voice_id: Voice ID; falls back to ``default_voice_id`` when falsy.
            speed: Speech rate.
            **kwargs: Forwarded unchanged to ``provider.tts_async_create``.

        Returns:
            The ``task_id`` reported by the provider.

        Raises:
            ValueError: If ``text`` is blank or the response has no ``task_id``.
        """
        self._require_non_blank(text, "text")
        voice = voice_id or self.default_voice_id
        result = await self.provider.tts_async_create(
            text=text,
            voice_id=voice,
            speed=speed,
            **kwargs,
        )
        task_id = result.get("task_id")
        if not task_id:
            raise ValueError("异步 TTS 任务创建失败: 未返回 task_id")
        logger.info("[MiniMax TTS Async] 任务创建成功: task_id=%s", task_id)
        return task_id

    async def query_async_task(self, task_id: str) -> dict:
        """Query the state of an asynchronous TTS task.

        Returns:
            A dict of the form::

                {
                    "status": "Queueing" | "Processing" | "Success" | "Fail",
                    "task_id": "...",    # always, echoes the argument
                    "audio_url": "...",  # only when status is "Success"
                    "file_id": "...",    # only when status is "Success"
                    "duration": 123.45,  # only when status is "Success" (seconds)
                    "error_msg": "...",  # only when status is "Fail"
                }

            ``status`` is passed through from the provider and defaults to
            ``"Queueing"`` when missing or empty.
        """
        # NOTE(review): only the exact strings "Success" and "Fail" are
        # special-cased - confirm they match what MiniMaxProvider returns.
        result = await self.provider.tts_async_query(task_id)
        return self._summarize_task(
            result,
            task_id,
            success_fields=("audio_url", "file_id", "duration"),
            default_error="任务失败",
        )

    # ==================== Voice cloning ====================

    async def clone_voice(
        self,
        audio_url: str,
        voice_name: str,
        sample_audio_url: str | None = None,
    ) -> str:
        """Submit a voice-cloning task and return its task ID.

        Args:
            audio_url: URL of the audio to clone. The original notes ask for
                a 5-30 second, publicly accessible clip; not checked here.
            voice_name: Name for the cloned voice. Documented limit is 20
                characters; the limit is not enforced here.
            sample_audio_url: Optional sample audio URL intended to improve
                cloning quality.

        Returns:
            The ``task_id`` reported by the provider.

        Raises:
            ValueError: If ``audio_url`` or ``voice_name`` is blank, or the
                response has no ``task_id``.
        """
        # Reject blank input up front, mirroring the synthesize_* methods.
        self._require_non_blank(audio_url, "audio_url")
        self._require_non_blank(voice_name, "voice_name")
        result = await self.provider.clone_voice(
            audio_url=audio_url,
            voice_name=voice_name,
            sample_audio_url=sample_audio_url,
        )
        task_id = result.get("task_id")
        if not task_id:
            raise ValueError("克隆任务提交失败: 未返回 task_id")
        logger.info("[MiniMax Clone] 提交成功: task_id=%s", task_id)
        return task_id

    async def query_clone_task(self, task_id: str) -> dict:
        """Query the state of a voice-cloning task.

        Returns:
            A dict of the form::

                {
                    "status": "Queueing" | "Processing" | "Success" | "Fail",
                    "task_id": "...",    # always, echoes the argument
                    "voice_id": "...",   # only when status is "Success"
                    "trial_url": "...",  # only when status is "Success"
                    "error_msg": "...",  # only when status is "Fail"
                }

            ``status`` is passed through from the provider and defaults to
            ``"Queueing"`` when missing or empty.
        """
        # NOTE(review): only the exact strings "Success" and "Fail" are
        # special-cased - confirm they match what MiniMaxProvider returns.
        result = await self.provider.query_clone_task(task_id)
        return self._summarize_task(
            result,
            task_id,
            success_fields=("voice_id", "trial_url"),
            default_error="克隆失败",
        )

    # ==================== Preset voices ====================

    @staticmethod
    def get_preset_voices() -> list[dict]:
        """Return the preset voices as a fresh list of fresh dicts.

        Copies are returned so that callers may extend or edit the result
        (for example to append private voices) without mutating the shared
        module-level ``MINIMAX_PRESET_VOICES`` table.
        """
        return [dict(voice) for voice in MINIMAX_PRESET_VOICES]

    @staticmethod
    def get_voice_by_id(voice_id: str) -> dict | None:
        """Return a copy of the preset whose ``voice_id`` matches, else ``None``."""
        for voice in MINIMAX_PRESET_VOICES:
            if voice["voice_id"] == voice_id:
                # Copy for the same reason as in get_preset_voices().
                return dict(voice)
        return None