c45cb02385
- QiniuService 新增 async 包装方法(upload_stream_async 等) - upload.py / voice.py 上传路由改为 await async 版本 - voice.py 改用 get_qiniu_service() 单例
462 lines
15 KiB
Python
462 lines
15 KiB
Python
"""
|
||
语音合成与克隆 API 路由
|
||
=======================
|
||
|
||
提供 TTS 语音合成、批量合成、声音克隆等功能。
|
||
基于 Vidu API。
|
||
"""
|
||
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from pathlib import Path
|
||
|
||
import httpx
|
||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile
|
||
from pydantic import BaseModel, Field
|
||
|
||
from app.core.exceptions import PlatformError
|
||
from app.schemas.common import ApiResponse, success_response
|
||
from app.services.qiniu_service import QiniuService
|
||
from app.services.vidu_service import (
|
||
DEFAULT_VOICE_ID,
|
||
ViduService,
|
||
get_preset_voices,
|
||
get_vidu_service,
|
||
)
|
||
|
||
logger = logging.getLogger(__name__)
|
||
router = APIRouter(prefix="/voice", tags=["Voice"])
|
||
|
||
|
||
# ========== 请求/响应模型 ==========
|
||
|
||
|
||
class TTSSynthesizeRequest(BaseModel):
    """Request body for a single TTS synthesis call."""

    # Text to synthesize; required, 1 to 10000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="待合成文本(≤10000字符)",
    )
    # Optional voice ID; None when the caller does not pick one.
    voice_id: str | None = Field(
        default=None,
        description="音色 ID(默认:甜美女性)",
    )
    # Speaking rate, constrained to [0.5, 2.0].
    speed: float = Field(
        1.0,
        ge=0.5,
        le=2.0,
        description="语速 0.5-2.0",
    )
    # Language of the voice; free-form string, defaults to "zh".
    voice_language: str = Field(
        "zh",
        description="音色语种 (zh/en)",
    )
    # Volume, constrained to [0, 10].
    volume: int = Field(
        0,
        ge=0,
        le=10,
        description="音量 0-10(0=正常)",
    )
    # Pitch shift, constrained to [-12, 12].
    pitch: int = Field(
        0,
        ge=-12,
        le=12,
        description="音调 -12 到 12",
    )
|
||
|
||
|
||
class TTSBatchSegment(BaseModel):
    """One text segment inside a batch synthesis request."""

    # Segment text; required and non-empty.
    text: str = Field(
        ...,
        min_length=1,
        description="段落文本",
    )
    # Non-negative ordinal of the segment; echoed back in the batch result.
    index: int = Field(
        0,
        ge=0,
        description="段落序号",
    )
    # Optional output file name (without extension); echoed back in the result.
    filename: str | None = Field(
        default=None,
        description="输出文件名(不含扩展名)",
    )
|
||
|
||
|
||
class TTSBatchRequest(BaseModel):
    """Request body for batch TTS synthesis."""

    # Segments to synthesize; at least one is required.
    segments: list[TTSBatchSegment] = Field(
        ...,
        min_length=1,
        description="段落列表",
    )
    # Optional voice ID shared by every segment.
    voice_id: str | None = Field(
        default=None,
        description="音色 ID",
    )
    # Speaking rate shared by every segment, constrained to [0.5, 2.0].
    speed: float = Field(
        1.0,
        ge=0.5,
        le=2.0,
        description="语速",
    )
    # Volume shared by every segment, constrained to [0, 10].
    volume: int = Field(
        0,
        ge=0,
        le=10,
        description="音量 0-10",
    )
    # Pitch shift shared by every segment, constrained to [-12, 12].
    pitch: int = Field(
        0,
        ge=-12,
        le=12,
        description="音调 -12 到 12",
    )
|
||
|
||
|
||
class VoiceCloneSubmitRequest(BaseModel):
    """Request body for submitting a voice-clone job."""

    # URL of the source audio; all four fields are optional at the schema level.
    source_audio_url: str | None = Field(
        default=None,
        description="源音频 URL(5-30秒,mp3/wav,需公开可访问)",
    )
    # URL of a source video.
    source_video_url: str | None = Field(
        default=None,
        description="源视频 URL(可选)",
    )
    # ID of a previously produced work.
    video_id: str | None = Field(
        default=None,
        description="历史作品ID(可选)",
    )
    # Custom voice name. NOTE(review): the description mentions a 20-character
    # limit, but no max_length constraint is declared on this field.
    voice_name: str | None = Field(
        default=None,
        description="自定义音色名称(≤20字符)",
    )
|
||
|
||
|
||
class TTSBatchResponse(BaseModel):
    """Aggregated outcome of a batch synthesis call."""

    # Number of segments processed.
    total: int

    # Number of segments that were synthesized successfully.
    success_count: int

    # Number of segments that failed.
    failed_count: int

    # Per-segment result dictionaries (untyped).
    results: list[dict]
|
||
|
||
|
||
class VoiceCloneTaskResponse(BaseModel):
    """Status payload returned by the voice-clone endpoints."""

    # Identifier of the clone task.
    task_id: str

    # Task status string.
    status: str

    # ID of the cloned voice, when available.
    voice_id: str | None = None

    # URL of a trial/demo audio clip, when available.
    trial_url: str | None = None

    # Error description, when available.
    error_message: str | None = None
|
||
|
||
|
||
class VoiceUploadResponse(BaseModel):
    """Payload returned after a media file has been uploaded."""

    # Access URL of the uploaded file; required.
    url: str = Field(
        ...,
        description="七牛云访问 URL",
    )
    # Storage key of the uploaded file; required.
    key: str = Field(
        ...,
        description="存储 Key",
    )
|
||
|
||
|
||
class VoiceInfo(BaseModel):
    """Description of one selectable voice."""

    # Identifier of the voice.
    voice_id: str

    # Display name of the voice.
    name: str

    # Free-text description; empty by default.
    description: str = ""

    # Language code; defaults to "zh".
    language: str = "zh"

    # Whether the voice is flagged as recommended.
    recommended: bool = False

    # Optional preview audio URL (camelCase field name is part of the schema).
    previewUrl: str | None = None
|
||
|
||
|
||
# ========== API 路由 ==========
|
||
|
||
|
||
@router.post("/upload", response_model=ApiResponse[VoiceUploadResponse])
async def upload_voice_file(
    file: UploadFile = File(...),
    file_type: str = Form(default="audio", description="文件类型: audio | video"),
):
    """
    Upload an audio or video file to Qiniu.

    Validates ``file_type`` and the MIME type of the upload, enforces a size
    limit (20MB for audio, 200MB for video), stores the bytes under a random
    key and returns the resulting URL together with that key.

    Args:
        file: The uploaded file.
        file_type: ``"audio"`` or ``"video"`` (case-insensitive, surrounding
            whitespace ignored).

    Returns:
        A success response wrapping ``VoiceUploadResponse(url, key)``.

    Raises:
        HTTPException: 400 for an invalid ``file_type``, an unsupported MIME
            type or an oversized file; 500 for any other failure.
    """
    try:
        file_type = file_type.lower().strip()
        if file_type not in ("audio", "video"):
            raise HTTPException(status_code=400, detail="file_type 必须是 audio 或 video")

        # Per-type settings. The MIME -> extension table doubles as the
        # allow-list, so every accepted type has an explicit, valid extension
        # (deriving it from the MIME subtype produced ".x-wav" for audio/x-wav).
        if file_type == "audio":
            extensions = {
                "audio/mpeg": "mp3",
                "audio/mp3": "mp3",
                "audio/wav": "wav",
                "audio/x-wav": "wav",
                "audio/mp4": "mp4",
            }
            max_size = 20 * 1024 * 1024  # 20MB
            prefix = "meijiaka-zy/voice"
            type_label = "音频"
        else:
            extensions = {
                "video/mp4": "mp4",
                "video/quicktime": "mov",
            }
            max_size = 200 * 1024 * 1024  # 200MB
            prefix = "meijiaka-zy/voice_clone"
            type_label = "视频"

        content_type = file.content_type or "application/octet-stream"
        if content_type not in extensions:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的{type_label}格式: {content_type},仅支持 {', '.join(extensions)}",
            )

        # Read at most one byte past the limit: enough to detect an oversized
        # upload without loading an arbitrarily large file into memory.
        content = await file.read(max_size + 1)
        if len(content) > max_size:
            raise HTTPException(status_code=400, detail=f"{type_label}文件大小不能超过 {max_size // 1024 // 1024}MB")

        # Random storage key with the extension matching the MIME type.
        key = f"{prefix}/{uuid.uuid4().hex}.{extensions[content_type]}"

        # Upload to Qiniu (imports are kept function-scoped, as before).
        from app.services.qiniu_service import get_qiniu_service

        qiniu = get_qiniu_service()
        from io import BytesIO

        await qiniu.upload_stream_async(
            stream=BytesIO(content),
            key=key,
            mime_type=content_type,
        )

        # Build the access URL from the service's video_domain.
        url = qiniu.get_file_url(qiniu.video_domain, key)

        return success_response(
            data=VoiceUploadResponse(url=url, key=key),
            message="上传成功",
        )

    except HTTPException:
        # Validation errors raised above keep their own status code.
        raise
    except Exception as e:
        logger.error(f"[Voice] 上传失败: {e}")
        raise HTTPException(status_code=500, detail=f"上传失败: {str(e)}") from e
|
||
|
||
|
||
@router.get("/voices", response_model=ApiResponse[list[VoiceInfo]])
async def list_voices():
    """
    List the available voices.

    Wraps every entry returned by ``get_preset_voices()`` in a ``VoiceInfo``
    and returns the list in a success response.
    """
    voice_items = []
    for preset in get_preset_voices():
        voice_items.append(VoiceInfo(**preset))

    return success_response(data=voice_items, message="获取音色列表成功")
|
||
|
||
|
||
@router.post("/synthesize", response_model=ApiResponse[dict])
async def synthesize_speech(
    request: TTSSynthesizeRequest,
    service: ViduService = Depends(get_vidu_service),
):
    """
    Synthesize speech for a single text.

    Calls the Vidu service with the request's text, voice, speed, volume and
    pitch, and returns the resulting audio URL along with the text and the
    voice ID that applies.

    Raises:
        PlatformError: Propagated unchanged from the service.
        HTTPException: 500 for any other failure.
    """
    try:
        # NOTE(review): request.voice_language is not forwarded to the service.
        synth_args = {
            "text": request.text,
            "voice_id": request.voice_id,
            "speed": request.speed,
            "volume": request.volume,
            "pitch": request.pitch,
        }
        audio_url = await service.synthesize(**synth_args)

        # Report the default voice when the caller did not choose one.
        effective_voice = request.voice_id if request.voice_id else DEFAULT_VOICE_ID
        payload = {
            "audio_url": audio_url,
            "format": "mp3",
            "text": request.text,
            "voice_id": effective_voice,
        }
        return success_response(data=payload, message="合成成功")

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] TTS 合成失败: {e}")
        raise HTTPException(status_code=500, detail="语音合成失败,请稍后重试")
|
||
|
||
|
||
@router.post("/synthesize-batch", response_model=ApiResponse[TTSBatchResponse])
async def synthesize_batch(
    request: TTSBatchRequest,
    service: ViduService = Depends(get_vidu_service),
):
    """
    Synthesize speech for several text segments.

    Segments are synthesized one after another with the shared voice, speed,
    volume and pitch settings. A failing segment is recorded in the result
    list and does not stop the remaining segments.

    Raises:
        PlatformError: Propagated unchanged if raised outside the per-segment
            handling.
        HTTPException: 500 for any other failure outside the per-segment
            handling.
    """
    try:
        segments_data = [s.model_dump() for s in request.segments]

        results = []
        for seg in segments_data:
            seg_index = seg.get("index", 0)
            seg_filename = seg.get("filename")
            try:
                audio_url = await service.synthesize(
                    text=seg["text"],
                    voice_id=request.voice_id,
                    speed=request.speed,
                    volume=request.volume,
                    pitch=request.pitch,
                )
            except Exception as e:
                # Record the failure for this segment and carry on.
                if isinstance(e, PlatformError):
                    error_msg = f"[{e.platform}] {e.error_type}: {e}"
                else:
                    error_msg = str(e)
                outcome = {
                    "index": seg_index,
                    "success": False,
                    "error": error_msg,
                    "filename": seg_filename,
                }
            else:
                outcome = {
                    "index": seg_index,
                    "success": True,
                    "audio_url": audio_url,
                    "filename": seg_filename,
                }
            results.append(outcome)

        total = len(results)
        success_count = len([r for r in results if r["success"]])
        failed_count = total - success_count

        summary = TTSBatchResponse(
            total=total,
            success_count=success_count,
            failed_count=failed_count,
            results=results,
        )
        return success_response(
            data=summary,
            message=f"批量合成完成:成功 {success_count} 段,失败 {failed_count} 段",
        )

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] 批量 TTS 失败: {e}")
        raise HTTPException(status_code=500, detail=f"批量合成失败: {str(e)}")
|
||
|
||
|
||
@router.post("/synthesize-file", response_model=ApiResponse[dict])
async def synthesize_to_file(
    request_body: TTSSynthesizeRequest,
    output_path: str,
    service: ViduService = Depends(get_vidu_service),
    request: Request = None,
):
    """
    Synthesize speech and save the audio to ``output_path``.

    The audio URL returned by the service is downloaded and its bytes are
    written to ``output_path``; missing parent directories are created.

    Raises:
        PlatformError: Propagated unchanged from the service.
        HTTPException: 500 for any other failure (download, file write, ...).
    """
    # NOTE(review): output_path is supplied by the caller and used as a
    # filesystem path without any restriction - confirm this endpoint is only
    # reachable by trusted clients.
    try:
        audio_url = await service.synthesize(
            text=request_body.text,
            voice_id=request_body.voice_id,
            speed=request_body.speed,
            volume=request_body.volume,
            pitch=request_body.pitch,
        )

        # Use the app's shared HTTP client when a request is available;
        # otherwise create a temporary client that is closed afterwards.
        owns_client = not request
        if owns_client:
            client = httpx.AsyncClient(timeout=30.0)
        else:
            client = request.app.state.http_clients["default"]

        try:
            response = await client.get(audio_url)
            response.raise_for_status()
            target = Path(output_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(response.content)
        finally:
            if owns_client:
                await client.aclose()

        voice_used = request_body.voice_id if request_body.voice_id else DEFAULT_VOICE_ID
        payload = {
            "file_path": output_path,
            "text": request_body.text,
            "voice_id": voice_used,
        }
        return success_response(data=payload, message="文件保存成功")

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] TTS 文件保存失败: {e}")
        raise HTTPException(status_code=500, detail="文件保存失败,请稍后重试")
|
||
|
||
|
||
def _normalize_voice_id(name: str | None) -> str:
|
||
"""
|
||
将用户输入的名称规范化为 Vidu 合法的 voice_id。
|
||
|
||
Vidu 要求:8~256 字符,首字符必须是字母。
|
||
"""
|
||
if not name:
|
||
return f"vidu_{uuid.uuid4().hex[:8]}"
|
||
|
||
# 只保留字母、数字、下划线
|
||
cleaned = re.sub(r"[^a-zA-Z0-9_]", "", name)
|
||
|
||
# 确保首字符是字母
|
||
if cleaned and not cleaned[0].isalpha():
|
||
cleaned = "v" + cleaned
|
||
elif not cleaned:
|
||
cleaned = "voice"
|
||
|
||
# 长度不足 8,补足随机字符
|
||
if len(cleaned) < 8:
|
||
cleaned = cleaned + uuid.uuid4().hex[: (8 - len(cleaned))]
|
||
|
||
# 长度超过 256,截断
|
||
if len(cleaned) > 256:
|
||
cleaned = cleaned[:256]
|
||
|
||
return cleaned
|
||
|
||
|
||
@router.post("/clone/submit", response_model=ApiResponse[VoiceCloneTaskResponse])
async def submit_clone_task(
    request: VoiceCloneSubmitRequest,
    service: ViduService = Depends(get_vidu_service),
):
    """
    Submit a voice-clone job (Vidu).

    The clone call is awaited directly and its result is returned with status
    ``"succeeded"``. Only ``source_audio_url`` and ``voice_name`` from the
    request are used; an absent audio URL is passed to the service as an
    empty string.

    Raises:
        PlatformError: Propagated unchanged from the service.
        HTTPException: 400 when a ``ValueError`` signals invalid parameters;
            500 for any other failure.
    """
    try:
        voice_id = _normalize_voice_id(request.voice_name)
        result = await service.clone_voice(
            audio_url=request.source_audio_url or "",
            voice_id=voice_id,
        )

        # The service call has already completed, so report success directly.
        return success_response(
            data=VoiceCloneTaskResponse(
                task_id=result.get("task_id", ""),
                status="succeeded",
                voice_id=result.get("voice_id"),
                trial_url=result.get("demo_audio"),
            ),
            message="克隆成功",
        )

    except PlatformError:
        raise
    except ValueError as e:
        # A parameter error is the client's fault: answer 400, not 500.
        logger.error(f"[Voice] 提交克隆任务失败: {e}")
        raise HTTPException(status_code=400, detail=f"参数错误: {e}") from e
    except Exception as e:
        logger.error(f"[Voice] 提交克隆任务失败: {e}")
        raise HTTPException(status_code=500, detail=f"任务提交失败: {e}") from e
|
||
|
||
|
||
@router.get("/clone/query/{task_id}", response_model=ApiResponse[VoiceCloneTaskResponse])
async def query_clone_task(task_id: str, blocking: bool = False):
    """
    Query the status of a voice-clone task (Vidu).

    Compatibility endpoint: no lookup is performed. It always answers with
    status ``"succeeded"`` for the given ``task_id``; ``blocking`` is accepted
    but not used.
    """
    completed = VoiceCloneTaskResponse(task_id=task_id, status="succeeded")
    return success_response(data=completed, message="克隆已完成")
|
||
|
||
|
||
@router.post("/clone/clone-and-wait", response_model=ApiResponse[VoiceCloneTaskResponse])
async def clone_and_wait(
    request: VoiceCloneSubmitRequest,
    service: ViduService = Depends(get_vidu_service),
    poll_interval: float = 5.0,
):
    """
    Clone a voice and return the final result in one call.

    Awaits the service's clone call and returns its outcome with status
    ``"succeeded"``. ``poll_interval`` is accepted but not used.

    Raises:
        PlatformError: Propagated unchanged from the service.
        HTTPException: 500 for any other failure.
    """
    try:
        source_url = request.source_audio_url or ""
        result = await service.clone_voice(
            audio_url=source_url,
            voice_id=_normalize_voice_id(request.voice_name),
        )

        task_info = VoiceCloneTaskResponse(
            task_id=result.get("task_id", ""),
            status="succeeded",
            voice_id=result.get("voice_id"),
            trial_url=result.get("demo_audio"),
        )
        return success_response(data=task_info, message="克隆成功")

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] 克隆失败: {e}")
        raise HTTPException(status_code=500, detail="声音克隆失败,请稍后重试")
|
||
|