95e55293c6
后端安全: - DEBUG 默认 True → False - 彻底移除 AUTH_BYPASS 认证绕过 - 验证码不再明文打印到日志 - 上传接口增加大小限制(500MB/20MB/100MB)与魔数校验 - python-jose → PyJWT, 更新 requirements.lock/uv.lock - Bandit 恢复关键规则(B104/B301/B305/B314/B324/B603/B607) - 修复 5 处 try_except_pass, 15 处加 nosec 注释 - 启用 Bandit pre-commit 钩子 前端安全: - 配置完整 CSP 策略 - 收紧 Capabilities(fs:allow-read-file → $RESOURCE/**) - 移除硬编码 devToken - 清理前端 TODO(美家卡智影命名统一) 部署修复: - docker-compose.prod 增加 alembic 迁移步骤 - api + scheduler 增加 Redis 心跳健康检查 - Nginx 添加安全响应头 - Nginx client_max_body_size 100M → 500M - .env.example 补充 UPLOAD_MAX_* 配置与安全注释 其他: - /voice/upload 合并到 /upload/audio - Rust 上传增加文件大小检查 - 清理 Rust 19 处 println! + 前端 21 处 console.info - 修复 VideoCompose.tsx toast 未导入(已有bug)
466 lines
15 KiB
Python
466 lines
15 KiB
Python
"""
|
||
Voice synthesis and cloning API routes
======================================

Provides TTS speech synthesis, batch synthesis, and voice cloning.
Built on the Vidu API.
|
||
"""
|
||
|
||
import asyncio
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from pathlib import Path
|
||
|
||
import httpx
|
||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||
from pydantic import BaseModel, Field
|
||
from sqlalchemy.ext.asyncio import AsyncSession
|
||
|
||
from app.api.deps import get_current_user
|
||
from app.core.exceptions import PlatformError
|
||
from app.db.session import get_db
|
||
from app.models.user import User
|
||
from app.schemas.common import ApiResponse, success_response
|
||
from app.services import point_service as ps
|
||
from app.services.vidu_service import (
|
||
DEFAULT_VOICE_ID,
|
||
ViduService,
|
||
get_preset_voices,
|
||
get_vidu_service,
|
||
)
|
||
from app.utils.audio_utils import get_audio_duration
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for every endpoint in this module: "/voice" prefix, "Voice" tag.
router = APIRouter(
    prefix="/voice",
    tags=["Voice"],
)
|
||
|
||
|
||
# ========== 请求/响应模型 ==========
|
||
|
||
|
||
class TTSSynthesizeRequest(BaseModel):
    """Request body for a single TTS synthesis call."""

    # Text to synthesize: required, 1 to 10000 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="待合成文本(≤10000字符)",
    )
    # Optional voice ID; omitted means the caller accepts the default voice.
    voice_id: str | None = Field(default=None, description="音色 ID(默认:甜美女性)")
    # Speaking rate, constrained to 0.5-2.0.
    speed: float = Field(1.0, ge=0.5, le=2.0, description="语速 0.5-2.0")
    # Voice language code.
    # NOTE(review): the synthesis handlers in this file do not forward this
    # field to the service - confirm whether it is still needed.
    voice_language: str = Field("zh", description="音色语种 (zh/en)")
    # Volume, constrained to 0-10.
    volume: int = Field(0, ge=0, le=10, description="音量 0-10(0=正常)")
    # Pitch shift, constrained to -12..12.
    pitch: int = Field(0, ge=-12, le=12, description="音调 -12 到 12")
|
||
|
||
|
||
class TTSBatchSegment(BaseModel):
    """One text segment of a batch synthesis request."""

    # Segment text: required and non-empty.
    # NOTE(review): no max_length here, unlike TTSSynthesizeRequest.text
    # (10000) - confirm whether the same cap should apply.
    text: str = Field(
        ...,
        min_length=1,
        description="段落文本",
    )
    # Non-negative ordinal of the segment; echoed back in the batch results.
    index: int = Field(0, ge=0, description="段落序号")
    # Optional output file name without extension; echoed back in the results.
    filename: str | None = Field(default=None, description="输出文件名(不含扩展名)")
|
||
|
||
|
||
class TTSBatchRequest(BaseModel):
    """Request body for batch TTS synthesis; voice settings apply to all segments."""

    # Segments to synthesize: at least one is required.
    segments: list[TTSBatchSegment] = Field(
        ...,
        min_length=1,
        description="段落列表",
    )
    # Optional voice ID shared by every segment.
    voice_id: str | None = Field(default=None, description="音色 ID")
    # Speaking rate, constrained to 0.5-2.0.
    speed: float = Field(1.0, ge=0.5, le=2.0, description="语速")
    # Volume, constrained to 0-10.
    volume: int = Field(0, ge=0, le=10, description="音量 0-10")
    # Pitch shift, constrained to -12..12.
    pitch: int = Field(0, ge=-12, le=12, description="音调 -12 到 12")
|
||
|
||
|
||
class VoiceCloneSubmitRequest(BaseModel):
    """Request body for submitting a voice-cloning job."""

    # URL of the source audio; every field of this model is optional.
    source_audio_url: str | None = Field(
        default=None,
        description="源音频 URL(5-30秒,mp3/wav,需公开可访问)",
    )
    # URL of a source video.
    # NOTE(review): the clone handlers in this file read only
    # source_audio_url and voice_name - confirm this field is still used.
    source_video_url: str | None = Field(default=None, description="源视频 URL(可选)")
    # ID of a previously produced work (same review note as above).
    video_id: str | None = Field(default=None, description="历史作品ID(可选)")
    # Custom voice name.
    # NOTE(review): the description promises <= 20 characters but no
    # max_length is enforced here.
    voice_name: str | None = Field(default=None, description="自定义音色名称(≤20字符)")
|
||
|
||
|
||
class TTSBatchResponse(BaseModel):
    """Aggregated outcome of a batch synthesis call."""

    # Number of segments processed.
    total: int
    # Number of segments synthesized successfully.
    success_count: int
    # Number of segments that failed.
    failed_count: int
    # One dict per segment, as built by the batch handler.
    results: list[dict]
|
||
|
||
|
||
class VoiceCloneTaskResponse(BaseModel):
    """Status payload returned by the voice-cloning endpoints."""

    # Identifier of the clone task.
    task_id: str
    # Task status string (the handlers in this file always send "succeeded").
    status: str
    # ID of the cloned voice, when available.
    voice_id: str | None = None
    # URL of a trial/demo audio clip, when available.
    trial_url: str | None = None
    # Error description, when available.
    error_message: str | None = None
|
||
|
||
|
||
class VoiceInfo(BaseModel):
    """Description of one selectable voice."""

    # Voice identifier.
    voice_id: str
    # Display name of the voice.
    name: str
    # Free-form description; empty by default.
    description: str = ""
    # Language code; defaults to "zh".
    language: str = "zh"
    # Whether the voice is flagged as recommended.
    recommended: bool = False
    # Optional preview audio URL (camelCase name is part of the payload).
    previewUrl: str | None = None
|
||
|
||
|
||
# ========== API 路由 ==========
|
||
|
||
|
||
@router.get("/voices", response_model=ApiResponse[list[VoiceInfo]])
async def list_voices(
    current_user: User = Depends(get_current_user),
):
    """
    List the available voices.

    Wraps every preset returned by ``get_preset_voices()`` in a ``VoiceInfo``
    model so the client can pick a voice for TTS synthesis. ``current_user``
    is resolved through the dependency but not otherwise used here.
    """
    voice_models = []
    for preset in get_preset_voices():
        voice_models.append(VoiceInfo(**preset))

    return success_response(data=voice_models, message="获取音色列表成功")
|
||
|
||
|
||
@router.post("/synthesize", response_model=ApiResponse[dict])
async def synthesize_speech(
    request: TTSSynthesizeRequest,
    service: ViduService = Depends(get_vidu_service),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Synchronous TTS synthesis.

    Converts the request text to speech via ``service.synthesize`` and returns
    the audio URL. Intended for short texts (<=1000 characters); longer texts
    should go through /synthesize-batch.

    After a successful synthesis the audio duration is probed and the matching
    points are charged to the current user. Billing is best-effort: a failure
    there is logged and the synthesis result is still returned.

    Raises:
        PlatformError: re-raised unchanged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        audio_url = await service.synthesize(
            text=request.text,
            voice_id=request.voice_id,
            speed=request.speed,
            volume=request.volume,
            pitch=request.pitch,
        )

        # Probe the audio duration and charge for it.
        try:
            seconds = await get_audio_duration(audio_url)
            points = ps._calculate_cost("tts", {"seconds": seconds})
            # The running loop's clock only provides a per-call suffix.
            # NOTE(review): this clock is monotonic per process, not globally
            # unique - confirm source_id is not relied on for deduplication.
            call_stamp = asyncio.get_running_loop().time()
            await ps.consume(
                db,
                user_id=current_user.id,
                points=points,
                source_type="tts",
                source_id=f"tts_{current_user.id}_{call_stamp}",
                description="【配音合成】",
                duration=seconds,
            )
            await db.commit()
        except Exception as e:
            # Deliberate best-effort: a billing failure must not fail the
            # synthesis. Keep the traceback so the failure can be diagnosed.
            logger.error(f"[Voice] TTS 扣费失败: {e}", exc_info=True)

        return success_response(
            data={
                "audio_url": audio_url,
                "format": "mp3",
                "text": request.text,
                "voice_id": request.voice_id or DEFAULT_VOICE_ID,
            },
            message="合成成功",
        )

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] TTS 合成失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="语音合成失败,请稍后重试") from e
|
||
|
||
|
||
@router.post("/synthesize-batch", response_model=ApiResponse[TTSBatchResponse])
async def synthesize_batch(
    request: TTSBatchRequest,
    service: ViduService = Depends(get_vidu_service),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Batch TTS synthesis.

    Synthesizes each segment in order with the shared voice settings and
    returns one result entry per segment (audio URL on success, error text on
    failure). A failing segment never aborts the remaining ones. Nothing is
    written to disk by this handler; only URLs are returned.

    The durations of all successful segments are summed and charged once.
    Billing is best-effort: a failure there is logged and the results are
    still returned.

    Raises:
        PlatformError: re-raised unchanged if it escapes the per-segment handling.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        results = []
        for seg in request.segments:
            try:
                audio_url = await service.synthesize(
                    text=seg.text,
                    voice_id=request.voice_id,
                    speed=request.speed,
                    volume=request.volume,
                    pitch=request.pitch,
                )
                results.append({
                    "index": seg.index,
                    "success": True,
                    "audio_url": audio_url,
                    "filename": seg.filename,
                })
            except Exception as e:
                # Batch semantics: record the failure for this segment and
                # carry on with the others.
                error_msg = str(e)
                if isinstance(e, PlatformError):
                    error_msg = f"[{e.platform}] {e.error_type}: {e}"
                results.append({
                    "index": seg.index,
                    "success": False,
                    "error": error_msg,
                    "filename": seg.filename,
                })

        # Probe each successful segment's duration and sum them for billing.
        total_seconds = 0.0
        for r in results:
            if r["success"] and r.get("audio_url"):
                try:
                    total_seconds += await get_audio_duration(r["audio_url"])
                except Exception as e:
                    # A segment whose duration cannot be probed is not billed.
                    logger.warning(f"[Voice] 批量探测时长失败: {e}")

        if total_seconds > 0:
            try:
                points = ps._calculate_cost("tts", {"seconds": total_seconds})
                # The running loop's clock only provides a per-call suffix.
                call_stamp = asyncio.get_running_loop().time()
                await ps.consume(
                    db,
                    user_id=current_user.id,
                    points=points,
                    source_type="tts",
                    source_id=f"tts_batch_{current_user.id}_{call_stamp}",
                    description="【配音合成】",
                    duration=total_seconds,
                )
                await db.commit()
            except Exception as e:
                # Deliberate best-effort: keep the traceback for diagnosis.
                logger.error(f"[Voice] 批量 TTS 扣费失败: {e}", exc_info=True)

        success_count = sum(1 for r in results if r["success"])
        failed_count = len(results) - success_count

        return success_response(
            data=TTSBatchResponse(
                total=len(results),
                success_count=success_count,
                failed_count=failed_count,
                results=results,
            ),
            message=f"批量合成完成:成功 {success_count} 段,失败 {failed_count} 段",
        )

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] 批量 TTS 失败: {e}", exc_info=True)
        # Generic detail, matching the sibling endpoints: internal exception
        # text is logged, not returned to the client.
        raise HTTPException(status_code=500, detail="批量合成失败,请稍后重试") from e
|
||
|
||
|
||
@router.post("/synthesize-file", response_model=ApiResponse[dict])
async def synthesize_to_file(
    request_body: TTSSynthesizeRequest,
    output_path: str,
    service: ViduService = Depends(get_vidu_service),
    request: Request = None,
    current_user: User = Depends(get_current_user),
):
    """
    Synthesize speech and save it to the given path.

    Synthesizes the text, downloads the resulting audio and writes it to
    ``output_path``, creating parent directories as needed.

    ``output_path`` is intentionally not restricted to a directory whitelist.
    Per the original design note, this endpoint only serves the Tauri desktop
    client writing to the user's own device.

    NOTE(review): if this router is ever served from a shared or remote
    backend, an unrestricted ``output_path`` lets any authenticated caller
    write files anywhere the process can - confirm the deployment model.
    NOTE(review): unlike /synthesize, this endpoint does not charge points -
    confirm that is intended.

    Raises:
        PlatformError: re-raised unchanged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        audio_url = await service.synthesize(
            text=request_body.text,
            voice_id=request_body.voice_id,
            speed=request_body.speed,
            volume=request_body.volume,
            pitch=request_body.pitch,
        )

        # Reuse the app's shared HTTP client when a request is available;
        # otherwise create a temporary client that is closed below.
        owns_client = request is None
        if owns_client:
            client = httpx.AsyncClient(timeout=30.0)
        else:
            client = request.app.state.http_clients["default"]

        def _write_audio(payload: bytes) -> None:
            # Blocking filesystem work; executed in a worker thread.
            target = Path(output_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)

        try:
            response = await client.get(audio_url)
            response.raise_for_status()
            # Keep the event loop free while the file is written.
            await asyncio.to_thread(_write_audio, response.content)
        finally:
            if owns_client:
                await client.aclose()

        return success_response(
            data={
                "file_path": output_path,
                "text": request_body.text,
                "voice_id": request_body.voice_id or DEFAULT_VOICE_ID,
            },
            message="文件保存成功",
        )

    except PlatformError:
        raise
    except Exception as e:
        logger.error(f"[Voice] TTS 文件保存失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="文件保存失败,请稍后重试") from e
|
||
|
||
|
||
def _normalize_voice_id(name: str | None) -> str:
|
||
"""
|
||
将用户输入的名称规范化为 Vidu 合法的 voice_id。
|
||
|
||
Vidu 要求:8~256 字符,首字符必须是字母。
|
||
"""
|
||
if not name:
|
||
return f"vidu_{uuid.uuid4().hex[:8]}"
|
||
|
||
# 只保留字母、数字、下划线
|
||
cleaned = re.sub(r"[^a-zA-Z0-9_]", "", name)
|
||
|
||
# 确保首字符是字母
|
||
if cleaned and not cleaned[0].isalpha():
|
||
cleaned = "v" + cleaned
|
||
elif not cleaned:
|
||
cleaned = "voice"
|
||
|
||
# 长度不足 8,补足随机字符
|
||
if len(cleaned) < 8:
|
||
cleaned = cleaned + uuid.uuid4().hex[: (8 - len(cleaned))]
|
||
|
||
# 长度超过 256,截断
|
||
if len(cleaned) > 256:
|
||
cleaned = cleaned[:256]
|
||
|
||
return cleaned
|
||
|
||
|
||
@router.post("/clone/submit", response_model=ApiResponse[VoiceCloneTaskResponse])
async def submit_clone_task(
    request: VoiceCloneSubmitRequest,
    service: ViduService = Depends(get_vidu_service),
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Submit a voice-cloning job (Vidu).

    Vidu voice cloning is a synchronous call, so the result is returned
    directly with status "succeeded". After a successful clone the points are
    charged to the current user. Billing is best-effort: a failure there is
    logged and the clone result is still returned.

    NOTE(review): only ``source_audio_url`` and ``voice_name`` are read here;
    a missing audio URL is forwarded to the service as an empty string.

    Raises:
        PlatformError: re-raised unchanged.
        HTTPException: 400 when a ValueError is raised (parameter error),
            500 with a generic message for any other failure.
    """
    try:
        voice_id = _normalize_voice_id(request.voice_name)
        result = await service.clone_voice(
            audio_url=request.source_audio_url or "",
            voice_id=voice_id,
        )

        # Charge for the clone.
        try:
            points = ps._calculate_cost("voice_clone")
            await ps.consume(
                db,
                user_id=current_user.id,
                points=points,
                source_type="voice_clone",
                source_id=result.get("voice_id", "unknown"),
                description="【声音复刻】",
            )
            await db.commit()
        except Exception as e:
            # Deliberate best-effort: keep the traceback for diagnosis.
            logger.error(f"[Voice] 克隆扣费失败: {e}", exc_info=True)

        # Vidu returns synchronously, so the status is "succeeded" right away.
        return success_response(
            data=VoiceCloneTaskResponse(
                task_id=result.get("task_id", ""),
                status="succeeded",
                voice_id=result.get("voice_id"),
                trial_url=result.get("demo_audio"),
            ),
            message="克隆成功",
        )

    except PlatformError:
        raise
    except ValueError as e:
        logger.error(f"[Voice] 提交克隆任务失败: {e}")
        # A parameter error is the caller's fault: report 400, not 500.
        raise HTTPException(status_code=400, detail=f"参数错误: {e}") from e
    except Exception as e:
        logger.error(f"[Voice] 提交克隆任务失败: {e}", exc_info=True)
        # Generic detail, matching the sibling endpoints: internal exception
        # text is logged, not returned to the client.
        raise HTTPException(status_code=500, detail="任务提交失败,请稍后重试") from e
|
||
|
||
|
||
@router.get("/clone/query/{task_id}", response_model=ApiResponse[VoiceCloneTaskResponse])
async def query_clone_task(
    task_id: str,
    blocking: bool = False,
    current_user: User = Depends(get_current_user),
):
    """
    Query the status of a voice-cloning task (Vidu).

    Vidu voice cloning is synchronous, so this endpoint exists only for
    compatibility: it echoes ``task_id`` back with status "succeeded" without
    looking anything up. ``blocking`` is accepted but not used.
    """
    completed = VoiceCloneTaskResponse(task_id=task_id, status="succeeded")
    return success_response(data=completed, message="克隆已完成")
|
||
|
||
|
||
@router.post("/clone/clone-and-wait", response_model=ApiResponse[VoiceCloneTaskResponse])
async def clone_and_wait(
    request: VoiceCloneSubmitRequest,
    service: ViduService = Depends(get_vidu_service),
    poll_interval: float = 5.0,
    current_user: User = Depends(get_current_user),
):
    """
    One-shot clone: submit and return the final state.

    Awaits ``service.clone_voice`` and returns its result with status
    "succeeded". No polling happens here; ``poll_interval`` is accepted but
    not used.

    NOTE(review): unlike /clone/submit, this endpoint does not charge points -
    confirm that is intended.

    Raises:
        PlatformError: re-raised unchanged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        voice_id = _normalize_voice_id(request.voice_name)
        result = await service.clone_voice(
            audio_url=request.source_audio_url or "",
            voice_id=voice_id,
        )

        return success_response(
            data=VoiceCloneTaskResponse(
                task_id=result.get("task_id", ""),
                status="succeeded",
                voice_id=result.get("voice_id"),
                trial_url=result.get("demo_audio"),
            ),
            message="克隆成功",
        )

    except PlatformError:
        raise
    except Exception as e:
        # Keep the traceback in the log; the client only gets a generic message.
        logger.error(f"[Voice] 克隆失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="声音复刻失败,请稍后重试") from e
|
||
|