30536276ba
核心变更:
- 统一第三方接口架构:所有服务走 PlatformGateway(call_sync/submit_task/query_task/handle_webhook)
- 视频生成(Vidu 对口型)纳入 Async Engine,与 script/subtitle/tts 统一为 POST /tasks/{task_type} 模式
- 新增 VideoHandler、TTSHandler,完善 ScriptHandler/SubtitleHandler
- PlatformGateway 生成 internal_task_id,建立 Redis 双向映射,callback 场景传入 Async Engine task_id 保证映射一致
- SlotManager 新增 acquire_ctx 上下文管理器,所有 Handler 统一使用
- ViduAdapter 状态映射归一化(normalize_state/denormalize_state)
- 移除 ViduService Semaphore 和 tenacity 重试,并发控制完全交予 SlotManager
- nonce 防重放下沉到 CallbackCapable 协议
- Service 层错误统一为 PlatformError,路由层错误信息脱敏
- 废弃 /voice/lip-sync,清理 vidu.py 遗留路由
Bug 修复:
- VideoHandler 轮询阶段后添加 continue,防止已提交任务重复创建
- voice.py synthesize_to_file 变量名冲突(request vs request_body)
- PlatformGateway.submit_task 空 data 防护
- ScriptHandler 动态导入 asyncio 改为模块级导入
- SubtitleHandler 完成时补充 progress=100
文档:
- 更新 AGENTS.md 核心功能、运行时架构、异步调度描述
373 lines
12 KiB
Python
373 lines
12 KiB
Python
"""
|
||
火山引擎音视频字幕 API 路由
|
||
============================
|
||
|
||
提供字幕生成、自动打轴等功能。
|
||
"""
|
||
|
||
import logging
|
||
|
||
from fastapi import APIRouter, HTTPException, Request
|
||
|
||
from app.core.exceptions import PlatformError
|
||
from app.schemas.caption import (
|
||
AutoAlignResult,
|
||
AutoAlignSubmitRequest,
|
||
CaptionResult,
|
||
CaptionSubmitRequest,
|
||
CaptionTaskResponse,
|
||
SrtSubtitleResponse,
|
||
)
|
||
from app.schemas.common import ApiResponse, success_response
|
||
from app.services.volcengine_caption_service import (
|
||
VolcengineCaptionService,
|
||
get_caption_service,
|
||
)
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
# Router for every endpoint in this file; created with the "/caption" prefix and the "Caption" tag.
router = APIRouter(prefix="/caption", tags=["Caption"])
|
||
|
||
|
||
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request_body: CaptionSubmitRequest, request: Request):
    """
    Submit a caption generation task.

    Forwards the audio URL and the recognition/layout options from the request
    body to the caption service and wraps the returned task id in a response
    whose status is "pending".

    Args:
        request_body: Caption options (audio URL, language, caption type,
            punctuation/ITN switches, words per line, max lines).
        request: Incoming request, handed to ``get_caption_service``.

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)

        # Collect the service arguments once, then submit.
        submit_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
        }
        new_task_id = await caption_service.submit_caption_task(**submit_kwargs)

        pending_task = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=pending_task, message="字幕任务已提交")

    except PlatformError as e:
        logger.error(f"提交字幕任务失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"提交字幕任务异常: {e}")
        raise HTTPException(status_code=500, detail="字幕任务提交失败,请稍后重试")
|
||
|
||
|
||
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of a caption task.

    Args:
        task_id: Identifier of the task to look up.
        request: Incoming request, handed to ``get_caption_service``.
        blocking: Forwarded to the service as ``blocking``; per the original
            description, whether to block until the result is available
            (defaults to True).

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)
        caption_result = await caption_service.query_caption_task(
            task_id,
            blocking=blocking,
        )
        return success_response(data=caption_result)

    except PlatformError as e:
        logger.error(f"查询字幕任务失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"查询字幕任务异常: {e}")
        raise HTTPException(status_code=500, detail="查询字幕任务失败,请稍后重试")
|
||
|
||
|
||
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate captions in a single call (full flow).

    Delegates to the service's ``generate_caption`` and returns its result
    directly. Per the original description, the service submits the task and
    polls for the result, which suits callers that do not need asynchronous
    handling.

    Args:
        request_body: Caption options (audio URL, language, caption type,
            punctuation/ITN switches, words per line, max lines).
        request: Incoming request, handed to ``get_caption_service``.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (default 120; presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)

        generate_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generate_kwargs)

        return success_response(data=caption_result)

    except PlatformError as e:
        logger.error(f"生成字幕失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"生成字幕异常: {e}")
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试")
|
||
|
||
|
||
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request_body: CaptionSubmitRequest,
    request: Request,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """
    Generate captions and return them in ASS format (full flow).

    Runs the service's ``generate_caption`` with the options from the request
    body, then renders the resulting utterances through ``service.to_ass``.
    The response reports the font as "DouyinSansBold".

    Args:
        request_body: Caption options (audio URL, language, caption type,
            punctuation/ITN switches, words per line, max lines).
        request: Incoming request, handed to ``get_caption_service``.
        video_width: Video width passed to ``to_ass`` (default 1080).
        video_height: Video height passed to ``to_ass`` (default 1920).
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (default 120; presumably seconds -- confirm against the service).

    Returns:
        A success response whose data holds ``ass_content``, ``utterances``,
        ``duration`` and ``font``.

    Raises:
        PlatformError: Logged and re-raised unchanged, as in the sibling
            caption endpoints.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )

        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )

        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )

    except PlatformError as e:
        # Re-raise platform errors untouched instead of masking them as a
        # generic 500, consistent with the other handlers in this module.
        logger.error(f"生成ASS字幕失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"生成ASS字幕异常: {e}")
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试")
|
||
|
||
|
||
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate captions and return them in SRT format (full flow).

    Runs the service's ``generate_caption`` with the options from the request
    body, renders the utterances with ``service.to_srt`` and returns both the
    SRT text and the utterances.

    Args:
        request_body: Caption options (audio URL, language, caption type,
            punctuation/ITN switches, words per line, max lines).
        request: Incoming request, handed to ``get_caption_service``.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (default 120; presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)

        generate_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generate_kwargs)

        srt_text = caption_service.to_srt(caption_result.utterances)

        srt_payload = SrtSubtitleResponse(
            srt_content=srt_text,
            utterances=caption_result.utterances,
        )
        return success_response(data=srt_payload)

    except PlatformError as e:
        logger.error(f"生成SRT字幕失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"生成SRT字幕异常: {e}")
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试")
|
||
|
||
|
||
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request_body: AutoAlignSubmitRequest, request: Request):
    """
    Submit an automatic caption-alignment task.

    Per the original description, this adds a timeline to caption text that
    already exists. The audio URL, text and alignment options are forwarded to
    the service, and the returned task id is wrapped in a response whose
    status is "pending".

    Args:
        request_body: Alignment options (audio URL, audio text, caption type,
            ``sta_punc_mode``).
        request: Incoming request, handed to ``get_caption_service``.

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)

        align_kwargs = {
            "audio_url": request_body.audio_url,
            "audio_text": request_body.audio_text,
            "caption_type": request_body.caption_type,
            "sta_punc_mode": request_body.sta_punc_mode,
        }
        new_task_id = await caption_service.submit_auto_align_task(**align_kwargs)

        pending_task = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=pending_task, message="打轴任务已提交")

    except PlatformError as e:
        logger.error(f"提交打轴任务失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"提交打轴任务异常: {e}")
        raise HTTPException(status_code=500, detail="打轴任务提交失败,请稍后重试")
|
||
|
||
|
||
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of an automatic caption-alignment task.

    Args:
        task_id: Identifier of the alignment task to look up.
        request: Incoming request, handed to ``get_caption_service``.
        blocking: Forwarded to the service as ``blocking`` (defaults to True).

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        caption_service = await get_caption_service(request)
        align_result = await caption_service.query_auto_align_task(
            task_id,
            blocking=blocking,
        )
        return success_response(data=align_result)

    except PlatformError as e:
        logger.error(f"查询打轴任务失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"查询打轴任务异常: {e}")
        raise HTTPException(status_code=500, detail="查询打轴任务失败,请稍后重试")
|
||
|
||
|
||
@router.post("/ata/align")
async def auto_align_caption(request_body: AutoAlignSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Align existing caption text to audio in a single call (full flow).

    Delegates to the service's ``auto_align_caption`` and returns the final
    data directly. Per the original description, the service submits the
    alignment task and polls for its result.

    Args:
        request_body: Alignment options (audio URL, audio text, caption type,
            ``sta_punc_mode``).
        request: Incoming request, handed to ``get_caption_service``.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (default 120; presumably seconds -- confirm against the service).

    Returns:
        A success response whose data is a plain dict with ``code``,
        ``message``, ``duration`` and a list of ``utterances`` (each holding
        ``text``, ``start_time`` and ``end_time``).

    Raises:
        PlatformError: Logged and re-raised unchanged.
        HTTPException: Status 500 with a generic detail for any other error.
    """
    try:
        # Only the first 50 characters of the URL are logged.
        logger.info(f"[Caption API] Auto align request: audio_url={request_body.audio_url[:50]}...")

        caption_service = await get_caption_service(request)
        align_kwargs = {
            "audio_url": request_body.audio_url,
            "audio_text": request_body.audio_text,
            "caption_type": request_body.caption_type,
            "sta_punc_mode": request_body.sta_punc_mode,
            "max_wait_time": max_wait_time,
        }
        result = await caption_service.auto_align_caption(**align_kwargs)

        logger.info(
            f"[Caption API] Auto align result: utterances_count={len(result.utterances) if result.utterances else 0}"
        )
        if result.utterances:
            logger.info(f"[Caption API] First utterance: {result.utterances[0]}")

        # Serialize by hand into plain dicts so nested models are handled
        # correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": result.duration,
            "utterances": [],
        }
        for utterance in result.utterances or []:
            response_data["utterances"].append(
                {
                    "text": utterance.text,
                    "start_time": utterance.start_time,
                    "end_time": utterance.end_time,
                }
            )

        logger.info(f"[Caption API] Response data: {response_data}")
        return success_response(data=response_data)

    except PlatformError as e:
        logger.error(f"自动打轴失败: {e}")
        raise
    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"自动打轴异常: {e}")
        raise HTTPException(status_code=500, detail="字幕打轴失败,请稍后重试")
|
||
|
||
|
||
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """
    Convert a caption result to ASS format.

    Renders ``result.utterances`` through ``VolcengineCaptionService.to_ass``
    and reports the font as "DouyinSansBold".

    Args:
        result: Caption result whose utterances are converted.
        video_width: Video width passed to ``to_ass`` (default 1080).
        video_height: Video height passed to ``to_ass`` (default 1920).

    Returns:
        A success response whose data holds ``ass_content``, ``font`` and
        ``utterances_count``.

    Raises:
        HTTPException: Status 500 with a generic detail if conversion fails.
    """
    try:
        utterances = result.utterances
        rendered_ass = VolcengineCaptionService.to_ass(
            utterances,
            video_width=video_width,
            video_height=video_height,
        )

        payload = {
            "ass_content": rendered_ass,
            "font": "DouyinSansBold",
            "utterances_count": len(utterances),
        }
        return success_response(data=payload)

    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"转换ASS失败: {e}")
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试")
|
||
|
||
|
||
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """
    Convert a caption result to SRT format.

    Intended for turning the raw data returned by /generate into SRT text.

    Args:
        result: Caption result whose utterances are converted.

    Returns:
        A success response whose data holds ``srt_content`` and
        ``utterances_count``.

    Raises:
        HTTPException: Status 500 with a generic detail if conversion fails.
    """
    try:
        utterances = result.utterances
        rendered_srt = VolcengineCaptionService.to_srt(utterances)

        payload = {
            "srt_content": rendered_srt,
            "utterances_count": len(utterances),
        }
        return success_response(data=payload)

    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"转换SRT失败: {e}")
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试")
|
||
|
||
|
||
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """
    Convert a caption result to WebVTT format.

    Args:
        result: Caption result whose utterances are converted.

    Returns:
        A success response whose data holds ``vtt_content`` and
        ``utterances_count``.

    Raises:
        HTTPException: Status 500 with a generic detail if conversion fails.
    """
    try:
        utterances = result.utterances
        rendered_vtt = VolcengineCaptionService.to_vtt(utterances)

        payload = {
            "vtt_content": rendered_vtt,
            "utterances_count": len(utterances),
        }
        return success_response(data=payload)

    except Exception as e:
        # The client only sees a generic message; the cause goes to the log.
        logger.error(f"转换VTT失败: {e}")
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试")
|