Files
meijiaka-zy/python-api/app/api/v1/caption.py
T
小鱼开发 30536276ba refactor(scheduler): 统一异步任务调度架构
核心变更:
- 统一第三方接口架构:所有服务走 PlatformGateway(call_sync/submit_task/query_task/handle_webhook)
- 视频生成(Vidu 对口型)纳入 Async Engine,与 script/subtitle/tts 统一为 POST /tasks/{task_type} 模式
- 新增 VideoHandler、TTSHandler,完善 ScriptHandler/SubtitleHandler
- PlatformGateway 生成 internal_task_id,建立 Redis 双向映射,callback 场景传入 Async Engine task_id 保证映射一致
- SlotManager 新增 acquire_ctx 上下文管理器,所有 Handler 统一使用
- ViduAdapter 状态映射归一化(normalize_state/denormalize_state)
- 移除 ViduService Semaphore 和 tenacity 重试,并发控制完全交予 SlotManager
- nonce 防重放下沉到 CallbackCapable 协议
- Service 层错误统一为 PlatformError,路由层错误信息脱敏
- 废弃 /voice/lip-sync,清理 vidu.py 遗留路由

Bug 修复:
- VideoHandler 轮询阶段后添加 continue,防止已提交任务重复创建
- voice.py synthesize_to_file 变量名冲突(request vs request_body)
- PlatformGateway.submit_task 空 data 防护
- ScriptHandler 动态导入 asyncio 改为模块级导入
- SubtitleHandler 完成时补充 progress=100

文档:
- 更新 AGENTS.md 核心功能、运行时架构、异步调度描述
2026-05-05 20:53:18 +08:00

373 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
火山引擎音视频字幕 API 路由
============================
提供字幕生成、自动打轴等功能。
"""
import logging
from fastapi import APIRouter, HTTPException, Request
from app.core.exceptions import PlatformError
from app.schemas.caption import (
AutoAlignResult,
AutoAlignSubmitRequest,
CaptionResult,
CaptionSubmitRequest,
CaptionTaskResponse,
SrtSubtitleResponse,
)
from app.schemas.common import ApiResponse, success_response
from app.services.volcengine_caption_service import (
VolcengineCaptionService,
get_caption_service,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Router for the caption endpoints; every route registered on it is served
# under the "/caption" prefix and tagged "Caption".
router = APIRouter(prefix="/caption", tags=["Caption"])
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request_body: CaptionSubmitRequest, request: Request):
    """
    Submit a caption generation task.

    Submits an audio/video file URL so that subtitles with a timeline can be
    generated, and returns the new task id with status ``"pending"``.

    Args:
        request_body: Caption options forwarded to the service (audio_url,
            language, caption_type, use_punc, use_itn, words_per_line,
            max_lines).
        request: Incoming request, used to resolve the caption service.

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        task_id = await service.submit_caption_task(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
        )
        return success_response(
            data=CaptionTaskResponse(
                task_id=task_id,
                status="pending",
            ),
            message="字幕任务已提交",
        )
    except PlatformError as e:
        logger.error("提交字幕任务失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("提交字幕任务异常: %s", e)
        raise HTTPException(status_code=500, detail="字幕任务提交失败,请稍后重试") from e
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of a caption task.

    Args:
        task_id: Task ID.
        request: Incoming request, used to resolve the caption service.
        blocking: Whether to block while waiting for the result
            (default True).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        result = await service.query_caption_task(task_id, blocking=blocking)
        return success_response(data=result)
    except PlatformError as e:
        logger.error("查询字幕任务失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("查询字幕任务异常: %s", e)
        raise HTTPException(status_code=500, detail="查询字幕任务失败,请稍后重试") from e
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(
    request_body: CaptionSubmitRequest,
    request: Request,
    max_wait_time: int = 120,
):
    """
    Generate captions (full flow).

    Submits the task and polls for the result, returning the final caption
    data directly. Suitable for callers that do not need asynchronous
    processing.

    Args:
        request_body: Caption options forwarded to the service.
        request: Incoming request, used to resolve the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )
        return success_response(data=result)
    except PlatformError as e:
        logger.error("生成字幕失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("生成字幕异常: %s", e)
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试") from e
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request_body: CaptionSubmitRequest,
    request: Request,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """
    Generate ASS subtitles (full flow, using the Douyin Sans font).

    Runs the complete caption generation and converts the resulting
    utterances to ASS text via the service's ``to_ass``.

    Args:
        request_body: Caption options forwarded to the service.
        request: Incoming request, used to resolve the caption service.
        video_width: Video width (default 1080).
        video_height: Video height (default 1920).
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )
    except PlatformError as e:
        # Let platform errors propagate untouched, exactly like the sibling
        # /generate and /generate-srt routes, instead of flattening them into
        # a generic HTTP 500.
        logger.error("生成ASS字幕失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("生成ASS字幕失败: %s", e)
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试") from e
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(
    request_body: CaptionSubmitRequest,
    request: Request,
    max_wait_time: int = 120,
):
    """
    Generate SRT subtitles (full flow).

    Runs the complete caption generation and returns the SRT file content
    together with the utterances it was built from.

    Args:
        request_body: Caption options forwarded to the service.
        request: Incoming request, used to resolve the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )
        srt_content = service.to_srt(result.utterances)
        return success_response(
            data=SrtSubtitleResponse(
                srt_content=srt_content,
                utterances=result.utterances,
            )
        )
    except PlatformError as e:
        logger.error("生成SRT字幕失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("生成SRT字幕异常: %s", e)
        raise HTTPException(status_code=500, detail="字幕生成失败,请稍后重试") from e
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request_body: AutoAlignSubmitRequest, request: Request):
    """
    Submit an automatic subtitle alignment task.

    Adds a timeline to subtitle text that already exists. Returns the new
    task id with status ``"pending"``.

    Args:
        request_body: Alignment options forwarded to the service (audio_url,
            audio_text, caption_type, sta_punc_mode).
        request: Incoming request, used to resolve the caption service.

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        task_id = await service.submit_auto_align_task(
            audio_url=request_body.audio_url,
            audio_text=request_body.audio_text,
            caption_type=request_body.caption_type,
            sta_punc_mode=request_body.sta_punc_mode,
        )
        return success_response(
            data=CaptionTaskResponse(
                task_id=task_id,
                status="pending",
            ),
            message="打轴任务已提交",
        )
    except PlatformError as e:
        logger.error("提交打轴任务失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("提交打轴任务异常: %s", e)
        raise HTTPException(status_code=500, detail="打轴任务提交失败,请稍后重试") from e
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of an alignment task.

    Args:
        task_id: Task ID.
        request: Incoming request, used to resolve the caption service.
        blocking: Forwarded to the service as ``blocking`` (default True).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        service = await get_caption_service(request)
        result = await service.query_auto_align_task(task_id, blocking=blocking)
        return success_response(data=result)
    except PlatformError as e:
        logger.error("查询打轴任务失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("查询打轴任务异常: %s", e)
        raise HTTPException(status_code=500, detail="查询打轴任务失败,请稍后重试") from e
@router.post("/ata/align")
async def auto_align_caption(
    request_body: AutoAlignSubmitRequest,
    request: Request,
    max_wait_time: int = 120,
):
    """
    Automatic subtitle alignment (full flow).

    Submits the alignment task and polls for the result, returning the final
    data directly.

    Args:
        request_body: Alignment options forwarded to the service (audio_url,
            audio_text, caption_type, sta_punc_mode).
        request: Incoming request, used to resolve the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably seconds -- confirm against the service).

    Raises:
        PlatformError: Re-raised unchanged after being logged.
        HTTPException: 500 with a generic message for any other failure.
    """
    try:
        logger.info(
            "[Caption API] Auto align request: audio_url=%s...",
            request_body.audio_url[:50],
        )
        service = await get_caption_service(request)
        result = await service.auto_align_caption(
            audio_url=request_body.audio_url,
            audio_text=request_body.audio_text,
            caption_type=request_body.caption_type,
            sta_punc_mode=request_body.sta_punc_mode,
            max_wait_time=max_wait_time,
        )
        # Normalise a missing/empty utterance list once and reuse it below.
        utterances = result.utterances or []
        logger.info("[Caption API] Auto align result: utterances_count=%s", len(utterances))
        if utterances:
            logger.info("[Caption API] First utterance: %s", utterances[0])
        # Serialise to plain dicts by hand so nested models are handled correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": result.duration,
            "utterances": [
                {
                    "text": u.text,
                    "start_time": u.start_time,
                    "end_time": u.end_time,
                }
                for u in utterances
            ],
        }
        logger.info("[Caption API] Response data: %s", response_data)
        return success_response(data=response_data)
    except PlatformError as e:
        logger.error("自动打轴失败: %s", e)
        raise
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("自动打轴异常: %s", e)
        raise HTTPException(status_code=500, detail="字幕打轴失败,请稍后重试") from e
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """
    Convert a caption result to ASS format (using the Douyin Sans font).

    Args:
        result: Caption result whose ``utterances`` are converted.
        video_width: Video width passed to the converter (default 1080).
        video_height: Video height passed to the converter (default 1920).

    Raises:
        HTTPException: 500 with a generic message if the conversion fails.
    """
    try:
        ass_content = VolcengineCaptionService.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "font": "DouyinSansBold",
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("转换ASS失败: %s", e)
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试") from e
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """
    Convert a caption result to SRT format.

    Used to turn the raw data returned by /generate into SRT text.

    Args:
        result: Caption result whose ``utterances`` are converted.

    Raises:
        HTTPException: 500 with a generic message if the conversion fails.
    """
    try:
        srt_content = VolcengineCaptionService.to_srt(result.utterances)
        return success_response(
            data={
                "srt_content": srt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("转换SRT失败: %s", e)
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试") from e
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """
    Convert a caption result to WebVTT format.

    Args:
        result: Caption result whose ``utterances`` are converted.

    Raises:
        HTTPException: 500 with a generic message if the conversion fails.
    """
    try:
        vtt_content = VolcengineCaptionService.to_vtt(result.utterances)
        return success_response(
            data={
                "vtt_content": vtt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # Record the full traceback: the client only receives a generic
        # message, so the log is the sole place to diagnose the failure.
        logger.exception("转换VTT失败: %s", e)
        raise HTTPException(status_code=500, detail="字幕格式转换失败,请稍后重试") from e