Files
meijiaka-zy/python-api/app/api/v1/caption.py
T
小鱼开发 e58159fc42 refactor: 第三方平台架构改造(Adapter Protocol + Gateway)
Phase 1: 异常体系统一
- 新增 PlatformError / PlatformErrorType 标准定义
- 改造所有 Provider 异常抛出为 PlatformError
- 注册全局 PlatformError exception handler

Phase 2: Adapter Protocol
- 新增 app/ai/adapters/base.py(PlatformAdapter + SyncCapable + TaskCapable + CallbackCapable)
- 新增 app/ai/adapters/constants.py(Method 常量)
- 新增 PlatformConfigLoader(config/platform-config.yaml)

Phase 3: HTTP Client 统一
- ViduProvider 从 aiohttp 迁移到 httpx(注入方式)
- VolcengineCaptionService 改为注入 http_client
- lifespan 统一管理所有 Client 创建和关闭

Phase 4: Gateway 骨架 + Adapter 实现
- 新增 ViduAdapter / VolcengineArkAdapter / VolcengineCaptionAdapter
- 新增 PlatformGateway(call_sync / submit_task / query_task / handle_webhook)
- 新增 LLMGateway(带 Fallback 降级链)
- lifespan 注册所有 Adapter 和 Gateway

Phase 6: 清理与验证
- 从 Settings 移除 VIDU_BASE_URL / VOLCENGINE_BASE_URL
- Provider 改为从 PlatformConfigLoader 读取 base_url
- 清理 volcengine_caption_service 全局单例
- config_loader 默认路径改为 platform-config.yaml
- Scheduler 注入共享 HTTP client
- vidu.py 回调路由使用 Adapter 验签和解析
- ruff 全量通过,应用启动测试通过
2026-05-04 16:07:16 +08:00

376 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
火山引擎音视频字幕 API 路由
============================
提供字幕生成、自动打轴等功能。
"""
import logging
from fastapi import APIRouter, HTTPException, Request
from app.core.exceptions import PlatformError
from app.schemas.caption import (
AutoAlignResult,
AutoAlignSubmitRequest,
CaptionResult,
CaptionSubmitRequest,
CaptionTaskResponse,
SrtSubtitleResponse,
)
from app.schemas.common import ApiResponse, success_response
from app.services.volcengine_caption_service import (
VolcengineCaptionService,
get_caption_service,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router holding every caption endpoint in this file; routes are served under "/caption".
router = APIRouter(prefix="/caption", tags=["Caption"])
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request_body: CaptionSubmitRequest, request: Request):
    """
    Submit a caption generation task.

    Passes the audio/video URL and the recognition options from the request
    body to the caption service, then wraps the returned task id in a success
    response whose status is "pending".

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        submit_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
        }
        new_task_id = await caption_service.submit_caption_task(**submit_kwargs)
        payload = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=payload, message="字幕任务已提交")
    except PlatformError as exc:
        # Platform errors pass through untouched; only a log line is added here.
        logger.error(f"提交字幕任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"提交字幕任务异常: {exc}")
        failure_detail = f"提交失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, request: Request, blocking: bool = True):
    """
    Look up the result of a caption task.

    Args:
        task_id: Identifier of the task to query.
        blocking: Forwarded to the service as its ``blocking`` flag
            (defaults to True; described upstream as "wait for the result").

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        task_result = await caption_service.query_caption_task(task_id, blocking=blocking)
        return success_response(data=task_result)
    except PlatformError as exc:
        logger.error(f"查询字幕任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"查询字幕任务异常: {exc}")
        failure_detail = f"查询失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate captions in a single call (full flow).

    Delegates to the service's ``generate_caption`` (described upstream as
    submit-then-poll) and returns its result directly, for callers that do
    not need asynchronous handling.

    Args:
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (defaults to 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        generation_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generation_kwargs)
        return success_response(data=caption_result)
    except PlatformError as exc:
        logger.error(f"生成字幕失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"生成字幕异常: {exc}")
        failure_detail = f"生成失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request_body: CaptionSubmitRequest,
    request: Request,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """
    Generate captions and return them in ASS format (full flow).

    Runs the service's ``generate_caption`` with the request options, converts
    the resulting utterances with ``to_ass`` and returns the ASS text together
    with the utterances, the duration and the font name "DouyinSansBold".

    Args:
        video_width: Video width passed to ``to_ass`` (defaults to 1080).
        video_height: Video height passed to ``to_ass`` (defaults to 1920).
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (defaults to 120).

    Raises:
        PlatformError: logged and re-raised unchanged, matching the other
            caption endpoints in this module.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )
    except PlatformError as e:
        # Let platform errors propagate as-is instead of flattening them into
        # a generic 500, the same way every sibling handler does.
        logger.error(f"生成ASS字幕失败: {e}")
        raise
    except Exception as e:
        logger.error(f"生成ASS字幕失败: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}") from e
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate captions and return them in SRT format (full flow).

    Runs the service's ``generate_caption`` with the request options, converts
    the resulting utterances with ``to_srt`` and returns the SRT text together
    with the utterances.

    Args:
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (defaults to 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        generation_kwargs = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generation_kwargs)
        srt_text = caption_service.to_srt(caption_result.utterances)
        payload = SrtSubtitleResponse(
            srt_content=srt_text,
            utterances=caption_result.utterances,
        )
        return success_response(data=payload)
    except PlatformError as exc:
        logger.error(f"生成SRT字幕失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"生成SRT字幕异常: {exc}")
        failure_detail = f"生成失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request_body: AutoAlignSubmitRequest, request: Request):
    """
    Submit an automatic caption-alignment task.

    Sends the audio URL and the existing caption text to the service so the
    text can be given a timeline, then wraps the returned task id in a success
    response whose status is "pending".

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        align_kwargs = {
            "audio_url": request_body.audio_url,
            "audio_text": request_body.audio_text,
            "caption_type": request_body.caption_type,
            "sta_punc_mode": request_body.sta_punc_mode,
        }
        new_task_id = await caption_service.submit_auto_align_task(**align_kwargs)
        payload = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=payload, message="打轴任务已提交")
    except PlatformError as exc:
        logger.error(f"提交打轴任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"提交打轴任务异常: {exc}")
        failure_detail = f"提交失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, request: Request, blocking: bool = True):
    """
    Look up the result of an automatic caption-alignment task.

    Args:
        task_id: Identifier of the alignment task to query.
        blocking: Forwarded to the service as its ``blocking`` flag
            (defaults to True).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        caption_service = await get_caption_service(request)
        align_result = await caption_service.query_auto_align_task(task_id, blocking=blocking)
        return success_response(data=align_result)
    except PlatformError as exc:
        logger.error(f"查询打轴任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"查询打轴任务异常: {exc}")
        failure_detail = f"查询失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/ata/align")
async def auto_align_caption(request_body: AutoAlignSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Align existing caption text to audio in a single call (full flow).

    Delegates to the service's ``auto_align_caption`` (described upstream as
    submit-then-poll), then builds a plain-dict payload containing the
    duration and each utterance's text, start time and end time.

    Args:
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (defaults to 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception raised in the handler.
    """
    try:
        # Only the first 50 characters of the URL are written to the log.
        logger.info(f"[Caption API] Auto align request: audio_url={request_body.audio_url[:50]}...")
        caption_service = await get_caption_service(request)
        align_result = await caption_service.auto_align_caption(
            audio_url=request_body.audio_url,
            audio_text=request_body.audio_text,
            caption_type=request_body.caption_type,
            sta_punc_mode=request_body.sta_punc_mode,
            max_wait_time=max_wait_time,
        )

        utterance_total = len(align_result.utterances) if align_result.utterances else 0
        logger.info(f"[Caption API] Auto align result: utterances_count={utterance_total}")
        if align_result.utterances:
            logger.info(f"[Caption API] First utterance: {align_result.utterances[0]}")

        # Serialize by hand into plain dicts so nested models are handled correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": align_result.duration,
            "utterances": [],
        }
        for utterance in align_result.utterances or []:
            response_data["utterances"].append(
                {
                    "text": utterance.text,
                    "start_time": utterance.start_time,
                    "end_time": utterance.end_time,
                }
            )

        logger.info(f"[Caption API] Response data: {response_data}")
        return success_response(data=response_data)
    except PlatformError as exc:
        logger.error(f"自动打轴失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"自动打轴异常: {exc}")
        failure_detail = f"打轴失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """
    Convert an existing caption result to ASS format.

    Returns the ASS text, the font name "DouyinSansBold" and the number of
    utterances that were converted.

    Args:
        result: Caption result whose utterances are converted.
        video_width: Video width passed to ``to_ass`` (defaults to 1080).
        video_height: Video height passed to ``to_ass`` (defaults to 1920).

    Raises:
        HTTPException: status 500 for any exception raised in the handler.
    """
    try:
        # Built with two empty-string arguments; the original note says no
        # authentication is needed for conversion.
        converter = VolcengineCaptionService("", "")
        ass_text = converter.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        payload = {
            "ass_content": ass_text,
            "font": "DouyinSansBold",
            "utterances_count": len(result.utterances),
        }
        return success_response(data=payload)
    except Exception as exc:
        logger.error(f"转换ASS失败: {exc}")
        failure_detail = f"转换失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """
    Convert an existing caption result to SRT format.

    Intended for turning the raw data returned by /generate into SRT text.
    Returns the SRT text and the number of utterances that were converted.

    Args:
        result: Caption result whose utterances are converted.

    Raises:
        HTTPException: status 500 for any exception raised in the handler.
    """
    try:
        # Built with two empty-string arguments; the original note says no
        # authentication is needed for conversion.
        converter = VolcengineCaptionService("", "")
        srt_text = converter.to_srt(result.utterances)
        payload = {
            "srt_content": srt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=payload)
    except Exception as exc:
        logger.error(f"转换SRT失败: {exc}")
        failure_detail = f"转换失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """
    Convert an existing caption result to WebVTT format.

    Returns the WebVTT text and the number of utterances that were converted.

    Args:
        result: Caption result whose utterances are converted.

    Raises:
        HTTPException: status 500 for any exception raised in the handler.
    """
    try:
        # Built with two empty-string arguments; the original note says no
        # authentication is needed for conversion.
        converter = VolcengineCaptionService("", "")
        vtt_text = converter.to_vtt(result.utterances)
        payload = {
            "vtt_content": vtt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=payload)
    except Exception as exc:
        logger.error(f"转换VTT失败: {exc}")
        failure_detail = f"转换失败: {str(exc)}"
        raise HTTPException(status_code=500, detail=failure_detail)