e58159fc42
Phase 1: 异常体系统一 - 新增 PlatformError / PlatformErrorType 标准定义 - 改造所有 Provider 异常抛出为 PlatformError - 注册全局 PlatformError exception handler Phase 2: Adapter Protocol - 新增 app/ai/adapters/base.py(PlatformAdapter + SyncCapable + TaskCapable + CallbackCapable) - 新增 app/ai/adapters/constants.py(Method 常量) - 新增 PlatformConfigLoader(config/platform-config.yaml) Phase 3: HTTP Client 统一 - ViduProvider 从 aiohttp 迁移到 httpx(注入方式) - VolcengineCaptionService 改为注入 http_client - lifespan 统一管理所有 Client 创建和关闭 Phase 4: Gateway 骨架 + Adapter 实现 - 新增 ViduAdapter / VolcengineArkAdapter / VolcengineCaptionAdapter - 新增 PlatformGateway(call_sync / submit_task / query_task / handle_webhook) - 新增 LLMGateway(带 Fallback 降级链) - lifespan 注册所有 Adapter 和 Gateway Phase 6: 清理与验证 - 从 Settings 移除 VIDU_BASE_URL / VOLCENGINE_BASE_URL - Provider 改为从 PlatformConfigLoader 读取 base_url - 清理 volcengine_caption_service 全局单例 - config_loader 默认路径改为 platform-config.yaml - Scheduler 注入共享 HTTP client - vidu.py 回调路由使用 Adapter 验签和解析 - ruff 全量通过,应用启动测试通过
376 lines
12 KiB
Python
376 lines
12 KiB
Python
"""
|
||
火山引擎音视频字幕 API 路由
|
||
============================
|
||
|
||
提供字幕生成、自动打轴等功能。
|
||
"""
|
||
|
||
import logging
|
||
|
||
from fastapi import APIRouter, HTTPException, Request
|
||
|
||
from app.core.exceptions import PlatformError
|
||
from app.schemas.caption import (
|
||
AutoAlignResult,
|
||
AutoAlignSubmitRequest,
|
||
CaptionResult,
|
||
CaptionSubmitRequest,
|
||
CaptionTaskResponse,
|
||
SrtSubtitleResponse,
|
||
)
|
||
from app.schemas.common import ApiResponse, success_response
|
||
from app.services.volcengine_caption_service import (
|
||
VolcengineCaptionService,
|
||
get_caption_service,
|
||
)
|
||
|
||
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
|
||
# Every route declared below is registered under the "/caption" prefix and
# tagged "Caption".
router = APIRouter(prefix="/caption", tags=["Caption"])
|
||
|
||
|
||
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request_body: CaptionSubmitRequest, request: Request):
    """
    Submit a caption generation task.

    Takes an audio/video file URL plus recognition options from the request
    body, hands them to the caption service, and responds with the new task ID
    and a "pending" status.

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)

        # Gather the submission options first so the service call stays compact.
        submit_options = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
        }
        new_task_id = await caption_service.submit_caption_task(**submit_options)

        task_payload = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=task_payload, message="字幕任务已提交")

    except PlatformError as exc:
        # Platform errors propagate as-is (presumably to the app-level
        # PlatformError handler -- confirm against the application setup).
        logger.error(f"提交字幕任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"提交字幕任务异常: {exc}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(exc)}")
|
||
|
||
|
||
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of a caption task.

    Args:
        task_id: ID of the task to look up.
        blocking: Whether to block while waiting for the result (default True);
            forwarded to the caption service unchanged.

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)
        caption_result = await caption_service.query_caption_task(
            task_id,
            blocking=blocking,
        )
        return success_response(data=caption_result)

    except PlatformError as exc:
        logger.error(f"查询字幕任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"查询字幕任务异常: {exc}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate captions (full flow).

    Submits the task and polls for the result, returning the final caption
    data directly. Intended for callers that do not need asynchronous handling.

    Args:
        request_body: Audio URL and recognition options.
        max_wait_time: Forwarded to the service's ``generate_caption`` call
            (default 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)

        generation_options = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generation_options)

        return success_response(data=caption_result)

    except PlatformError as exc:
        logger.error(f"生成字幕失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"生成字幕异常: {exc}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request_body: CaptionSubmitRequest,
    request: Request,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """
    Generate ASS-format subtitles (full flow, using the Douyin Sans font).

    Runs the caption service's ``generate_caption`` with the options from the
    request body, then converts the resulting utterances to ASS via
    ``service.to_ass``.

    Args:
        request_body: Audio URL and recognition options.
        video_width: Video width passed to the ASS conversion (default 1080).
        video_height: Video height passed to the ASS conversion (default 1920).
        max_wait_time: Forwarded to ``generate_caption`` (default 120).

    Returns:
        A success response whose data holds ``ass_content``, ``utterances``,
        ``duration`` and ``font``.

    Raises:
        PlatformError: logged and re-raised unchanged, matching the other
            platform-calling routes in this module.
        HTTPException: status 500 for any other exception.
    """
    try:
        service = await get_caption_service(request)
        result = await service.generate_caption(
            audio_url=request_body.audio_url,
            language=request_body.language,
            caption_type=request_body.caption_type,
            use_punc=request_body.use_punc,
            use_itn=request_body.use_itn,
            words_per_line=request_body.words_per_line,
            max_lines=request_body.max_lines,
            max_wait_time=max_wait_time,
        )

        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )

        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )

    except PlatformError as e:
        # Let platform errors through untouched instead of flattening them
        # into a generic 500, as every sibling route here already does.
        logger.error(f"生成ASS字幕失败: {e}")
        raise
    except Exception as e:
        logger.error(f"生成ASS字幕异常: {e}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}") from e
|
||
|
||
|
||
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request_body: CaptionSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Generate SRT-format subtitles (full flow).

    Runs caption generation and returns the SRT file content directly,
    together with the utterances it was built from.

    Args:
        request_body: Audio URL and recognition options.
        max_wait_time: Forwarded to the service's ``generate_caption`` call
            (default 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)

        generation_options = {
            "audio_url": request_body.audio_url,
            "language": request_body.language,
            "caption_type": request_body.caption_type,
            "use_punc": request_body.use_punc,
            "use_itn": request_body.use_itn,
            "words_per_line": request_body.words_per_line,
            "max_lines": request_body.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generation_options)

        srt_text = caption_service.to_srt(caption_result.utterances)

        srt_payload = SrtSubtitleResponse(
            srt_content=srt_text,
            utterances=caption_result.utterances,
        )
        return success_response(data=srt_payload)

    except PlatformError as exc:
        logger.error(f"生成SRT字幕失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"生成SRT字幕异常: {exc}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request_body: AutoAlignSubmitRequest, request: Request):
    """
    Submit an automatic subtitle alignment task.

    Adds a timeline to subtitle text that already exists. Responds with the
    new task ID and a "pending" status.

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)

        align_options = {
            "audio_url": request_body.audio_url,
            "audio_text": request_body.audio_text,
            "caption_type": request_body.caption_type,
            "sta_punc_mode": request_body.sta_punc_mode,
        }
        new_task_id = await caption_service.submit_auto_align_task(**align_options)

        task_payload = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=task_payload, message="打轴任务已提交")

    except PlatformError as exc:
        logger.error(f"提交打轴任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"提交打轴任务异常: {exc}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(exc)}")
|
||
|
||
|
||
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, request: Request, blocking: bool = True):
    """
    Query the result of an alignment task.

    Args:
        task_id: ID of the alignment task to look up.
        blocking: Forwarded to the caption service unchanged (default True).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        caption_service = await get_caption_service(request)
        align_result = await caption_service.query_auto_align_task(
            task_id,
            blocking=blocking,
        )
        return success_response(data=align_result)

    except PlatformError as exc:
        logger.error(f"查询打轴任务失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"查询打轴任务异常: {exc}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/ata/align")
async def auto_align_caption(request_body: AutoAlignSubmitRequest, request: Request, max_wait_time: int = 120):
    """
    Automatic subtitle alignment (full flow).

    Submits the alignment task, polls for the result, and returns the final
    data directly.

    Args:
        request_body: Audio URL, subtitle text and alignment options.
        max_wait_time: Forwarded to the service's ``auto_align_caption`` call
            (default 120).

    Raises:
        PlatformError: logged and re-raised unchanged.
        HTTPException: status 500 for any other exception.
    """
    try:
        # Only the first 50 characters of the URL are written to the log.
        url_preview = request_body.audio_url[:50]
        logger.info(f"[Caption API] Auto align request: audio_url={url_preview}...")

        caption_service = await get_caption_service(request)
        align_options = {
            "audio_url": request_body.audio_url,
            "audio_text": request_body.audio_text,
            "caption_type": request_body.caption_type,
            "sta_punc_mode": request_body.sta_punc_mode,
            "max_wait_time": max_wait_time,
        }
        result = await caption_service.auto_align_caption(**align_options)

        utterance_count = len(result.utterances) if result.utterances else 0
        logger.info(f"[Caption API] Auto align result: utterances_count={utterance_count}")
        if result.utterances:
            first_utterance = result.utterances[0]
            logger.info(f"[Caption API] First utterance: {first_utterance}")

        # Serialize to plain dicts by hand so the nested models are handled correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": result.duration,
            "utterances": [
                {
                    "text": entry.text,
                    "start_time": entry.start_time,
                    "end_time": entry.end_time,
                }
                for entry in (result.utterances or [])
            ],
        }
        logger.info(f"[Caption API] Response data: {response_data}")
        return success_response(data=response_data)

    except PlatformError as exc:
        logger.error(f"自动打轴失败: {exc}")
        raise
    except Exception as exc:
        logger.error(f"自动打轴异常: {exc}")
        raise HTTPException(status_code=500, detail=f"打轴失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """
    Convert a caption result to ASS format (using the Douyin Sans font).

    Args:
        result: Caption result whose utterances are converted.
        video_width: Video width passed to the ASS conversion (default 1080).
        video_height: Video height passed to the ASS conversion (default 1920).

    Raises:
        HTTPException: status 500 if the conversion fails for any reason.
    """
    try:
        # No authentication is needed here, so the service is built with
        # empty credentials.
        converter = VolcengineCaptionService("", "")
        ass_text = converter.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )

        conversion_payload = {
            "ass_content": ass_text,
            "font": "DouyinSansBold",
            "utterances_count": len(result.utterances),
        }
        return success_response(data=conversion_payload)

    except Exception as exc:
        logger.error(f"转换ASS失败: {exc}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """
    Convert a caption result to SRT format.

    Meant for turning the raw data returned by /generate into SRT text.

    Raises:
        HTTPException: status 500 if the conversion fails for any reason.
    """
    try:
        # No authentication is needed here, so the service is built with
        # empty credentials.
        converter = VolcengineCaptionService("", "")
        srt_text = converter.to_srt(result.utterances)

        conversion_payload = {
            "srt_content": srt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=conversion_payload)

    except Exception as exc:
        logger.error(f"转换SRT失败: {exc}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(exc)}")
|
||
|
||
|
||
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """
    Convert a caption result to WebVTT format.

    Raises:
        HTTPException: status 500 if the conversion fails for any reason.
    """
    try:
        # No authentication is needed here, so the service is built with
        # empty credentials.
        converter = VolcengineCaptionService("", "")
        vtt_text = converter.to_vtt(result.utterances)

        conversion_payload = {
            "vtt_content": vtt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=conversion_payload)

    except Exception as exc:
        logger.error(f"转换VTT失败: {exc}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(exc)}")
|