"""Volcengine audio/video caption API routes.

Provides endpoints for caption generation and automatic caption alignment
(attaching a time axis to existing caption text).
"""

import logging

from fastapi import APIRouter, HTTPException

from app.schemas.caption import (
    AutoAlignResult,
    AutoAlignSubmitRequest,
    CaptionResult,
    CaptionSubmitRequest,
    CaptionTaskResponse,
    SrtSubtitleResponse,
)
from app.schemas.common import ApiResponse, success_response
from app.services.volcengine_caption_service import (
    VolcengineCaptionError,
    VolcengineCaptionService,
    get_caption_service,
)

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/caption", tags=["Caption"])


@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request: CaptionSubmitRequest):
    """Submit a caption generation task.

    Submits the URL of an audio/video file so that captions with a time
    axis can be generated for it.

    Args:
        request: Caption options; every field is forwarded unchanged to the
            service's ``submit_caption_task``.

    Returns:
        A success response wrapping a ``CaptionTaskResponse`` that carries
        the new task id with status ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        task_id = await service.submit_caption_task(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
        )
        return success_response(
            data=CaptionTaskResponse(
                task_id=task_id,
                status="pending",
            ),
            message="字幕任务已提交",
        )
    except VolcengineCaptionError as e:
        # Expected service-level failure: report it as a client error.
        logger.error(f"提交字幕任务失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: keep the traceback in the log for diagnosis.
        logger.exception(f"提交字幕任务异常: {e}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(e)}") from e


@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, blocking: bool = True):
    """Query the result of a caption task.

    Args:
        task_id: Task ID.
        blocking: Whether to block while waiting for the result
            (default True); forwarded to the service's
            ``query_caption_task``.

    Returns:
        A success response wrapping whatever the service returns.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.query_caption_task(task_id, blocking=blocking)
        return success_response(data=result)
    except VolcengineCaptionError as e:
        logger.error(f"查询字幕任务失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: keep the traceback in the log for diagnosis.
        logger.exception(f"查询字幕任务异常: {e}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(e)}") from e
async def _run_caption_generation(
    service: VolcengineCaptionService,
    request: CaptionSubmitRequest,
    max_wait_time: int,
):
    """Call ``service.generate_caption`` with the fields of ``request``.

    Shared by the ``/generate*`` endpoints so the argument list is written
    once.
    """
    return await service.generate_caption(
        audio_url=request.audio_url,
        language=request.language,
        caption_type=request.caption_type,
        use_punc=request.use_punc,
        use_itn=request.use_itn,
        words_per_line=request.words_per_line,
        max_lines=request.max_lines,
        max_wait_time=max_wait_time,
    )


@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """Generate captions (full flow).

    Submits the task and polls for the result, returning the final caption
    data directly. Suited to callers that do not need asynchronous handling.

    Args:
        request: Caption options forwarded to the service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably seconds — confirm against the service).

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await _run_caption_generation(service, request, max_wait_time)
        return success_response(data=result)
    except VolcengineCaptionError as e:
        logger.error(f"生成字幕失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: keep the traceback in the log for diagnosis.
        logger.exception(f"生成字幕异常: {e}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}") from e


@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request: CaptionSubmitRequest,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """Generate captions in ASS format (full flow, Douyin Sans Bold font).

    Args:
        request: Caption options forwarded to the service.
        video_width: Video width (default 1080), passed to ``to_ass``.
        video_height: Video height (default 1920), passed to ``to_ass``.
        max_wait_time: Forwarded to the service as ``max_wait_time``.

    Returns:
        A success response whose data holds ``ass_content``, ``utterances``,
        ``duration`` and ``font``.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await _run_caption_generation(service, request, max_wait_time)
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )
    except VolcengineCaptionError as e:
        # Service-level failures map to 400, matching the sibling endpoints.
        logger.error(f"生成ASS字幕失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception(f"生成ASS字幕失败: {e}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}") from e


@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """Generate captions in SRT format (full flow).

    Returns the SRT subtitle file content directly, together with the
    utterances it was built from.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await _run_caption_generation(service, request, max_wait_time)
        srt_content = service.to_srt(result.utterances)
        return success_response(
            data=SrtSubtitleResponse(
                srt_content=srt_content,
                utterances=result.utterances,
            )
        )
    except VolcengineCaptionError as e:
        logger.error(f"生成SRT字幕失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception(f"生成SRT字幕异常: {e}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}") from e


@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request: AutoAlignSubmitRequest):
    """Submit an automatic caption alignment task.

    Automatically attaches a time axis to existing caption text.

    Returns:
        A success response wrapping a ``CaptionTaskResponse`` that carries
        the new task id with status ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        task_id = await service.submit_auto_align_task(
            audio_url=request.audio_url,
            audio_text=request.audio_text,
            caption_type=request.caption_type,
            sta_punc_mode=request.sta_punc_mode,
        )
        return success_response(
            data=CaptionTaskResponse(
                task_id=task_id,
                status="pending",
            ),
            message="打轴任务已提交",
        )
    except VolcengineCaptionError as e:
        logger.error(f"提交打轴任务失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception(f"提交打轴任务异常: {e}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(e)}") from e


@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, blocking: bool = True):
    """Query the result of an alignment task.

    Args:
        task_id: Task ID.
        blocking: Forwarded to the service's ``query_auto_align_task``.

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.query_auto_align_task(task_id, blocking=blocking)
        return success_response(data=result)
    except VolcengineCaptionError as e:
        logger.error(f"查询打轴任务失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception(f"查询打轴任务异常: {e}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(e)}") from e


@router.post("/ata/align")
async def auto_align_caption(request: AutoAlignSubmitRequest, max_wait_time: int = 120):
    """Automatic caption alignment (full flow).

    Submits the alignment task and polls for the result, returning the
    final data directly.

    Returns:
        A success response whose data is a plain dict with ``code``,
        ``message``, ``duration`` and a list of ``utterances`` (each with
        ``text``, ``start_time`` and ``end_time``).

    Raises:
        HTTPException: 400 when the service raises
            ``VolcengineCaptionError``; 500 for any other exception.
    """
    try:
        logger.info(f"[Caption API] Auto align request: audio_url={request.audio_url[:50]}...")
        service = await get_caption_service()
        result = await service.auto_align_caption(
            audio_url=request.audio_url,
            audio_text=request.audio_text,
            caption_type=request.caption_type,
            sta_punc_mode=request.sta_punc_mode,
            max_wait_time=max_wait_time,
        )
        logger.info(
            f"[Caption API] Auto align result: utterances_count={len(result.utterances) if result.utterances else 0}"
        )
        if result.utterances:
            logger.info(f"[Caption API] First utterance: {result.utterances[0]}")

        # Serialize by hand into plain dicts so nested models are handled
        # correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": result.duration,
            "utterances": [
                {
                    "text": u.text,
                    "start_time": u.start_time,
                    "end_time": u.end_time,
                }
                for u in (result.utterances or [])
            ],
        }
        logger.info(f"[Caption API] Response data: {response_data}")
        return success_response(data=response_data)
    except VolcengineCaptionError as e:
        logger.error(f"自动打轴失败: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception(f"自动打轴异常: {e}")
        raise HTTPException(status_code=500, detail=f"打轴失败: {str(e)}") from e


@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """Convert a caption result to ASS format (Douyin Sans Bold font).

    Args:
        result: Caption result whose ``utterances`` are converted.
        video_width: Video width (default 1080), passed to ``to_ass``.
        video_height: Video height (default 1920), passed to ``to_ass``.

    Raises:
        HTTPException: 500 when the conversion raises.
    """
    try:
        # Conversion needs no authentication, so empty credentials are passed.
        service = VolcengineCaptionService("", "")
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "font": "DouyinSansBold",
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        logger.exception(f"转换ASS失败: {e}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}") from e


@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """Convert a caption result to SRT format.

    Used to turn the raw data returned by ``/generate`` into SRT.

    Raises:
        HTTPException: 500 when the conversion raises.
    """
    try:
        # Conversion needs no authentication, so empty credentials are passed.
        service = VolcengineCaptionService("", "")
        srt_content = service.to_srt(result.utterances)
        return success_response(
            data={
                "srt_content": srt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        logger.exception(f"转换SRT失败: {e}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}") from e


@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """Convert a caption result to WebVTT format.

    Raises:
        HTTPException: 500 when the conversion raises.
    """
    try:
        # Conversion needs no authentication, so empty credentials are passed.
        service = VolcengineCaptionService("", "")
        vtt_content = service.to_vtt(result.utterances)
        return success_response(
            data={
                "vtt_content": vtt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        logger.exception(f"转换VTT失败: {e}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(e)}") from e