Files
meijiaka-zy/python-api/app/api/v1/caption.py
T

375 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
火山引擎音视频字幕 API 路由
============================
提供字幕生成、自动打轴等功能。
"""
import logging
from fastapi import APIRouter, HTTPException
from app.schemas.caption import (
AutoAlignResult,
AutoAlignSubmitRequest,
CaptionResult,
CaptionSubmitRequest,
CaptionTaskResponse,
SrtSubtitleResponse,
)
from app.schemas.common import ApiResponse, success_response
from app.services.volcengine_caption_service import (
VolcengineCaptionError,
VolcengineCaptionService,
get_caption_service,
)
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Router that collects the caption endpoints; every route declared on it is
# served under the "/caption" prefix and tagged "Caption".
router = APIRouter(prefix="/caption", tags=["Caption"])
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request: CaptionSubmitRequest):
    """
    Submit a caption generation task.

    Forwards the audio/video file URL and the caption options carried by
    the request body to the caption service, then wraps the returned task
    id in a response whose status is ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        submit_options = {
            "audio_url": request.audio_url,
            "language": request.language,
            "caption_type": request.caption_type,
            "use_punc": request.use_punc,
            "use_itn": request.use_itn,
            "words_per_line": request.words_per_line,
            "max_lines": request.max_lines,
        }
        new_task_id = await caption_service.submit_caption_task(**submit_options)
        pending_task = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=pending_task, message="字幕任务已提交")
    except VolcengineCaptionError as err:
        logger.error(f"提交字幕任务失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"提交字幕任务异常: {err}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(err)}")
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, blocking: bool = True):
    """
    Look up the result of a caption task.

    Args:
        task_id: Identifier of the task to query.
        blocking: Forwarded to the service; whether to block while waiting
            for the result (defaults to True).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        caption_result = await caption_service.query_caption_task(
            task_id, blocking=blocking
        )
        return success_response(data=caption_result)
    except VolcengineCaptionError as err:
        logger.error(f"查询字幕任务失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"查询字幕任务异常: {err}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(err)}")
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """
    Generate captions in a single call (full flow).

    Per the original note, the service submits the task and polls for the
    outcome, so the final caption data is returned directly. Intended for
    callers that do not need asynchronous handling.

    Args:
        request: Audio URL plus caption options.
        max_wait_time: Forwarded to the service unchanged
            (presumably seconds — TODO confirm).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        generate_options = {
            "audio_url": request.audio_url,
            "language": request.language,
            "caption_type": request.caption_type,
            "use_punc": request.use_punc,
            "use_itn": request.use_itn,
            "words_per_line": request.words_per_line,
            "max_lines": request.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generate_options)
        return success_response(data=caption_result)
    except VolcengineCaptionError as err:
        logger.error(f"生成字幕失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"生成字幕异常: {err}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(err)}")
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request: CaptionSubmitRequest,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """
    Generate ASS-format subtitles (full flow).

    Runs the caption generation flow through the service, converts the
    resulting utterances with ``service.to_ass`` and returns the ASS text
    together with the utterances, the duration and the font name
    ``"DouyinSansBold"``.

    Args:
        request: Audio URL plus caption options.
        video_width: Video width passed to the ASS conversion (default 1080).
        video_height: Video height passed to the ASS conversion (default 1920).
        max_wait_time: Forwarded to the service unchanged
            (presumably seconds — TODO confirm).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.generate_caption(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
            max_wait_time=max_wait_time,
        )
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )
    except VolcengineCaptionError as e:
        # Service-level failures map to 400, matching the other caption
        # endpoints in this router instead of falling through to a 500.
        logger.error(f"生成ASS字幕失败: {e}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"生成ASS字幕失败: {e}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(e)}")
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """
    Generate SRT-format subtitles (full flow).

    Runs the caption generation flow through the service, converts the
    utterances with ``to_srt`` and returns the SRT text alongside the
    utterances.

    Args:
        request: Audio URL plus caption options.
        max_wait_time: Forwarded to the service unchanged
            (presumably seconds — TODO confirm).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        generate_options = {
            "audio_url": request.audio_url,
            "language": request.language,
            "caption_type": request.caption_type,
            "use_punc": request.use_punc,
            "use_itn": request.use_itn,
            "words_per_line": request.words_per_line,
            "max_lines": request.max_lines,
            "max_wait_time": max_wait_time,
        }
        caption_result = await caption_service.generate_caption(**generate_options)
        srt_text = caption_service.to_srt(caption_result.utterances)
        srt_payload = SrtSubtitleResponse(
            srt_content=srt_text,
            utterances=caption_result.utterances,
        )
        return success_response(data=srt_payload)
    except VolcengineCaptionError as err:
        logger.error(f"生成SRT字幕失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"生成SRT字幕异常: {err}")
        raise HTTPException(status_code=500, detail=f"生成失败: {str(err)}")
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request: AutoAlignSubmitRequest):
    """
    Submit an automatic subtitle timing (alignment) task.

    Per the original note, this attaches a timeline to caption text that
    already exists. The new task id is returned with status ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        align_options = {
            "audio_url": request.audio_url,
            "audio_text": request.audio_text,
            "caption_type": request.caption_type,
            "sta_punc_mode": request.sta_punc_mode,
        }
        new_task_id = await caption_service.submit_auto_align_task(**align_options)
        pending_task = CaptionTaskResponse(task_id=new_task_id, status="pending")
        return success_response(data=pending_task, message="打轴任务已提交")
    except VolcengineCaptionError as err:
        logger.error(f"提交打轴任务失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"提交打轴任务异常: {err}")
        raise HTTPException(status_code=500, detail=f"提交失败: {str(err)}")
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, blocking: bool = True):
    """
    Look up the result of an alignment task.

    Args:
        task_id: Identifier of the task to query.
        blocking: Forwarded to the service as its ``blocking`` argument
            (defaults to True).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        caption_service = await get_caption_service()
        align_result = await caption_service.query_auto_align_task(
            task_id, blocking=blocking
        )
        return success_response(data=align_result)
    except VolcengineCaptionError as err:
        logger.error(f"查询打轴任务失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"查询打轴任务异常: {err}")
        raise HTTPException(status_code=500, detail=f"查询失败: {str(err)}")
@router.post("/ata/align")
async def auto_align_caption(request: AutoAlignSubmitRequest, max_wait_time: int = 120):
    """
    Automatic subtitle timing in a single call (full flow).

    Per the original note, the service submits the alignment task and polls
    for the outcome, so the final data is returned directly. The response
    payload is assembled by hand as a plain dict of the duration and the
    text / start_time / end_time of each utterance.

    Args:
        request: Audio URL, caption text and alignment options.
        max_wait_time: Forwarded to the service unchanged
            (presumably seconds — TODO confirm).

    Raises:
        HTTPException: 400 when the service raises VolcengineCaptionError,
            500 for any other exception.
    """
    try:
        # Only the first 50 characters of the URL are written to the log.
        logger.info(f"[Caption API] Auto align request: audio_url={request.audio_url[:50]}...")
        caption_service = await get_caption_service()
        aligned = await caption_service.auto_align_caption(
            audio_url=request.audio_url,
            audio_text=request.audio_text,
            caption_type=request.caption_type,
            sta_punc_mode=request.sta_punc_mode,
            max_wait_time=max_wait_time,
        )

        utterance_count = len(aligned.utterances) if aligned.utterances else 0
        logger.info(f"[Caption API] Auto align result: utterances_count={utterance_count}")
        if aligned.utterances:
            logger.info(f"[Caption API] First utterance: {aligned.utterances[0]}")

        # Serialize by hand into a dict so nested models are handled correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": aligned.duration,
        }
        utterance_rows = []
        for item in aligned.utterances or []:
            utterance_rows.append(
                {
                    "text": item.text,
                    "start_time": item.start_time,
                    "end_time": item.end_time,
                }
            )
        response_data["utterances"] = utterance_rows

        logger.info(f"[Caption API] Response data: {response_data}")
        return success_response(data=response_data)
    except VolcengineCaptionError as err:
        logger.error(f"自动打轴失败: {err}")
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"自动打轴异常: {err}")
        raise HTTPException(status_code=500, detail=f"打轴失败: {str(err)}")
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """
    Convert a caption result into ASS format.

    Returns the ASS text, the font name ``"DouyinSansBold"`` and the number
    of utterances that were converted.

    Args:
        result: Caption result whose utterances are converted.
        video_width: Video width passed to the ASS conversion (default 1080).
        video_height: Video height passed to the ASS conversion (default 1920).

    Raises:
        HTTPException: 500 when anything in the conversion fails.
    """
    try:
        # Built with empty credentials: no authentication needed here.
        converter = VolcengineCaptionService("", "")
        ass_text = converter.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        converted = {
            "ass_content": ass_text,
            "font": "DouyinSansBold",
            "utterances_count": len(result.utterances),
        }
        return success_response(data=converted)
    except Exception as err:
        logger.error(f"转换ASS失败: {err}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(err)}")
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """
    Convert a caption result into SRT format.

    Per the original note, this is meant for turning the raw data returned
    by /generate into SRT. Returns the SRT text and the utterance count.

    Raises:
        HTTPException: 500 when anything in the conversion fails.
    """
    try:
        # Built with empty credentials: no authentication needed here.
        converter = VolcengineCaptionService("", "")
        srt_text = converter.to_srt(result.utterances)
        converted = {
            "srt_content": srt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=converted)
    except Exception as err:
        logger.error(f"转换SRT失败: {err}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(err)}")
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """
    Convert a caption result into WebVTT format.

    Returns the WebVTT text and the number of utterances that were converted.

    Raises:
        HTTPException: 500 when anything in the conversion fails.
    """
    try:
        # Built with empty credentials: no authentication needed here.
        converter = VolcengineCaptionService("", "")
        vtt_text = converter.to_vtt(result.utterances)
        converted = {
            "vtt_content": vtt_text,
            "utterances_count": len(result.utterances),
        }
        return success_response(data=converted)
    except Exception as err:
        logger.error(f"转换VTT失败: {err}")
        raise HTTPException(status_code=500, detail=f"转换失败: {str(err)}")