375 lines
12 KiB
Python
375 lines
12 KiB
Python
"""
|
||
火山引擎音视频字幕 API 路由
|
||
============================
|
||
|
||
提供字幕生成、自动打轴等功能。
|
||
"""
|
||
|
||
import logging
|
||
|
||
from fastapi import APIRouter, HTTPException
|
||
|
||
from app.schemas.caption import (
|
||
AutoAlignResult,
|
||
AutoAlignSubmitRequest,
|
||
CaptionResult,
|
||
CaptionSubmitRequest,
|
||
CaptionTaskResponse,
|
||
SrtSubtitleResponse,
|
||
)
|
||
from app.schemas.common import ApiResponse, success_response
|
||
from app.services.volcengine_caption_service import (
|
||
VolcengineCaptionError,
|
||
VolcengineCaptionService,
|
||
get_caption_service,
|
||
)
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router that groups every endpoint in this file under the "/caption" prefix.
router = APIRouter(
    prefix="/caption",
    tags=["Caption"],
)
|
||
|
||
|
||
@router.post("/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_caption_task(request: CaptionSubmitRequest):
    """Submit a caption generation task.

    Submits an audio/video file URL so that captions with a timeline can be
    generated for it.

    Args:
        request: Submission payload. Its ``audio_url``, ``language``,
            ``caption_type``, ``use_punc``, ``use_itn``, ``words_per_line``
            and ``max_lines`` fields are forwarded unchanged to the caption
            service.

    Returns:
        The ``success_response`` envelope wrapping a ``CaptionTaskResponse``
        that carries the new task id with status ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        task_id = await service.submit_caption_task(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
        )
        return success_response(
            data=CaptionTaskResponse(task_id=task_id, status="pending"),
            message="字幕任务已提交",
        )
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("提交字幕任务失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("提交字幕任务异常: %s", e)
        raise HTTPException(status_code=500, detail=f"提交失败: {e}") from e
|
||
|
||
|
||
@router.get("/query/{task_id}", response_model=ApiResponse[CaptionResult])
async def query_caption_task(task_id: str, blocking: bool = True):
    """Query the result of a caption task.

    Args:
        task_id: Task ID.
        blocking: Whether to block while waiting for the result
            (default True). Passed straight through to the service.

    Returns:
        The ``success_response`` envelope wrapping whatever the service's
        ``query_caption_task`` returned.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.query_caption_task(task_id, blocking=blocking)
        return success_response(data=result)
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("查询字幕任务失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("查询字幕任务异常: %s", e)
        raise HTTPException(status_code=500, detail=f"查询失败: {e}") from e
|
||
|
||
|
||
@router.post("/generate", response_model=ApiResponse[CaptionResult])
async def generate_caption(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """Generate captions (full workflow).

    Submits the task and polls for the result, returning the final caption
    data directly. Suitable for scenarios that do not need asynchronous
    processing.

    Args:
        request: Submission payload; its fields are forwarded unchanged to
            the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably the maximum wait in seconds -- confirm against the
            service implementation).

    Returns:
        The ``success_response`` envelope wrapping the service result.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.generate_caption(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
            max_wait_time=max_wait_time,
        )
        return success_response(data=result)
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("生成字幕失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("生成字幕异常: %s", e)
        raise HTTPException(status_code=500, detail=f"生成失败: {e}") from e
|
||
|
||
|
||
@router.post("/generate-ass", response_model=ApiResponse[dict])
async def generate_ass(
    request: CaptionSubmitRequest,
    video_width: int = 1080,
    video_height: int = 1920,
    max_wait_time: int = 120,
):
    """Generate captions in ASS format (full workflow, Douyin Sans font).

    Runs the same generation call as ``/generate`` and then renders the
    resulting utterances with the service's ``to_ass``.

    Args:
        request: Submission payload; its fields are forwarded unchanged to
            the caption service.
        video_width: Video width (default 1080).
        video_height: Video height (default 1920).
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably the maximum wait in seconds -- confirm against the
            service implementation).

    Returns:
        The ``success_response`` envelope wrapping a dict with the keys
        ``ass_content``, ``utterances``, ``duration`` and ``font``.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.generate_caption(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
            max_wait_time=max_wait_time,
        )
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "utterances": result.utterances,
                "duration": result.duration,
                "font": "DouyinSansBold",
            }
        )
    except VolcengineCaptionError as e:
        # Same mapping as the sibling /generate and /generate-srt endpoints:
        # an error reported by the caption service is a 400, not a 500.
        logger.error("生成ASS字幕失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("生成ASS字幕失败: %s", e)
        raise HTTPException(status_code=500, detail=f"生成失败: {e}") from e
|
||
|
||
|
||
@router.post("/generate-srt", response_model=ApiResponse[SrtSubtitleResponse])
async def generate_srt(request: CaptionSubmitRequest, max_wait_time: int = 120):
    """Generate captions in SRT format (full workflow).

    Returns the SRT subtitle file content directly.

    Args:
        request: Submission payload; its fields are forwarded unchanged to
            the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably the maximum wait in seconds -- confirm against the
            service implementation).

    Returns:
        The ``success_response`` envelope wrapping an ``SrtSubtitleResponse``
        with the rendered SRT text and the source utterances.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.generate_caption(
            audio_url=request.audio_url,
            language=request.language,
            caption_type=request.caption_type,
            use_punc=request.use_punc,
            use_itn=request.use_itn,
            words_per_line=request.words_per_line,
            max_lines=request.max_lines,
            max_wait_time=max_wait_time,
        )
        srt_content = service.to_srt(result.utterances)
        return success_response(
            data=SrtSubtitleResponse(
                srt_content=srt_content,
                utterances=result.utterances,
            )
        )
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("生成SRT字幕失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("生成SRT字幕异常: %s", e)
        raise HTTPException(status_code=500, detail=f"生成失败: {e}") from e
|
||
|
||
|
||
@router.post("/ata/submit", response_model=ApiResponse[CaptionTaskResponse])
async def submit_auto_align_task(request: AutoAlignSubmitRequest):
    """Submit an automatic caption alignment task.

    Automatically attaches a timeline to existing caption text.

    Args:
        request: Alignment payload. Its ``audio_url``, ``audio_text``,
            ``caption_type`` and ``sta_punc_mode`` fields are forwarded
            unchanged to the caption service.

    Returns:
        The ``success_response`` envelope wrapping a ``CaptionTaskResponse``
        that carries the new task id with status ``"pending"``.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        task_id = await service.submit_auto_align_task(
            audio_url=request.audio_url,
            audio_text=request.audio_text,
            caption_type=request.caption_type,
            sta_punc_mode=request.sta_punc_mode,
        )
        return success_response(
            data=CaptionTaskResponse(task_id=task_id, status="pending"),
            message="打轴任务已提交",
        )
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("提交打轴任务失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("提交打轴任务异常: %s", e)
        raise HTTPException(status_code=500, detail=f"提交失败: {e}") from e
|
||
|
||
|
||
@router.get("/ata/query/{task_id}", response_model=ApiResponse[AutoAlignResult])
async def query_auto_align_task(task_id: str, blocking: bool = True):
    """Query the result of an alignment task.

    Args:
        task_id: Task ID.
        blocking: Passed straight through to the service's
            ``query_auto_align_task`` as ``blocking`` (default True).

    Returns:
        The ``success_response`` envelope wrapping whatever the service
        returned.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        service = await get_caption_service()
        result = await service.query_auto_align_task(task_id, blocking=blocking)
        return success_response(data=result)
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("查询打轴任务失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("查询打轴任务异常: %s", e)
        raise HTTPException(status_code=500, detail=f"查询失败: {e}") from e
|
||
|
||
|
||
@router.post("/ata/align")
async def auto_align_caption(request: AutoAlignSubmitRequest, max_wait_time: int = 120):
    """Automatically align captions (full workflow).

    Submits the alignment task and polls for the result, returning the final
    data directly.

    Args:
        request: Alignment payload. Its ``audio_url``, ``audio_text``,
            ``caption_type`` and ``sta_punc_mode`` fields are forwarded
            unchanged to the caption service.
        max_wait_time: Forwarded to the service as ``max_wait_time``
            (presumably the maximum wait in seconds -- confirm against the
            service implementation).

    Returns:
        The ``success_response`` envelope wrapping a plain dict with the keys
        ``code``, ``message``, ``duration`` and ``utterances``; each utterance
        is reduced to ``text``, ``start_time`` and ``end_time``.

    Raises:
        HTTPException: 400 when the service raises ``VolcengineCaptionError``;
            500 for any other exception.
    """
    try:
        # Only the first 50 characters of the URL are logged.
        logger.info(
            "[Caption API] Auto align request: audio_url=%s...",
            request.audio_url[:50],
        )
        service = await get_caption_service()
        result = await service.auto_align_caption(
            audio_url=request.audio_url,
            audio_text=request.audio_text,
            caption_type=request.caption_type,
            sta_punc_mode=request.sta_punc_mode,
            max_wait_time=max_wait_time,
        )

        # Normalise a missing/empty utterance list once instead of at every use.
        utterances = result.utterances or []
        logger.info(
            "[Caption API] Auto align result: utterances_count=%s",
            len(utterances),
        )
        if utterances:
            logger.info("[Caption API] First utterance: %s", utterances[0])

        # Serialize manually into a plain dict so nested models are handled correctly.
        response_data = {
            "code": 0,
            "message": "Success",
            "duration": result.duration,
            "utterances": [
                {
                    "text": u.text,
                    "start_time": u.start_time,
                    "end_time": u.end_time,
                }
                for u in utterances
            ],
        }
        logger.info("[Caption API] Response data: %s", response_data)
        return success_response(data=response_data)
    except VolcengineCaptionError as e:
        # Errors reported by the caption service are returned to the client as 400.
        logger.error("自动打轴失败: %s", e)
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Unexpected failure: logger.exception records the traceback as well.
        logger.exception("自动打轴异常: %s", e)
        raise HTTPException(status_code=500, detail=f"打轴失败: {e}") from e
|
||
|
||
|
||
@router.post("/convert/ass", response_model=ApiResponse[dict])
async def convert_to_ass(
    result: CaptionResult,
    video_width: int = 1080,
    video_height: int = 1920,
):
    """Convert a caption result to ASS format (Douyin Sans font).

    Args:
        result: Caption result whose ``utterances`` are rendered.
        video_width: Passed to the service's ``to_ass`` (default 1080).
        video_height: Passed to the service's ``to_ass`` (default 1920).

    Returns:
        The ``success_response`` envelope wrapping a dict with the keys
        ``ass_content``, ``font`` and ``utterances_count``.

    Raises:
        HTTPException: 500 when the conversion raises any exception.
    """
    try:
        # Built with empty credentials: no authentication is needed here.
        service = VolcengineCaptionService("", "")
        ass_content = service.to_ass(
            result.utterances,
            video_width=video_width,
            video_height=video_height,
        )
        return success_response(
            data={
                "ass_content": ass_content,
                "font": "DouyinSansBold",
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("转换ASS失败: %s", e)
        raise HTTPException(status_code=500, detail=f"转换失败: {e}") from e
|
||
|
||
|
||
@router.post("/convert/srt", response_model=ApiResponse[dict])
async def convert_to_srt(result: CaptionResult):
    """Convert a caption result to SRT format.

    Used to turn the raw data returned by ``/generate`` into SRT.

    Args:
        result: Caption result whose ``utterances`` are rendered.

    Returns:
        The ``success_response`` envelope wrapping a dict with the keys
        ``srt_content`` and ``utterances_count``.

    Raises:
        HTTPException: 500 when the conversion raises any exception.
    """
    try:
        # Built with empty credentials: no authentication is needed here.
        service = VolcengineCaptionService("", "")
        srt_content = service.to_srt(result.utterances)
        return success_response(
            data={
                "srt_content": srt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("转换SRT失败: %s", e)
        raise HTTPException(status_code=500, detail=f"转换失败: {e}") from e
|
||
|
||
|
||
@router.post("/convert/vtt", response_model=ApiResponse[dict])
async def convert_to_vtt(result: CaptionResult):
    """Convert a caption result to WebVTT format.

    Args:
        result: Caption result whose ``utterances`` are rendered.

    Returns:
        The ``success_response`` envelope wrapping a dict with the keys
        ``vtt_content`` and ``utterances_count``.

    Raises:
        HTTPException: 500 when the conversion raises any exception.
    """
    try:
        # Built with empty credentials: no authentication is needed here.
        service = VolcengineCaptionService("", "")
        vtt_content = service.to_vtt(result.utterances)
        return success_response(
            data={
                "vtt_content": vtt_content,
                "utterances_count": len(result.utterances),
            }
        )
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception("转换VTT失败: %s", e)
        raise HTTPException(status_code=500, detail=f"转换失败: {e}") from e
|