# Files
#
# 99 lines
# 3.4 KiB
# Python

"""
字幕生成 Schema
===============
火山引擎音视频字幕服务的请求/响应模型。
"""
from __future__ import annotations
from pydantic import BaseModel, Field
class CaptionWord(BaseModel):
    """Timeline information for a single character or word."""

    # The character/word itself.
    text: str = Field(
        description="字/词内容",
    )
    # Start offset, in milliseconds.
    start_time: int = Field(
        description="开始时间(毫秒)",
    )
    # End offset, in milliseconds.
    end_time: int = Field(
        description="结束时间(毫秒)",
    )
class CaptionUtterance(BaseModel):
    """Timeline information for one sentence / one subtitle segment."""

    # Text content of the segment.
    text: str = Field(
        description="文本内容",
    )
    # Start offset, in milliseconds.
    start_time: int = Field(
        description="开始时间(毫秒)",
    )
    # End offset, in milliseconds.
    end_time: int = Field(
        description="结束时间(毫秒)",
    )
    # Word-level timeline; a fresh empty list when omitted, None is also accepted.
    words: list[CaptionWord] | None = Field(
        default_factory=list,
        description="字词级时间轴",
    )
class CaptionTaskResponse(BaseModel):
    """Response returned when a caption task is submitted."""

    # Identifier of the submitted task.
    task_id: str = Field(
        description="任务ID",
    )
    # Task status; the description lists pending/processing/completed/failed.
    status: str = Field(
        description="任务状态: pending/processing/completed/failed",
    )
class CaptionResult(BaseModel):
    """Result of a caption generation task."""

    # Status code: 0 = success, 2000 = still processing.
    code: int = Field(
        description="状态码: 0=成功, 2000=处理中",
    )
    # Human-readable status message.
    message: str = Field(
        description="状态信息",
    )
    # Audio duration, in seconds.
    duration: float = Field(
        description="音频时长(秒)",
    )
    # Caption timeline entries; a fresh empty list when omitted, None is also accepted.
    utterances: list[CaptionUtterance] | None = Field(
        description="字幕时间轴列表",
        default_factory=list,
    )
class CaptionSubmitRequest(BaseModel):
    """Request body for submitting a caption generation task."""

    # URL of the audio/video file (required).
    audio_url: str = Field(default=..., description="音频/视频文件URL")
    # Language code; defaults to "zh-CN".
    language: str = Field(
        default="zh-CN",
        description="语言: zh-CN, en-US, ja-JP, ko-KR, es-MX, ru-RU, fr-FR, yue, wuu, nan, ug",
    )
    # Recognition type: auto, speech, or singing (lyrics); defaults to "auto".
    caption_type: str = Field(
        default="auto",
        description="识别类型: auto(自动), speech(说话), singing(歌词)",
    )
    # Automatic punctuation toggle; on by default.
    use_punc: bool = Field(default=True, description="自动标点: True/False")
    # Number conversion (Chinese numerals to Arabic digits); on by default.
    use_itn: bool = Field(
        default=True,
        description="数字转换: True(中文数字转阿拉伯数字)",
    )
    # Characters per line, constrained to 1..100; defaults to 46.
    words_per_line: int = Field(default=46, ge=1, le=100, description="每行字数")
    # Lines per screen, constrained to 1..5; defaults to 1.
    max_lines: int = Field(default=1, ge=1, le=5, description="每屏行数")
class CaptionQueryRequest(BaseModel):
    """Request body for querying a caption task."""

    # Identifier of the task to query (required).
    task_id: str = Field(default=..., description="任务ID")
    # Whether to block while waiting for the result; True by default.
    blocking: bool = Field(default=True, description="是否阻塞等待结果")
class AutoAlignSubmitRequest(BaseModel):
    """Request body for submitting an automatic subtitle alignment (timing) task."""

    # URL of the audio/video file (required).
    audio_url: str = Field(default=..., description="音频/视频文件URL")
    # Subtitle text to be aligned against the audio (required).
    audio_text: str = Field(default=..., description="要打轴的字幕文本")
    # Recognition type: speech or singing (lyrics); defaults to "speech".
    caption_type: str = Field(
        default="speech",
        description="识别类型: speech(说话), singing(歌词)",
    )
    # Punctuation mode, constrained to 1..3: 1 = omit sentence-final,
    # 2 = replace with spaces, 3 = keep intact (default).
    sta_punc_mode: int = Field(
        default=3,
        ge=1,
        le=3,
        description="标点模式: 1=省略句末, 2=空格代替, 3=保留完整",
    )
class AutoAlignResult(BaseModel):
    """Result of an automatic subtitle alignment (timing) task."""

    # Status code: 0 = success, 2000 = still processing.
    code: int = Field(
        description="状态码: 0=成功, 2000=处理中",
    )
    # Human-readable status message.
    message: str = Field(
        description="状态信息",
    )
    # Audio duration, in seconds.
    duration: float = Field(
        description="音频时长(秒)",
    )
    # Aligned caption timeline; a fresh empty list when omitted, None is also accepted.
    utterances: list[CaptionUtterance] | None = Field(
        description="打轴后的字幕时间轴",
        default_factory=list,
    )
class SrtSubtitleResponse(BaseModel):
    """Response carrying subtitles in SRT format."""

    # Subtitle content rendered as SRT text.
    srt_content: str = Field(
        description="SRT 格式字幕内容",
    )
    # The original timeline data (required).
    utterances: list[CaptionUtterance] = Field(
        description="原始时间轴数据",
    )