99 lines
3.4 KiB
Python
99 lines
3.4 KiB
Python
"""
|
|
字幕生成 Schema
|
|
===============
|
|
|
|
火山引擎音视频字幕服务的请求/响应模型。
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class CaptionWord(BaseModel):
    """Timing information for a single character or word.

    Attributes:
        text: The character/word content.
        start_time: Start time, in milliseconds.
        end_time: End time, in milliseconds.
    """

    # All three fields are required (explicit ``...`` marker).
    text: str = Field(
        ...,
        description="字/词内容",
    )
    start_time: int = Field(
        ...,
        description="开始时间(毫秒)",
    )
    end_time: int = Field(
        ...,
        description="结束时间(毫秒)",
    )
|
|
|
|
|
|
class CaptionUtterance(BaseModel):
    """Timing information for one sentence / one caption segment.

    Attributes:
        text: The text content of the segment.
        start_time: Start time, in milliseconds.
        end_time: End time, in milliseconds.
        words: Word-level timeline; may be ``None`` and defaults to a new
            empty list when omitted.
    """

    text: str = Field(
        ...,
        description="文本内容",
    )
    start_time: int = Field(
        ...,
        description="开始时间(毫秒)",
    )
    end_time: int = Field(
        ...,
        description="结束时间(毫秒)",
    )
    # A factory is used so each instance gets its own list object.
    words: list[CaptionWord] | None = Field(
        default_factory=list,
        description="字词级时间轴",
    )
|
|
|
|
|
|
class CaptionTaskResponse(BaseModel):
    """Response returned when a caption task is submitted.

    Attributes:
        task_id: Identifier of the task.
        status: Task status; the field description lists
            pending/processing/completed/failed, but the type is a plain
            ``str`` and is not restricted to those values.
    """

    task_id: str = Field(
        ...,
        description="任务ID",
    )
    status: str = Field(
        ...,
        description="任务状态: pending/processing/completed/failed",
    )
|
|
|
|
|
|
class CaptionResult(BaseModel):
    """Result of a caption generation task.

    Attributes:
        code: Status code; per the field description, 0 means success and
            2000 means still processing.
        message: Status message.
        duration: Audio duration, in seconds.
        utterances: Caption timeline entries; may be ``None`` and defaults
            to a new empty list when omitted.
    """

    code: int = Field(
        ...,
        description="状态码: 0=成功, 2000=处理中",
    )
    message: str = Field(
        ...,
        description="状态信息",
    )
    duration: float = Field(
        ...,
        description="音频时长(秒)",
    )
    # A factory is used so each instance gets its own list object.
    utterances: list[CaptionUtterance] | None = Field(
        default_factory=list,
        description="字幕时间轴列表",
    )
|
|
|
|
|
|
class CaptionSubmitRequest(BaseModel):
    """Request payload for submitting a caption generation task.

    Attributes:
        audio_url: URL of the audio/video file (required).
        language: Language code; defaults to ``"zh-CN"``. The field
            description lists the codes the author documented.
        caption_type: Recognition type; defaults to ``"auto"``. Documented
            values are auto, speech and singing (lyrics).
        use_punc: Whether to punctuate automatically; defaults to ``True``.
        use_itn: Whether to convert Chinese numerals to Arabic digits;
            defaults to ``True``.
        words_per_line: Number of characters per line, constrained to
            1..100; defaults to 46.
        max_lines: Number of lines per screen, constrained to 1..5;
            defaults to 1.
    """

    audio_url: str = Field(
        ...,
        description="音频/视频文件URL",
    )
    language: str = Field(
        default="zh-CN",
        description="语言: zh-CN, en-US, ja-JP, ko-KR, es-MX, ru-RU, fr-FR, yue, wuu, nan, ug",
    )
    caption_type: str = Field(
        default="auto",
        description="识别类型: auto(自动), speech(说话), singing(歌词)",
    )
    use_punc: bool = Field(
        default=True,
        description="自动标点: True/False",
    )
    use_itn: bool = Field(
        default=True,
        description="数字转换: True(中文数字转阿拉伯数字)",
    )
    # Layout options are range-checked by pydantic via ge/le.
    words_per_line: int = Field(
        default=46,
        ge=1,
        le=100,
        description="每行字数",
    )
    max_lines: int = Field(
        default=1,
        ge=1,
        le=5,
        description="每屏行数",
    )
|
|
|
|
|
|
class CaptionQueryRequest(BaseModel):
    """Request payload for querying a caption task.

    Attributes:
        task_id: Identifier of the task to query (required).
        blocking: Whether to block while waiting for the result; defaults
            to ``True``.
    """

    task_id: str = Field(
        ...,
        description="任务ID",
    )
    blocking: bool = Field(
        default=True,
        description="是否阻塞等待结果",
    )
|
|
|
|
|
|
class AutoAlignSubmitRequest(BaseModel):
    """Request payload for submitting an automatic caption-alignment task.

    Attributes:
        audio_url: URL of the audio/video file (required).
        audio_text: Caption text to be aligned to the audio (required).
        caption_type: Recognition type; defaults to ``"speech"``.
            Documented values are speech and singing (lyrics).
        sta_punc_mode: Punctuation mode, constrained to 1..3; defaults to
            3. Per the field description: 1 = omit sentence-final
            punctuation, 2 = replace with spaces, 3 = keep in full.
    """

    audio_url: str = Field(
        ...,
        description="音频/视频文件URL",
    )
    audio_text: str = Field(
        ...,
        description="要打轴的字幕文本",
    )
    caption_type: str = Field(
        default="speech",
        description="识别类型: speech(说话), singing(歌词)",
    )
    # Range-checked by pydantic via ge/le.
    sta_punc_mode: int = Field(
        default=3,
        ge=1,
        le=3,
        description="标点模式: 1=省略句末, 2=空格代替, 3=保留完整",
    )
|
|
|
|
|
|
class AutoAlignResult(BaseModel):
    """Result of an automatic caption-alignment task.

    Attributes:
        code: Status code; per the field description, 0 means success and
            2000 means still processing.
        message: Status message.
        duration: Audio duration, in seconds.
        utterances: Aligned caption timeline entries; may be ``None`` and
            defaults to a new empty list when omitted.
    """

    code: int = Field(
        ...,
        description="状态码: 0=成功, 2000=处理中",
    )
    message: str = Field(
        ...,
        description="状态信息",
    )
    duration: float = Field(
        ...,
        description="音频时长(秒)",
    )
    # A factory is used so each instance gets its own list object.
    utterances: list[CaptionUtterance] | None = Field(
        default_factory=list,
        description="打轴后的字幕时间轴",
    )
|
|
|
|
|
|
class SrtSubtitleResponse(BaseModel):
    """Response carrying captions in SRT format.

    Attributes:
        srt_content: Caption content in SRT format (required).
        utterances: The original timeline data (required; unlike the
            result models, this list has no default and is not nullable).
    """

    srt_content: str = Field(
        ...,
        description="SRT 格式字幕内容",
    )
    utterances: list[CaptionUtterance] = Field(
        ...,
        description="原始时间轴数据",
    )
|