95e55293c6
后端安全: - DEBUG 默认 True → False - 彻底移除 AUTH_BYPASS 认证绕过 - 验证码不再明文打印到日志 - 上传接口增加大小限制(500MB/20MB/100MB)与魔数校验 - python-jose → PyJWT, 更新 requirements.lock/uv.lock - Bandit 恢复关键规则(B104/B301/B305/B314/B324/B603/B607) - 修复 5 处 try_except_pass, 15 处加 nosec 注释 - 启用 Bandit pre-commit 钩子 前端安全: - 配置完整 CSP 策略 - 收紧 Capabilities(fs:allow-read-file → $RESOURCE/**) - 移除硬编码 devToken - 清理前端 TODO(美家卡智影命名统一) 部署修复: - docker-compose.prod 增加 alembic 迁移步骤 - api + scheduler 增加 Redis 心跳健康检查 - Nginx 添加安全响应头 - Nginx client_max_body_size 100M → 500M - .env.example 补充 UPLOAD_MAX_* 配置与安全注释 其他: - /voice/upload 合并到 /upload/audio - Rust 上传增加文件大小检查 - 清理 Rust 19 处 println! + 前端 21 处 console.info - 修复 VideoCompose.tsx toast 未导入(已有bug)
220 lines
9.4 KiB
Python
220 lines
9.4 KiB
Python
"""
|
|
Subtitle 任务处理器
|
|
==================
|
|
|
|
管理火山引擎字幕生成与自动打轴的提交与轮询。
|
|
支持两种模式:
|
|
- caption: 字幕识别(从音频/视频提取带时间轴的字幕)
|
|
- auto_align: 自动打轴(为已有字幕文本配上时间轴)
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
from app.core.platform_config import get_platform_config_loader
|
|
from app.scheduler.handlers.base import AsyncHandler
|
|
from app.scheduler.models import StateChange
|
|
from app.scheduler.registry import TaskRegistry
|
|
from app.scheduler.slot_manager import SlotManager
|
|
from app.services.volcengine_caption_service import VolcengineCaptionService
|
|
|
|
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
# Key passed to SlotManager.acquire_ctx to identify the subtitle slot pool;
# also exposed as SubtitleHandler.slot_key.
SLOT_KEY = "volc:subtitle_slots"
|
|
|
|
|
|
def _get_subtitle_max_slots(default: int = 5) -> int:
    """Return the slot limit used for subtitle task submissions.

    The value is read from the ``rate_limit_qps`` attribute of the
    ``volcengine_caption`` platform entry returned by the platform config
    loader (platform-config.yaml, according to the original docstring).

    Args:
        default: Value returned when the platform entry is missing, the
            lookup raises, or the configured value is below 1 after
            integer conversion.

    Returns:
        The configured limit when it is a usable positive integer,
        otherwise ``default``.
    """
    try:
        platform = get_platform_config_loader().get_platform("volcengine_caption")
        if platform:
            configured = int(platform.rate_limit_qps)
            # int() truncates toward zero, so a fractional QPS such as 0.5
            # would yield 0 slots. Treat anything below 1 as unusable and
            # fall back to the default instead of returning it.
            if configured >= 1:
                return configured
            logger.warning(
                "subtitle platform rate_limit_qps=%r is not a positive integer; "
                "using default %d",
                platform.rate_limit_qps,
                default,
            )
    except Exception as e:
        # Best-effort read: any failure here must not break module import,
        # because the class body below calls this function at definition time.
        logger.warning("读取字幕平台 rate_limit 配置失败: %s", e)
    return default
|
|
|
|
|
|
class SubtitleHandler(AsyncHandler):
    """Scheduler handler that submits and polls Volcengine subtitle tasks.

    Each task passes through two phases, selected by whether its params
    already contain ``volc_task_id``:

    * submit phase - no ``volc_task_id`` yet: a slot is acquired from the
      slot manager, the job is submitted to the caption service, and the
      returned id is stored in the task params.
    * poll phase - ``volc_task_id`` present: the remote status is queried
      without acquiring a slot and mapped to ``StateChange`` records.

    Two modes are supported via ``params["mode"]``:

    * ``caption`` (default): recognise subtitles from audio/video.
    * ``auto_align``: align an existing text (``params["audio_text"]``)
      to the audio timeline.
    """

    name = "subtitle"
    slot_key = SLOT_KEY
    # Evaluated once, when the class body is executed.
    max_slots = _get_subtitle_max_slots()

    def __init__(self, service: VolcengineCaptionService | None = None):
        """Store the caption service used for submission and polling.

        Args:
            service: Service instance; when ``None`` every tick marks the
                processed tasks as failed because no service is available.
        """
        self.service = service

    def _get_service(self) -> VolcengineCaptionService:
        """Return the injected service.

        Raises:
            RuntimeError: If no service was passed to the constructor.
        """
        if self.service is None:
            raise RuntimeError(
                "SubtitleHandler 需要通过构造函数传入 VolcengineCaptionService 实例"
            )
        return self.service

    @staticmethod
    def _failure_changes(task_id: Any, message: str, error: str) -> list[StateChange]:
        """Build the status/message/error changes that mark a task as failed."""
        return [
            StateChange(task_id=task_id, field_path="status", value="failed"),
            StateChange(task_id=task_id, field_path="message", value=message),
            StateChange(task_id=task_id, field_path="error", value=error),
        ]

    @staticmethod
    def _completion_changes(task_id: Any, result_payload: dict[str, Any]) -> list[StateChange]:
        """Build the changes that mark a task as completed with its result."""
        return [
            StateChange(task_id=task_id, field_path="status", value="completed"),
            StateChange(task_id=task_id, field_path="progress", value=100),
            StateChange(task_id=task_id, field_path="message", value="字幕生成完成"),
            StateChange(task_id=task_id, field_path="completed", value=1),
            StateChange(task_id=task_id, field_path="total", value=1),
            StateChange(task_id=task_id, field_path="result", value=result_payload),
        ]

    @staticmethod
    def _normalize_utterances(raw: Any) -> list[dict[str, Any]]:
        """Convert raw utterance entries to ``text/start_time/end_time`` dicts.

        Both snake_case and camelCase time keys are accepted. A missing or
        null list yields ``[]`` and non-dict entries are skipped; otherwise
        a malformed result would raise on every tick and a remotely
        completed task would never leave the polling phase.
        """
        return [
            {
                "text": item.get("text", ""),
                "start_time": item.get("start_time", 0) or item.get("startTime", 0),
                "end_time": item.get("end_time", 0) or item.get("endTime", 0),
            }
            for item in (raw or [])
            if isinstance(item, dict)
        ]

    async def _poll(self, task: Any, params: dict[str, Any]) -> list[StateChange]:
        """Query the remote status of an already-submitted task.

        Returns completion or failure changes when the remote state is
        ``completed`` or ``failed``. Any other state, or an unexpected
        error while polling, produces no changes so the task is polled
        again on a later tick.
        """
        task_id = task.task_id
        mode = params.get("mode", "caption")
        volc_task_id = params.get("volc_task_id")
        project_id = params.get("project_id", "")
        video_path = params.get("video", params.get("video_path", ""))
        language = params.get("language", "zh")

        try:
            service = self._get_service()
            if mode == "auto_align":
                status = await service.query_auto_align_task_status(volc_task_id)
            else:
                status = await service.query_caption_task_status(volc_task_id)

            if status.state == "completed":
                result_data = status.result or {}
                result_payload = {
                    "project_id": project_id,
                    "video_path": video_path,
                    "language": language,
                    "mode": mode,
                    "duration": result_data.get("duration", 0.0),
                    "utterances": self._normalize_utterances(
                        result_data.get("utterances")
                    ),
                }
                return self._completion_changes(task_id, result_payload)

            if status.state == "failed":
                return self._failure_changes(
                    task_id,
                    "字幕识别失败,请稍后重试",
                    status.error_message or "字幕识别失败",
                )

            # Any other state (e.g. "processing" / "pending"): keep polling.
            return []
        except RuntimeError:
            logger.error("[Subtitle %s] service not initialized", task_id)
            return self._failure_changes(
                task_id, "字幕处理服务未就绪", "字幕处理服务未就绪"
            )
        except Exception as e:
            # Deliberately best-effort: log and retry on the next tick.
            logger.error("[Subtitle %s] poll error: %s", task_id, e)
            return []

    async def _submit(self, task: Any, params: dict[str, Any]) -> list[StateChange]:
        """Submit a new job and record the returned id in the task params.

        On success ``params`` is updated in place with ``volc_task_id`` and
        emitted as a ``params`` change. Any failure marks the task failed.
        """
        task_id = task.task_id
        mode = params.get("mode", "caption")
        video_path = params.get("video", params.get("video_path", ""))
        language = params.get("language", "zh")
        audio_text = params.get("audio_text", "")

        try:
            service = self._get_service()
            if mode == "auto_align":
                if not audio_text:
                    raise ValueError("auto_align 模式需要提供 audio_text")
                volc_task_id = await service.submit_auto_align_task(
                    audio_url=video_path,
                    audio_text=audio_text,
                )
            else:
                volc_task_id = await service.submit_caption_task(
                    audio_url=video_path, language=language
                )
            if not volc_task_id:
                raise ValueError("未返回任务ID")

            params["volc_task_id"] = volc_task_id
            return [
                StateChange(task_id=task_id, field_path="params", value=params),
                StateChange(task_id=task_id, field_path="message", value="字幕任务已提交"),
            ]
        except RuntimeError:
            logger.error("[Subtitle %s] service not initialized", task_id)
            return self._failure_changes(
                task_id, "字幕处理服务未就绪", "字幕处理服务未就绪"
            )
        except Exception as e:
            logger.error("[Subtitle %s] submit error: %s", task_id, e)
            return self._failure_changes(
                task_id, "字幕任务提交失败,请稍后重试", "字幕任务提交失败"
            )

    async def tick(
        self, tasks: list[Any], registry: TaskRegistry, slots: SlotManager
    ) -> list[StateChange]:
        """Advance every task by one step and return the resulting changes.

        Args:
            tasks: Tasks to process; each exposes ``task_id`` and ``params``.
            registry: Task registry (not used by this handler).
            slots: Slot manager that limits concurrent submissions.

        Returns:
            The ``StateChange`` records produced for all tasks, in task order.
        """
        changes: list[StateChange] = []

        for task in tasks:
            params = task.params or {}

            if params.get("volc_task_id"):
                # Poll phase: no slot is taken, the remote status is queried directly.
                changes.extend(await self._poll(task, params))
                continue

            # Submit phase: hold a slot for the duration of the submission;
            # the context manager releases it on exit.
            async with slots.acquire_ctx(SLOT_KEY, task.task_id, self.max_slots) as acquired:
                if not acquired:
                    continue
                changes.extend(await self._submit(task, params))

        return changes
|