fix: 素材匹配兼容不可见字符 + 时长容错 + UI 细节修复

- material_service: 精确查询失败后全量内存标准化匹配,兼容数据库 name 含不可见字符
- material_service: 素材时长过滤先严格匹配,失败后放宽到 70%,仍无结果时回退到该分类下全部素材,避免打轴合并导致匹配失败
- material_service: 增加详细 warn 日志,便于诊断未匹配原因
- broll_category: 新增 get_by_level 方法供全量查询使用
- VoiceMaterialLibrary: 上传弹窗文案换行显示
- ScriptCreation: 主题卡片 min-height 64px 修复文字截断
This commit is contained in:
小鱼开发
2026-06-05 17:26:30 +08:00
parent d7b9c3ac3b
commit 47bb987e06
4 changed files with 84 additions and 28 deletions
+12
View File
@@ -70,6 +70,18 @@ class BrollCategoryCRUD(CRUDBase[BrollCategory]):
)
return result.scalar_one_or_none()
async def get_by_level(
    self, db: AsyncSession, *, level: int
) -> list[BrollCategory]:
    """Return every active category at the given hierarchy level.

    Rows are ordered by ``id`` so that callers which keep the first category
    whose normalized name matches pick the same row on every call, even when
    several stored names normalize to the same string.

    Args:
        db: Async session used to execute the query.
        level: Category level to filter on.

    Returns:
        A list of ``BrollCategory`` rows whose ``status`` is ``"active"``;
        empty when nothing matches.
    """
    stmt = (
        select(BrollCategory)
        .where(
            BrollCategory.level == level,
            BrollCategory.status == "active",
        )
        # Without an explicit order the database may return rows in any
        # order, which makes "first match wins" lookups non-deterministic.
        .order_by(BrollCategory.id)
    )
    result = await db.execute(stmt)
    return list(result.scalars().all())
# 导出实例
broll_category = BrollCategoryCRUD()
+69 -26
View File
@@ -26,7 +26,12 @@ _USED_MATERIALS_TTL = 7 * 24 * 3600
def _normalize_scene(scene: str) -> str:
"""标准化场景描述,用于匹配三级分类 name"""
# 去除所有 Unicode 空白字符(空格、全角空格、换行、tab 等)
return re.sub(r"\s+", "", scene)
cleaned = re.sub(r"\s+", "", scene)
# 去除常见中文标点符号(逗号、句号、感叹号、问号、顿号、分号、冒号、引号、括号等)
cleaned = re.sub(r"[,。!?、;:""''()【】《》]+", "", cleaned)
# 去除零宽字符(零宽空格、零宽非连接符、零宽连接符、零宽非断空格等)
cleaned = re.sub(r"[\u200b-\u200f\ufeff]+", "", cleaned)
return cleaned
def _weighted_choice(materials: list) -> object: # noqa: ANN001
@@ -155,11 +160,21 @@ async def match_material(
normalized = _normalize_scene(scene)
# 1. 查找三级分类(精确匹配 + 顺序颠倒兜底
# 1. 查找三级分类(精确匹配 -> 全量内存匹配兜底 -> 顺序颠倒 -> 上级回退
category = await broll_category.get_by_name_and_level(
db, name=normalized, level=3
)
# 精确匹配失败,尝试将 "A-B" 倒序为 "B-A" 再匹配
# 精确匹配失败时,全量查询后在内存标准化匹配(兼容数据库 name 含不可见字符)
if category is None:
all_categories = await broll_category.get_by_level(db, level=3)
for c in all_categories:
if _normalize_scene(c.name) == normalized:
category = c
logger.info(
f"素材分类全量内存匹配命中: '{normalized}' -> '{c.name}'"
)
break
# 若仍失败,尝试将 "A-B" 倒序为 "B-A" 再匹配
if category is None:
parts = normalized.rsplit("-", 1)
if len(parts) == 2:
@@ -179,16 +194,27 @@ async def match_material(
f"素材回退到上级分类命中: '{normalized}' -> '{category.name}'"
)
if category is None:
logger.debug(f"未找到分类: {normalized}")
logger.warning(f"素材匹配失败: 未找到分类 '{normalized}' (原始 scene: '{scene}')")
return None
# 2. 查询候选素材
materials = await broll_material.get_active_by_category_and_duration(
db, category_id=category.id, min_duration=required_duration
# 2. 查询该分类下所有 active 素材(先不过滤时长,用于日志诊断)
all_materials = await broll_material.get_active_by_categories(
db, category_ids=[category.id]
)
if not all_materials:
logger.warning(f"素材匹配失败: 分类 '{normalized}' 下无任何可用素材")
return None
# 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底)
materials = [m for m in all_materials if m.duration >= required_duration]
if not materials:
logger.debug(
f"分类 {normalized} 无足够时长的素材 (需 >= {required_duration}s)"
materials = [m for m in all_materials if m.duration >= required_duration * 0.7]
if not materials:
materials = all_materials
if not materials:
max_duration = max(m.duration for m in all_materials)
logger.warning(
f"素材匹配失败: 分类 '{normalized}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)"
)
return None
@@ -255,31 +281,36 @@ async def batch_match(
normalized_scenes = [_normalize_scene(s["scene"]) for s in scenes]
unique_names = list(set(normalized_scenes))
# 2. 批量查询分类(1 次 DB)—— 同时查询原始名和倒序名
reversed_names: list[str] = []
name_to_reversed: dict[str, str] = {}
for name in unique_names:
parts = name.rsplit("-", 1)
if len(parts) == 2:
rev = f"{parts[1]}-{parts[0]}"
reversed_names.append(rev)
name_to_reversed[name] = rev
all_query_names = unique_names + reversed_names
# 2. 批量查询分类:优先精确查询,失败时全量内存匹配兜底
categories = await broll_category.get_by_names_and_level(
db, names=all_query_names, level=3
db, names=unique_names, level=3
)
category_map: dict[str, object] = {}
for c in categories:
category_map[c.name] = c
category_map[_normalize_scene(c.name)] = c
# 收集未命中的 name,准备全量兜底
unmatched_by_exact = [name for name in unique_names if name not in category_map]
if unmatched_by_exact:
all_categories = await broll_category.get_by_level(db, level=3)
for c in all_categories:
normalized_db_name = _normalize_scene(c.name)
if normalized_db_name not in category_map:
category_map[normalized_db_name] = c
# 构建原始 scene -> category 的映射
reversed_map: dict[str, str] = {}
for name in unique_names:
parts = name.rsplit("-", 1)
if len(parts) == 2:
reversed_map[name] = f"{parts[1]}-{parts[0]}"
# 构建原始 scene -> category 的映射(优先精确匹配,fallback 倒序匹配)
scene_to_category: dict[str, object] = {}
for name in unique_names:
if name in category_map:
scene_to_category[name] = category_map[name]
elif name in name_to_reversed and name_to_reversed[name] in category_map:
rev = name_to_reversed[name]
elif name in reversed_map and reversed_map[name] in category_map:
rev = reversed_map[name]
scene_to_category[name] = category_map[rev]
logger.info(
f"批量匹配顺序颠倒兜底命中: '{name}' -> '{rev}'"
@@ -331,13 +362,25 @@ async def batch_match(
category = scene_to_category.get(scene_name)
if category is None:
original_scene = scenes[idx]["scene"]
logger.warning(
f"批量素材匹配失败: 未找到分类 '{scene_name}' (原始 scene: '{original_scene}')"
)
results.append(None)
continue
materials = materials_by_category.get(category.id, [])
# 按时长过滤
# 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底)
candidates = [m for m in materials if m.duration >= required_duration]
if not candidates:
candidates = [m for m in materials if m.duration >= required_duration * 0.7]
if not candidates:
candidates = materials
if not candidates:
max_duration = max((m.duration for m in materials), default=0)
logger.warning(
f"批量素材匹配失败: 分类 '{scene_name}' -> '{category.name}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)"
)
results.append(None)
continue
@@ -418,7 +418,8 @@ export default function VoiceMaterialLibrary() {
<div style={{ color: 'var(--text-secondary)' }}>
<div style={{ fontSize: 'var(--font-sm)' }}></div>
<div style={{ fontSize: 'var(--font-xs)', marginTop: 6, lineHeight: 1.6 }}>
MP3 / M4A / WAV / MP4 10 ~ 2 20MB
<div> MP3 / M4A / WAV / MP4</div>
<div> 10 ~ 2 20MB</div>
</div>
</div>
)}
@@ -30,7 +30,7 @@
align-items: flex-start;
justify-content: center;
gap: 3px;
min-height: 38px;
min-height: 64px;
border-radius: 10px;
border: 1px solid #e8e8e8;
background: #fff;