fix: 素材匹配兼容不可见字符 + 时长容错 + UI 细节修复
- material_service: 精确查询失败后全量内存标准化匹配,兼容数据库 name 含不可见字符
- material_service: 素材时长过滤放宽到 70% 兜底,避免打轴合并导致匹配失败
- material_service: 增加详细 warn 日志,便于诊断未匹配原因
- broll_category: 新增 get_by_level 方法供全量查询使用
- VoiceMaterialLibrary: 上传弹窗文案换行显示
- ScriptCreation: 主题卡片 min-height 64px 修复文字截断
This commit is contained in:
@@ -70,6 +70,18 @@ class BrollCategoryCRUD(CRUDBase[BrollCategory]):
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def get_by_level(
    self, db: AsyncSession, *, level: int
) -> list[BrollCategory]:
    """Return all categories at the given level whose status is "active".

    Args:
        db: Session used to execute the query.
        level: Value compared against ``BrollCategory.level``.

    Returns:
        A list of the matching ``BrollCategory`` rows (empty when none match).
    """
    # Build the statement first, then execute it, so the filter is easy to read.
    stmt = select(BrollCategory).where(
        BrollCategory.level == level,
        BrollCategory.status == "active",
    )
    rows = await db.execute(stmt)
    return list(rows.scalars().all())
|
||||
|
||||
|
||||
# 导出实例
|
||||
broll_category = BrollCategoryCRUD()
|
||||
|
||||
@@ -26,7 +26,12 @@ _USED_MATERIALS_TTL = 7 * 24 * 3600
|
||||
def _normalize_scene(scene: str) -> str:
|
||||
"""标准化场景描述,用于匹配三级分类 name"""
|
||||
# 去除所有 Unicode 空白字符(空格、全角空格、换行、tab 等)
|
||||
return re.sub(r"\s+", "", scene)
|
||||
cleaned = re.sub(r"\s+", "", scene)
|
||||
# 去除常见中文标点符号(逗号、句号、感叹号、问号、顿号、分号、冒号、引号、括号等)
|
||||
cleaned = re.sub(r"[,。!?、;:""''()【】《》]+", "", cleaned)
|
||||
# 去除零宽字符(零宽空格、零宽非连接符、零宽连接符、零宽非断空格等)
|
||||
cleaned = re.sub(r"[\u200b-\u200f\ufeff]+", "", cleaned)
|
||||
return cleaned
|
||||
|
||||
|
||||
def _weighted_choice(materials: list) -> object: # noqa: ANN001
|
||||
@@ -155,11 +160,21 @@ async def match_material(
|
||||
|
||||
normalized = _normalize_scene(scene)
|
||||
|
||||
# 1. 查找三级分类(精确匹配 + 顺序颠倒兜底)
|
||||
# 1. 查找三级分类(精确匹配 -> 全量内存匹配兜底 -> 顺序颠倒 -> 上级回退)
|
||||
category = await broll_category.get_by_name_and_level(
|
||||
db, name=normalized, level=3
|
||||
)
|
||||
# 若精确匹配失败,尝试将 "A-B" 倒序为 "B-A" 再匹配
|
||||
# 精确匹配失败时,全量查询后在内存标准化匹配(兼容数据库 name 含不可见字符)
|
||||
if category is None:
|
||||
all_categories = await broll_category.get_by_level(db, level=3)
|
||||
for c in all_categories:
|
||||
if _normalize_scene(c.name) == normalized:
|
||||
category = c
|
||||
logger.info(
|
||||
f"素材分类全量内存匹配命中: '{normalized}' -> '{c.name}'"
|
||||
)
|
||||
break
|
||||
# 若仍失败,尝试将 "A-B" 倒序为 "B-A" 再匹配
|
||||
if category is None:
|
||||
parts = normalized.rsplit("-", 1)
|
||||
if len(parts) == 2:
|
||||
@@ -179,16 +194,27 @@ async def match_material(
|
||||
f"素材回退到上级分类命中: '{normalized}' -> '{category.name}'"
|
||||
)
|
||||
if category is None:
|
||||
logger.debug(f"未找到分类: {normalized}")
|
||||
logger.warning(f"素材匹配失败: 未找到分类 '{normalized}' (原始 scene: '{scene}')")
|
||||
return None
|
||||
|
||||
# 2. 查询候选素材
|
||||
materials = await broll_material.get_active_by_category_and_duration(
|
||||
db, category_id=category.id, min_duration=required_duration
|
||||
# 2. 查询该分类下所有 active 素材(先不过滤时长,用于日志诊断)
|
||||
all_materials = await broll_material.get_active_by_categories(
|
||||
db, category_ids=[category.id]
|
||||
)
|
||||
if not all_materials:
|
||||
logger.warning(f"素材匹配失败: 分类 '{normalized}' 下无任何可用素材")
|
||||
return None
|
||||
|
||||
# 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底)
|
||||
materials = [m for m in all_materials if m.duration >= required_duration]
|
||||
if not materials:
|
||||
logger.debug(
|
||||
f"分类 {normalized} 无足够时长的素材 (需 >= {required_duration}s)"
|
||||
materials = [m for m in all_materials if m.duration >= required_duration * 0.7]
|
||||
if not materials:
|
||||
materials = all_materials
|
||||
if not materials:
|
||||
max_duration = max(m.duration for m in all_materials)
|
||||
logger.warning(
|
||||
f"素材匹配失败: 分类 '{normalized}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)"
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -255,31 +281,36 @@ async def batch_match(
|
||||
normalized_scenes = [_normalize_scene(s["scene"]) for s in scenes]
|
||||
unique_names = list(set(normalized_scenes))
|
||||
|
||||
# 2. 批量查询分类(1 次 DB)—— 同时查询原始名和倒序名
|
||||
reversed_names: list[str] = []
|
||||
name_to_reversed: dict[str, str] = {}
|
||||
for name in unique_names:
|
||||
parts = name.rsplit("-", 1)
|
||||
if len(parts) == 2:
|
||||
rev = f"{parts[1]}-{parts[0]}"
|
||||
reversed_names.append(rev)
|
||||
name_to_reversed[name] = rev
|
||||
|
||||
all_query_names = unique_names + reversed_names
|
||||
# 2. 批量查询分类:优先精确查询,失败时全量内存匹配兜底
|
||||
categories = await broll_category.get_by_names_and_level(
|
||||
db, names=all_query_names, level=3
|
||||
db, names=unique_names, level=3
|
||||
)
|
||||
category_map: dict[str, object] = {}
|
||||
for c in categories:
|
||||
category_map[c.name] = c
|
||||
category_map[_normalize_scene(c.name)] = c
|
||||
|
||||
# 收集未命中的 name,准备全量兜底
|
||||
unmatched_by_exact = [name for name in unique_names if name not in category_map]
|
||||
if unmatched_by_exact:
|
||||
all_categories = await broll_category.get_by_level(db, level=3)
|
||||
for c in all_categories:
|
||||
normalized_db_name = _normalize_scene(c.name)
|
||||
if normalized_db_name not in category_map:
|
||||
category_map[normalized_db_name] = c
|
||||
|
||||
# 构建原始 scene -> category 的映射
|
||||
reversed_map: dict[str, str] = {}
|
||||
for name in unique_names:
|
||||
parts = name.rsplit("-", 1)
|
||||
if len(parts) == 2:
|
||||
reversed_map[name] = f"{parts[1]}-{parts[0]}"
|
||||
|
||||
# 构建原始 scene -> category 的映射(优先精确匹配,fallback 倒序匹配)
|
||||
scene_to_category: dict[str, object] = {}
|
||||
for name in unique_names:
|
||||
if name in category_map:
|
||||
scene_to_category[name] = category_map[name]
|
||||
elif name in name_to_reversed and name_to_reversed[name] in category_map:
|
||||
rev = name_to_reversed[name]
|
||||
elif name in reversed_map and reversed_map[name] in category_map:
|
||||
rev = reversed_map[name]
|
||||
scene_to_category[name] = category_map[rev]
|
||||
logger.info(
|
||||
f"批量匹配顺序颠倒兜底命中: '{name}' -> '{rev}'"
|
||||
@@ -331,13 +362,25 @@ async def batch_match(
|
||||
|
||||
category = scene_to_category.get(scene_name)
|
||||
if category is None:
|
||||
original_scene = scenes[idx]["scene"]
|
||||
logger.warning(
|
||||
f"批量素材匹配失败: 未找到分类 '{scene_name}' (原始 scene: '{original_scene}')"
|
||||
)
|
||||
results.append(None)
|
||||
continue
|
||||
|
||||
materials = materials_by_category.get(category.id, [])
|
||||
# 按时长过滤
|
||||
# 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底)
|
||||
candidates = [m for m in materials if m.duration >= required_duration]
|
||||
if not candidates:
|
||||
candidates = [m for m in materials if m.duration >= required_duration * 0.7]
|
||||
if not candidates:
|
||||
candidates = materials
|
||||
if not candidates:
|
||||
max_duration = max((m.duration for m in materials), default=0)
|
||||
logger.warning(
|
||||
f"批量素材匹配失败: 分类 '{scene_name}' -> '{category.name}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)"
|
||||
)
|
||||
results.append(None)
|
||||
continue
|
||||
|
||||
|
||||
@@ -418,7 +418,8 @@ export default function VoiceMaterialLibrary() {
|
||||
<div style={{ color: 'var(--text-secondary)' }}>
|
||||
<div style={{ fontSize: 'var(--font-sm)' }}>点击选择文件</div>
|
||||
<div style={{ fontSize: 'var(--font-xs)', marginTop: 6, lineHeight: 1.6 }}>
|
||||
支持 MP3 / M4A / WAV / MP4,人声干净无杂音,时长 10 秒 ~ 2 分钟,不超过 20MB
|
||||
<div>支持 MP3 / M4A / WAV / MP4</div>
|
||||
<div>人声干净无杂音,时长 10 秒 ~ 2 分钟,不超过 20MB</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
align-items: flex-start;
|
||||
justify-content: center;
|
||||
gap: 3px;
|
||||
min-height: 38px;
|
||||
min-height: 64px;
|
||||
border-radius: 10px;
|
||||
border: 1px solid #e8e8e8;
|
||||
background: #fff;
|
||||
|
||||
Reference in New Issue
Block a user