From 47bb987e065aec7f3c386813d24697ee14ced324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E9=B1=BC=E5=BC=80=E5=8F=91?= Date: Fri, 5 Jun 2026 17:26:30 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E7=B4=A0=E6=9D=90=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=E4=B8=8D=E5=8F=AF=E8=A7=81=E5=AD=97=E7=AC=A6?= =?UTF-8?q?=20+=20=E6=97=B6=E9=95=BF=E5=AE=B9=E9=94=99=20+=20UI=20?= =?UTF-8?q?=E7=BB=86=E8=8A=82=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - material_service: 精确查询失败后全量内存标准化匹配,兼容数据库 name 含不可见字符 - material_service: 素材时长过滤放宽到 70% 兜底,避免打轴合并导致匹配失败 - material_service: 增加详细 warn 日志,便于诊断未匹配原因 - broll_category: 新增 get_by_level 方法供全量查询使用 - VoiceMaterialLibrary: 上传弹窗文案换行显示 - ScriptCreation: 主题卡片 min-height 64px 修复文字截断 --- python-api/app/crud/broll_category.py | 12 +++ python-api/app/services/material_service.py | 95 ++++++++++++++----- .../VoiceMaterialLibrary.tsx | 3 +- .../pages/VideoCreation/ScriptCreation.css | 2 +- 4 files changed, 84 insertions(+), 28 deletions(-) diff --git a/python-api/app/crud/broll_category.py b/python-api/app/crud/broll_category.py index f1155e1..3cc42b2 100644 --- a/python-api/app/crud/broll_category.py +++ b/python-api/app/crud/broll_category.py @@ -70,6 +70,18 @@ class BrollCategoryCRUD(CRUDBase[BrollCategory]): ) return result.scalar_one_or_none() + async def get_by_level( + self, db: AsyncSession, *, level: int + ) -> list[BrollCategory]: + """根据层级获取所有启用的分类""" + result = await db.execute( + select(BrollCategory).where( + BrollCategory.level == level, + BrollCategory.status == "active", + ) + ) + return list(result.scalars().all()) + # 导出实例 broll_category = BrollCategoryCRUD() diff --git a/python-api/app/services/material_service.py b/python-api/app/services/material_service.py index 7aac534..d74824c 100644 --- a/python-api/app/services/material_service.py +++ b/python-api/app/services/material_service.py @@ -26,7 +26,12 @@ _USED_MATERIALS_TTL = 7 * 24 * 3600 def _normalize_scene(scene: str) -> str: """标准化场景描述,用于匹配三级分类 name""" # 去除所有 Unicode 空白字符(空格、全角空格、换行、tab 等) - return re.sub(r"\s+", "", scene) + cleaned = re.sub(r"\s+", "", scene) + # 去除常见中文标点符号(逗号、句号、感叹号、问号、顿号、分号、冒号、引号、括号等) + cleaned = re.sub(r"[,。!?、;:""''()【】《》]+", "", cleaned) + # 去除零宽字符(零宽空格、零宽非连接符、零宽连接符、零宽非断空格等) + cleaned = re.sub(r"[\u200b-\u200f\ufeff]+", "", cleaned) + return cleaned def _weighted_choice(materials: list) -> object: # noqa: ANN001 @@ -155,11 +160,21 @@ async def match_material( normalized = _normalize_scene(scene) - # 1. 查找三级分类(精确匹配 + 顺序颠倒兜底) + # 1. 查找三级分类(精确匹配 -> 全量内存匹配兜底 -> 顺序颠倒 -> 上级回退) category = await broll_category.get_by_name_and_level( db, name=normalized, level=3 ) - # 若精确匹配失败,尝试将 "A-B" 倒序为 "B-A" 再匹配 + # 精确匹配失败时,全量查询后在内存标准化匹配(兼容数据库 name 含不可见字符) + if category is None: + all_categories = await broll_category.get_by_level(db, level=3) + for c in all_categories: + if _normalize_scene(c.name) == normalized: + category = c + logger.info( + f"素材分类全量内存匹配命中: '{normalized}' -> '{c.name}'" + ) + break + # 若仍失败,尝试将 "A-B" 倒序为 "B-A" 再匹配 if category is None: parts = normalized.rsplit("-", 1) if len(parts) == 2: @@ -179,16 +194,27 @@ async def match_material( f"素材回退到上级分类命中: '{normalized}' -> '{category.name}'" ) if category is None: - logger.debug(f"未找到分类: {normalized}") + logger.warning(f"素材匹配失败: 未找到分类 '{normalized}' (原始 scene: '{scene}')") return None - # 2. 查询候选素材 - materials = await broll_material.get_active_by_category_and_duration( - db, category_id=category.id, min_duration=required_duration + # 2. 查询该分类下所有 active 素材(先不过滤时长,用于日志诊断) + all_materials = await broll_material.get_active_by_categories( + db, category_ids=[category.id] ) + if not all_materials: + logger.warning(f"素材匹配失败: 分类 '{normalized}' 下无任何可用素材") + return None + + # 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底) + materials = [m for m in all_materials if m.duration >= required_duration] if not materials: - logger.debug( - f"分类 {normalized} 无足够时长的素材 (需 >= {required_duration}s)" + materials = [m for m in all_materials if m.duration >= required_duration * 0.7] + if not materials: + materials = all_materials + if not materials: + max_duration = max(m.duration for m in all_materials) + logger.warning( + f"素材匹配失败: 分类 '{normalized}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)" ) return None @@ -255,31 +281,36 @@ async def batch_match( normalized_scenes = [_normalize_scene(s["scene"]) for s in scenes] unique_names = list(set(normalized_scenes)) - # 2. 批量查询分类(1 次 DB)—— 同时查询原始名和倒序名 - reversed_names: list[str] = [] - name_to_reversed: dict[str, str] = {} - for name in unique_names: - parts = name.rsplit("-", 1) - if len(parts) == 2: - rev = f"{parts[1]}-{parts[0]}" - reversed_names.append(rev) - name_to_reversed[name] = rev - - all_query_names = unique_names + reversed_names + # 2. 批量查询分类:优先精确查询,失败时全量内存匹配兜底 categories = await broll_category.get_by_names_and_level( - db, names=all_query_names, level=3 + db, names=unique_names, level=3 ) category_map: dict[str, object] = {} for c in categories: - category_map[c.name] = c + category_map[_normalize_scene(c.name)] = c + + # 收集未命中的 name,准备全量兜底 + unmatched_by_exact = [name for name in unique_names if name not in category_map] + if unmatched_by_exact: + all_categories = await broll_category.get_by_level(db, level=3) + for c in all_categories: + normalized_db_name = _normalize_scene(c.name) + if normalized_db_name not in category_map: + category_map[normalized_db_name] = c + + # 构建原始 scene -> category 的映射 + reversed_map: dict[str, str] = {} + for name in unique_names: + parts = name.rsplit("-", 1) + if len(parts) == 2: + reversed_map[name] = f"{parts[1]}-{parts[0]}" - # 构建原始 scene -> category 的映射(优先精确匹配,fallback 倒序匹配) scene_to_category: dict[str, object] = {} for name in unique_names: if name in category_map: scene_to_category[name] = category_map[name] - elif name in name_to_reversed and name_to_reversed[name] in category_map: - rev = name_to_reversed[name] + elif name in reversed_map and reversed_map[name] in category_map: + rev = reversed_map[name] scene_to_category[name] = category_map[rev] logger.info( f"批量匹配顺序颠倒兜底命中: '{name}' -> '{rev}'" @@ -331,13 +362,25 @@ async def batch_match( category = scene_to_category.get(scene_name) if category is None: + original_scene = scenes[idx]["scene"] + logger.warning( + f"批量素材匹配失败: 未找到分类 '{scene_name}' (原始 scene: '{original_scene}')" + ) results.append(None) continue materials = materials_by_category.get(category.id, []) - # 按时长过滤 + # 按时长过滤(优先严格匹配,失败时逐步放宽到 70% 兜底) candidates = [m for m in materials if m.duration >= required_duration] if not candidates: + candidates = [m for m in materials if m.duration >= required_duration * 0.7] + if not candidates: + candidates = materials + if not candidates: + max_duration = max((m.duration for m in materials), default=0) + logger.warning( + f"批量素材匹配失败: 分类 '{scene_name}' -> '{category.name}' 无足够时长的素材 (需 >= {required_duration}s, 最大可用: {max_duration}s)" + ) results.append(None) continue diff --git a/tauri-app/src/pages/ContentManagement/VoiceMaterialLibrary.tsx b/tauri-app/src/pages/ContentManagement/VoiceMaterialLibrary.tsx index 725a13d..eb0d516 100644 --- a/tauri-app/src/pages/ContentManagement/VoiceMaterialLibrary.tsx +++ b/tauri-app/src/pages/ContentManagement/VoiceMaterialLibrary.tsx @@ -418,7 +418,8 @@ export default function VoiceMaterialLibrary() {
点击选择文件
- 支持 MP3 / M4A / WAV / MP4,人声干净无杂音,时长 10 秒 ~ 2 分钟,不超过 20MB +
支持 MP3 / M4A / WAV / MP4
+
人声干净无杂音,时长 10 秒 ~ 2 分钟,不超过 20MB
)} diff --git a/tauri-app/src/pages/VideoCreation/ScriptCreation.css b/tauri-app/src/pages/VideoCreation/ScriptCreation.css index daae770..82ec546 100644 --- a/tauri-app/src/pages/VideoCreation/ScriptCreation.css +++ b/tauri-app/src/pages/VideoCreation/ScriptCreation.css @@ -30,7 +30,7 @@ align-items: flex-start; justify-content: center; gap: 3px; - min-height: 38px; + min-height: 64px; border-radius: 10px; border: 1px solid #e8e8e8; background: #fff;