"""
Qiniu Cloud object storage service
==================================

Upload, management and access helpers for audio, video and image files.

Use cases:
1. Voice cloning    - upload audio sample files
2. Audio generation - store TTS-generated audio
3. Video material   - upload video files for later processing
"""

import asyncio
import logging
import mimetypes
import uuid
from datetime import datetime
from pathlib import Path
from typing import BinaryIO

import qiniu
from qiniu import Auth, BucketManager, CdnManager, put_file, put_stream

from app.config import get_settings

logger = logging.getLogger(__name__)

# Raise the Qiniu SDK's global connection timeout (the original author notes
# that the default of 30s is not enough for large uploads).
qiniu.config.set_default(connection_timeout=120)


class QiniuService:
    """
    Thin wrapper around the Qiniu SDK.

    Bundles the common upload / delete / stat / CDN-refresh operations and
    routes each file to the image bucket or the video/audio bucket based on
    its logical file type.
    """

    # Key prefix shared by every object this service generates or lists.
    PROJECT_PREFIX = "meijiaka-zy"

    # Logical file type -> directory segment used inside generated keys.
    TYPE_DIRECTORIES = {
        "audio": "audios",
        "video": "videos",
        "image": "images",
    }

    # Allowed MIME types per category.
    # "audio/x-wav" / "audio/vnd.wave" / "audio/mp4" are included because
    # upload_audio() validates against mimetypes.guess_type(), which commonly
    # reports ".wav" as "audio/x-wav" (and may report ".m4a" as "audio/mp4"
    # depending on the platform MIME database); without them valid WAV
    # samples would be rejected.
    ALLOWED_AUDIO_TYPES = {
        "audio/mpeg",
        "audio/mp3",
        "audio/wav",
        "audio/x-wav",
        "audio/vnd.wave",
        "audio/x-m4a",
        "audio/mp4",
        "audio/aac",
        "audio/ogg",
    }
    ALLOWED_VIDEO_TYPES = {"video/mp4", "video/quicktime", "video/x-msvideo", "video/webm"}
    ALLOWED_IMAGE_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp"}

    def __init__(self):
        """
        Initialise the service from application settings.

        Two buckets are supported:
        - images:      QINIU_IMAGE_BUCKET / QINIU_IMAGE_DOMAIN
        - video/audio: QINIU_VIDEO_BUCKET / QINIU_VIDEO_DOMAIN

        Raises:
            ValueError: if the access key or the secret key is missing.
        """
        settings = get_settings()

        self.access_key = settings.QINIU_ACCESS_KEY
        self.secret_key = settings.QINIU_SECRET_KEY

        # Image bucket configuration.
        self.image_bucket = settings.QINIU_IMAGE_BUCKET
        self.image_domain = settings.QINIU_IMAGE_DOMAIN

        # Video/audio bucket configuration.
        self.video_bucket = settings.QINIU_VIDEO_BUCKET
        self.video_domain = settings.QINIU_VIDEO_DOMAIN

        if not all([self.access_key, self.secret_key]):
            raise ValueError(
                "七牛云配置不完整,请设置环境变量: "
                "QINIU_ACCESS_KEY, QINIU_SECRET_KEY"
            )

        # Auth plus the bucket / CDN managers built on top of it.
        self.auth = Auth(self.access_key, self.secret_key)
        self.bucket = BucketManager(self.auth)
        self.cdn = CdnManager(self.auth)

    def _get_bucket_and_domain(self, file_type: str) -> tuple[str, str]:
        """
        Return the bucket and domain that serve the given file type.

        Args:
            file_type: Logical file type (audio/video/image).

        Returns:
            (bucket_name, domain)
        """
        if file_type == "image":
            return self.image_bucket, self.image_domain
        # Everything that is not an image (video, audio, ...) uses the video bucket.
        return self.video_bucket, self.video_domain

    def _domain_for_key(self, key: str) -> str:
        """
        Infer the serving domain from a storage key.

        Keys containing "/images/" are treated as images; every other key
        falls back to the video/audio domain.
        """
        file_type = "image" if "/images/" in key else "video"
        _, domain = self._get_bucket_and_domain(file_type)
        return domain

    def generate_key(
        self, file_type: str, original_filename: str, user_id: str | None = None
    ) -> str:
        """
        Build a storage key for a new object.

        Format:
            meijiaka-zy/{type_dir}/{YYYYMM}/{id}{ext}
            meijiaka-zy/{type_dir}/{user_id}/{YYYYMM}/{id}{ext}   (when user_id is given)

        Args:
            file_type: Logical file type; types not present in
                TYPE_DIRECTORIES are stored under "others".
            original_filename: Original file name; only its extension is used.
            user_id: Optional user id, added as a directory segment.

        Returns:
            The storage key.
        """
        ext = Path(original_filename).suffix.lower()
        if not ext:
            # No extension on the original name: fall back to a per-type default.
            ext_map = {"audio": ".mp3", "video": ".mp4", "image": ".jpg"}
            ext = ext_map.get(file_type, ".bin")

        # First 12 characters of the canonical UUID string (this includes the
        # first hyphen, e.g. "1a2b3c4d-5e6"); kept as-is so key shape is stable.
        unique_id = str(uuid.uuid4())[:12]

        type_dir = self.TYPE_DIRECTORIES.get(file_type, "others")

        # Year+month bucket, taken from the local clock.
        date_str = datetime.now().strftime("%Y%m")
        if user_id:
            return f"{self.PROJECT_PREFIX}/{type_dir}/{user_id}/{date_str}/{unique_id}{ext}"
        return f"{self.PROJECT_PREFIX}/{type_dir}/{date_str}/{unique_id}{ext}"

    def validate_file_type(self, mime_type: str, allowed_types: set[str]) -> bool:
        """Return True when ``mime_type`` is a member of ``allowed_types``."""
        return mime_type in allowed_types

    def get_upload_token(
        self, bucket: str, key: str, expires: int = 3600, policy: dict | None = None
    ) -> str:
        """
        Create an upload token (also usable for client-side direct upload).

        Args:
            bucket: Bucket name.
            key: Storage key the token is bound to.
            expires: Token lifetime in seconds (default: one hour).
            policy: Optional custom upload policy.

        Returns:
            The upload token string.
        """
        return self.auth.upload_token(bucket, key, expires, policy)

    def _calculate_file_hash(self, local_path: Path) -> str:
        """
        Return the hex MD5 digest of a local file.

        NOTE: this is NOT the value Qiniu reports in an object's ``hash``
        field; use ``_calculate_file_etag`` when comparing against Qiniu
        metadata. Kept for backward compatibility.
        """
        import hashlib

        # MD5 is used purely as a content checksum, not for security.
        md5 = hashlib.md5(usedforsecurity=False)  # nosec: B324
        with open(local_path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                md5.update(chunk)
        return md5.hexdigest()

    def _calculate_file_etag(self, local_path: Path) -> str:
        """
        Return the Qiniu ETag of a local file, computed by the SDK.

        This is the representation Qiniu stores in an object's ``hash``
        field, so it can be compared directly with listing / upload results.
        NOTE(review): objects uploaded with a non-default part size may carry
        a different ETag for the same content - confirm if v2 resumable
        uploads with custom part sizes are ever used.
        """
        return qiniu.etag(str(local_path))

    def _find_file_by_hash(self, bucket: str, file_hash: str) -> dict | None:
        """
        Look for an existing object whose ``hash`` equals ``file_hash``.

        Only a single listing page (at most 1000 entries under the project
        prefix) is inspected, so a duplicate beyond that page is not found.

        Args:
            bucket: Bucket name.
            file_hash: Qiniu ETag of the file to look for.

        Returns:
            A dict with key/hash/fsize/mime_type of the first match, or None.
        """
        ret, _eof, _info = self.bucket.list(
            bucket, prefix=f"{self.PROJECT_PREFIX}/", limit=1000
        )
        if ret and "items" in ret:
            for item in ret["items"]:
                if item.get("hash") == file_hash:
                    return {
                        "key": item["key"],
                        "hash": item["hash"],
                        "fsize": item["fsize"],
                        "mime_type": item.get("mimeType", "application/octet-stream"),
                    }
        return None

    def upload_file(
        self,
        local_path: str,
        key: str | None = None,
        file_type: str = "audio",
        user_id: str | None = None,
        check_duplicate: bool = True,
    ) -> dict:
        """
        Upload a local file to Qiniu.

        Args:
            local_path: Path of the local file.
            key: Storage key; generated automatically when omitted.
            file_type: Logical file type, used for key generation and bucket choice.
            user_id: Optional user id (used in generated keys).
            check_duplicate: When True (default), reuse an already stored
                object with identical content instead of uploading again.

        Returns:
            {
                "key": storage key,
                "hash": Qiniu ETag,
                "url": access URL,
                "mimeType": MIME type,
                "fsize": size in bytes,
                "isDuplicate": True when an existing object was reused,
                "message": only present when an existing object was reused
            }

        Raises:
            FileNotFoundError: if ``local_path`` does not exist.
            RuntimeError: if the SDK reports an upload failure.
        """
        local_path_obj = Path(local_path)
        if not local_path_obj.exists():
            raise FileNotFoundError(f"文件不存在: {local_path_obj}")

        bucket, domain = self._get_bucket_and_domain(file_type)

        if check_duplicate:
            # Compare using the Qiniu ETag: the listing's "hash" field is an
            # ETag, so an MD5 digest would never match. Hashing is skipped
            # entirely when duplicate checking is disabled.
            file_etag = self._calculate_file_etag(local_path_obj)
            existing = self._find_file_by_hash(bucket, file_etag)
            if existing:
                return {
                    "key": existing["key"],
                    "hash": existing["hash"],
                    "url": self.get_file_url(domain, existing["key"]),
                    "mimeType": existing.get("mime_type", "application/octet-stream"),
                    "fsize": existing["fsize"],
                    "isDuplicate": True,
                    "message": "文件已存在,直接复用",
                }

        if key is None:
            key = self.generate_key(file_type, local_path_obj.name, user_id)

        token = self.get_upload_token(bucket, key)

        # put_file is the SDK's file-upload entry point.
        ret, info = put_file(up_token=token, key=key, file_path=str(local_path_obj))

        if ret is None:
            raise RuntimeError(f"上传失败: {info}")

        mime_type, _ = mimetypes.guess_type(str(local_path_obj))
        fsize = local_path_obj.stat().st_size

        return {
            "key": ret["key"],
            "hash": ret["hash"],
            "url": self.get_file_url(domain, key),
            "mimeType": mime_type or "application/octet-stream",
            "fsize": fsize,
            "isDuplicate": False,
        }

    def upload_stream(
        self,
        stream: BinaryIO,
        key: str,
        mime_type: str = "application/octet-stream",
        bucket: str | None = None,
        domain: str | None = None,
    ) -> dict:
        """
        Upload a seekable binary stream to Qiniu.

        Args:
            stream: Seekable binary stream; it is rewound to the start
                before uploading.
            key: Storage key.
            mime_type: MIME type of the content.
            bucket: Bucket name (defaults to the video/audio bucket).
            domain: Serving domain (defaults to the video/audio domain).

        Returns:
            {"key": ..., "hash": ..., "url": ...}

        Raises:
            RuntimeError: if the SDK reports an upload failure.
        """
        bucket = bucket or self.video_bucket
        domain = domain or self.video_domain

        token = self.get_upload_token(bucket, key)

        # Measure the stream (seek to end, whence=2) and rewind to the start.
        stream.seek(0, 2)
        data_size = stream.tell()
        stream.seek(0)

        ret, info = put_stream(
            up_token=token,
            key=key,
            input_stream=stream,
            file_name=key,
            data_size=data_size,
            params=None,
            mime_type=mime_type,
        )

        if ret is None:
            raise RuntimeError(f"上传失败: {info}")

        return {"key": ret["key"], "hash": ret["hash"], "url": self.get_file_url(domain, key)}

    def upload_audio(
        self, local_path: str, user_id: str | None = None, key: str | None = None
    ) -> dict:
        """
        Upload an audio file (dedicated entry point).

        The MIME type guessed from the file name must be in
        ALLOWED_AUDIO_TYPES; files whose type cannot be guessed are let through.

        Args:
            local_path: Path of the local audio file.
            user_id: Optional user id.
            key: Optional explicit storage key.

        Returns:
            The result of ``upload_file``.

        Raises:
            ValueError: if the guessed MIME type is not an allowed audio type.
        """
        mime_type, _ = mimetypes.guess_type(local_path)
        if mime_type and not self.validate_file_type(mime_type, self.ALLOWED_AUDIO_TYPES):
            raise ValueError(f"不支持的音频格式: {mime_type}")

        return self.upload_file(local_path=local_path, key=key, file_type="audio", user_id=user_id)

    def upload_video(
        self, local_path: str, user_id: str | None = None, key: str | None = None
    ) -> dict:
        """
        Upload a video file (dedicated entry point).

        The MIME type guessed from the file name must be in
        ALLOWED_VIDEO_TYPES; files whose type cannot be guessed are let through.

        Args:
            local_path: Path of the local video file.
            user_id: Optional user id.
            key: Optional explicit storage key.

        Returns:
            The result of ``upload_file``.

        Raises:
            ValueError: if the guessed MIME type is not an allowed video type.
        """
        mime_type, _ = mimetypes.guess_type(local_path)
        if mime_type and not self.validate_file_type(mime_type, self.ALLOWED_VIDEO_TYPES):
            raise ValueError(f"不支持的视频格式: {mime_type}")

        return self.upload_file(local_path=local_path, key=key, file_type="video", user_id=user_id)

    def get_file_url(self, domain: str, key: str, expires: int = 0) -> str:
        """
        Build the access URL of an object.

        Args:
            domain: Serving domain.
            key: Storage key.
            expires: Lifetime in seconds of a signed private URL; 0 returns
                the plain, unsigned URL.

        Returns:
            The access URL.
        """
        base_url = f"https://{domain}/{key}"

        if expires > 0:
            # Signed, time-limited download URL.
            return self.auth.private_download_url(base_url, expires)

        return base_url

    def delete_file(self, bucket: str, key: str) -> bool:
        """
        Delete an object.

        Args:
            bucket: Bucket name.
            key: Storage key.

        Returns:
            True when the SDK returned an empty dict as the result body,
            False otherwise.
        """
        ret, _info = self.bucket.delete(bucket, key)
        return ret == {}

    def get_file_info(self, bucket: str, key: str) -> dict | None:
        """
        Fetch an object's metadata.

        Args:
            bucket: Bucket name.
            key: Storage key.

        Returns:
            A metadata dict (key, fsize, hash, mime_type, put_time, type, url),
            or None when the stat call returns no result.
        """
        ret, _info = self.bucket.stat(bucket, key)
        if ret is None:
            return None

        return {
            "key": key,
            "fsize": ret.get("fsize"),
            "hash": ret.get("hash"),
            "mime_type": ret.get("mimeType"),
            "put_time": ret.get("putTime"),
            "type": ret.get("type"),
            # The serving domain is inferred from the key itself.
            "url": self.get_file_url(self._domain_for_key(key), key),
        }

    def refresh_cdn(self, keys: list[str]) -> dict:
        """
        Request a CDN cache refresh for the given keys.

        Args:
            keys: Storage keys to refresh.

        Returns:
            {"code": result code or None, "request_id": request id or None}.
            ``code`` is None when the SDK returned no result body.
        """
        urls = [self.get_file_url(self._domain_for_key(key), key) for key in keys]

        ret, info = self.cdn.refresh_urls(urls)
        if ret is None:
            # The SDK returned no body; report it instead of crashing on
            # ``None.get``.
            logger.warning("Qiniu CDN refresh returned no result: %s", info)

        return {
            "code": ret.get("code") if ret else None,
            "request_id": getattr(info, "req_id", None),
        }

    # ------------------------------------------------------------------
    # Async wrappers. Per the original author's note, the Qiniu SDK is
    # synchronous underneath, so calling it directly from an async route
    # would block the event loop. These wrappers offload each call to a
    # worker thread via asyncio.to_thread().
    # ------------------------------------------------------------------

    async def upload_file_async(
        self,
        local_path: str,
        key: str | None = None,
        file_type: str = "audio",
        user_id: str | None = None,
        check_duplicate: bool = True,
    ) -> dict:
        """Async version of ``upload_file``."""
        return await asyncio.to_thread(
            self.upload_file, local_path, key, file_type, user_id, check_duplicate
        )

    async def upload_stream_async(
        self,
        stream: BinaryIO,
        key: str,
        mime_type: str = "application/octet-stream",
        bucket: str | None = None,
        domain: str | None = None,
    ) -> dict:
        """Async version of ``upload_stream``."""
        return await asyncio.to_thread(self.upload_stream, stream, key, mime_type, bucket, domain)

    async def upload_audio_async(
        self, local_path: str, user_id: str | None = None, key: str | None = None
    ) -> dict:
        """Async version of ``upload_audio``."""
        return await asyncio.to_thread(self.upload_audio, local_path, user_id, key)

    async def upload_video_async(
        self, local_path: str, user_id: str | None = None, key: str | None = None
    ) -> dict:
        """Async version of ``upload_video``."""
        return await asyncio.to_thread(self.upload_video, local_path, user_id, key)

    async def delete_file_async(self, bucket: str, key: str) -> bool:
        """Async version of ``delete_file``."""
        return await asyncio.to_thread(self.delete_file, bucket, key)

    async def get_file_info_async(self, bucket: str, key: str) -> dict | None:
        """Async version of ``get_file_info``."""
        return await asyncio.to_thread(self.get_file_info, bucket, key)

    async def refresh_cdn_async(self, keys: list[str]) -> dict:
        """Async version of ``refresh_cdn``."""
        return await asyncio.to_thread(self.refresh_cdn, keys)


# Module-level singleton, created lazily by get_qiniu_service().
_qiniu_service: QiniuService | None = None


def get_qiniu_service() -> QiniuService:
    """Return the shared QiniuService instance, creating it on first use."""
    global _qiniu_service
    if _qiniu_service is None:
        _qiniu_service = QiniuService()
    return _qiniu_service


def rebuild_qiniu_service() -> QiniuService:
    """Discard the shared instance and build a new one (call after a config reload)."""
    global _qiniu_service
    _qiniu_service = None
    return get_qiniu_service()