351 lines
11 KiB
Python
351 lines
11 KiB
Python
"""
|
|
AnyToCopy video copy-extraction service
=======================================

Extracts the copy (text) of videos from 50+ platforms and removes video watermarks.

Docs: https://www.anytocopy.com/account/api/docs
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
|
|
import aiohttp
|
|
from pydantic import BaseModel, Field
|
|
|
|
from app.config import get_settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AnyToCopyConfig(BaseModel):
    """Connection settings for the AnyToCopy API.

    All fields have defaults, so an empty config can be constructed; the
    credentials default to empty strings.
    """

    # Credentials.
    api_key: str = Field("", description="API Key")
    api_secret: str = Field("", description="API Secret")

    # Root URL that endpoint paths are appended to.
    base_url: str = Field(
        "https://api.anytocopy.com/vip/open-api/v1",
        description="API Base URL",
    )
|
|
|
|
|
|
class VideoExtractResult(BaseModel):
    """Result of a single video extraction task.

    Only ``task_id`` is required; every other field defaults to an empty
    value so that failure/timeout results can be built from just a task id,
    a status and an error message.
    """

    task_id: str

    # Extracted text and metadata.
    title: str = ""
    content: str = ""
    text_content: str = ""  # speech-to-text transcript

    # Media links and descriptive fields.
    video_url: str = ""
    audio_url: str = ""
    cover: str = ""
    platform: str = ""
    duration: float = 0.0

    # Task state: WAITING, SUCCESS, FAILURE (the service also reports TIMEOUT
    # when polling gives up).
    status: str = ""
    error_message: str = ""
|
|
|
|
|
|
class AnyToCopyService:
|
|
"""
|
|
AnyToCopy 视频文案提取服务
|
|
|
|
支持平台:
|
|
- 小红书 (xhs)
|
|
- 抖音 (douyin)
|
|
- 快手 (kuaishou)
|
|
- 等 50+ 平台
|
|
"""
|
|
|
|
# 支持的视频平台链接正则
|
|
PLATFORM_PATTERNS = {
|
|
"xiaohongshu": [
|
|
r"https?://(www\.)?xiaohongshu\.com/.*",
|
|
r"https?://xhslink\.com/[a-zA-Z0-9_-]+",
|
|
r"https?://(www\.)?xhs\.cn/.*",
|
|
],
|
|
"douyin": [
|
|
r"https?://(www\.)?douyin\.com/.*",
|
|
r"https?://v\.douyin\.com/[a-zA-Z0-9_-]+",
|
|
r"https?://(www\.)?iesdouyin\.com/.*",
|
|
],
|
|
"kuaishou": [
|
|
r"https?://(www\.)?kuaishou\.com/.*",
|
|
r"https?://v\.kuaishou\.com/[a-zA-Z0-9_-]+",
|
|
],
|
|
"bilibili": [
|
|
r"https?://(www\.)?bilibili\.com/.*",
|
|
r"https?://b23\.tv/[a-zA-Z0-9_-]+",
|
|
],
|
|
"weibo": [
|
|
r"https?://(www\.)?weibo\.com/.*",
|
|
r"https?://m\.weibo\.cn/.*",
|
|
],
|
|
}
|
|
|
|
def __init__(self, config: dict | None = None):
|
|
self.config = config or {}
|
|
self.api_key = self.config.get("api_key", "")
|
|
self.api_secret = self.config.get("api_secret", "")
|
|
self.base_url = self.config.get("base_url", "https://api.anytocopy.com/vip/open-api/v1")
|
|
|
|
def _get_headers(self) -> dict[str, str]:
|
|
"""获取请求头"""
|
|
return {
|
|
"X-API-Key": self.api_key,
|
|
"X-API-Secret": self.api_secret,
|
|
"Content-Type": "application/json",
|
|
}
|
|
|
|
@classmethod
|
|
def is_video_url(cls, text: str) -> bool:
|
|
"""
|
|
检测文本是否为视频链接
|
|
|
|
Args:
|
|
text: 输入文本
|
|
|
|
Returns:
|
|
bool: 是否为视频链接
|
|
"""
|
|
if not text or not isinstance(text, str):
|
|
return False
|
|
|
|
text = text.strip()
|
|
|
|
# 检查是否匹配任一平台链接模式
|
|
for platform, patterns in cls.PLATFORM_PATTERNS.items():
|
|
for pattern in patterns:
|
|
if re.match(pattern, text, re.IGNORECASE):
|
|
return True
|
|
|
|
return False
|
|
|
|
@classmethod
|
|
def extract_url_from_text(cls, text: str) -> str | None:
|
|
"""
|
|
从文本中提取视频链接
|
|
|
|
Args:
|
|
text: 可能包含链接的文本
|
|
|
|
Returns:
|
|
str | None: 提取的链接或 None
|
|
"""
|
|
if not text or not isinstance(text, str):
|
|
return None
|
|
|
|
# URL 正则匹配(排除中文标点和常见标点)
|
|
url_pattern = r"https?://[a-zA-Z0-9._~:/?#\[\]@!$&'()*+,;=%-]+"
|
|
urls = re.findall(url_pattern, text)
|
|
|
|
for url in urls:
|
|
# 清理尾部标点
|
|
url = url.rstrip("。,!?;:" "''()【】、")
|
|
if cls.is_video_url(url):
|
|
return url
|
|
|
|
return None
|
|
|
|
async def submit_task(self, work_url: str, task_type: str = "TEXT") -> dict:
|
|
"""
|
|
提交视频文案提取任务
|
|
|
|
Args:
|
|
work_url: 作品链接
|
|
task_type: 任务类型,默认 TEXT
|
|
|
|
Returns:
|
|
dict: 包含 taskId 或错误信息
|
|
"""
|
|
if not self.api_key or not self.api_secret:
|
|
return {"code": 500, "msg": "AnyToCopy API Key 未配置"}
|
|
|
|
url = f"{self.base_url}/video/extract"
|
|
params = {"workUrl": work_url, "taskType": task_type}
|
|
|
|
try:
|
|
async with aiohttp.ClientSession() as session:
|
|
async with session.post(url, headers=self._get_headers(), params=params) as resp:
|
|
data = await resp.json()
|
|
logger.info(f"AnyToCopy submit_task response: {data}")
|
|
return data
|
|
except Exception as e:
|
|
logger.error(f"AnyToCopy submit_task error: {e}")
|
|
return {"code": 500, "msg": f"请求失败: {str(e)}"}
|
|
|
|
async def query_task(self, task_id: str) -> dict:
|
|
"""
|
|
查询任务状态和结果
|
|
|
|
Args:
|
|
task_id: 任务 ID
|
|
|
|
Returns:
|
|
dict: 任务状态和结果
|
|
"""
|
|
url = f"{self.base_url}/video/query"
|
|
params = {"taskId": task_id}
|
|
|
|
try:
|
|
async with aiohttp.ClientSession() as session:
|
|
async with session.get(url, headers=self._get_headers(), params=params) as resp:
|
|
data = await resp.json()
|
|
return data
|
|
except Exception as e:
|
|
logger.error(f"AnyToCopy query_task error: {e}")
|
|
return {"code": 500, "msg": f"请求失败: {str(e)}"}
|
|
|
|
async def extract_video_content(
|
|
self,
|
|
work_url: str,
|
|
max_retries: int = 60,
|
|
poll_interval: float = 3.0,
|
|
) -> VideoExtractResult:
|
|
"""
|
|
完整的视频提取流程(提交 + 轮询)
|
|
|
|
Args:
|
|
work_url: 作品链接
|
|
max_retries: 最大轮询次数
|
|
poll_interval: 轮询间隔(秒)
|
|
|
|
Returns:
|
|
VideoExtractResult: 提取结果
|
|
"""
|
|
# 1. 提交任务
|
|
submit_result = await self.submit_task(work_url)
|
|
if submit_result.get("code") != 200:
|
|
return VideoExtractResult(
|
|
task_id="",
|
|
status="FAILURE",
|
|
error_message=submit_result.get("msg", "提交任务失败"),
|
|
)
|
|
|
|
task_id = submit_result["data"]
|
|
logger.info(f"AnyToCopy task submitted, taskId: {task_id}")
|
|
|
|
# 2. 轮询查询
|
|
for i in range(max_retries):
|
|
await asyncio.sleep(poll_interval)
|
|
|
|
query_result = await self.query_task(task_id)
|
|
if query_result.get("code") != 200:
|
|
continue
|
|
|
|
data = query_result.get("data", {})
|
|
status = data.get("status")
|
|
|
|
if status == "SUCCESS":
|
|
logger.info(f"AnyToCopy task {task_id} completed successfully")
|
|
return VideoExtractResult(
|
|
task_id=task_id,
|
|
title=data.get("title", ""),
|
|
content=data.get("content", ""),
|
|
text_content=data.get("textContent", ""),
|
|
video_url=data.get("videoUrl", ""),
|
|
audio_url=data.get("audioUrl", ""),
|
|
cover=data.get("cover", ""),
|
|
platform=data.get("platform", ""),
|
|
duration=data.get("duration", 0.0),
|
|
status="SUCCESS",
|
|
error_message=data.get("errorMessage", ""),
|
|
)
|
|
elif status in ("FAILURE", "FAILED"):
|
|
logger.error(f"AnyToCopy task {task_id} failed: {data.get('errorMessage')}")
|
|
return VideoExtractResult(
|
|
task_id=task_id,
|
|
status="FAILURE",
|
|
error_message=data.get("errorMessage", "任务执行失败"),
|
|
)
|
|
else:
|
|
logger.debug(
|
|
f"AnyToCopy task {task_id} status: {status}, retry {i+1}/{max_retries}"
|
|
)
|
|
|
|
# 轮询超时
|
|
logger.warning(f"AnyToCopy task {task_id} polling timeout")
|
|
return VideoExtractResult(
|
|
task_id=task_id,
|
|
status="TIMEOUT",
|
|
error_message="轮询超时,任务未完成",
|
|
)
|
|
|
|
async def extract_text_from_input(self, user_input: str) -> dict:
|
|
"""
|
|
智能提取输入中的文案
|
|
|
|
- 如果是视频链接,提取视频文案
|
|
- 如果不是链接,返回原文
|
|
|
|
Args:
|
|
user_input: 用户输入(可能是链接或文案)
|
|
|
|
Returns:
|
|
dict: {
|
|
"is_video_url": bool,
|
|
"original_input": str,
|
|
"extracted_text": str,
|
|
"video_info": VideoExtractResult | None,
|
|
"error": str | None,
|
|
}
|
|
"""
|
|
result = {
|
|
"is_video_url": False,
|
|
"original_input": user_input,
|
|
"extracted_text": user_input,
|
|
"video_info": None,
|
|
"error": None,
|
|
}
|
|
|
|
# 检查是否为视频链接
|
|
url = self.extract_url_from_text(user_input)
|
|
if not url:
|
|
# 不是链接,直接返回原文
|
|
return result
|
|
|
|
# 是视频链接,提取文案
|
|
result["is_video_url"] = True
|
|
|
|
if not self.api_key or not self.api_secret:
|
|
result["error"] = "AnyToCopy API Key 未配置,无法提取视频文案"
|
|
return result
|
|
|
|
try:
|
|
video_result = await self.extract_video_content(url)
|
|
result["video_info"] = video_result
|
|
|
|
if video_result.status == "SUCCESS":
|
|
# 优先使用语音转文字文案,其次使用正文内容
|
|
extracted_text = (
|
|
video_result.text_content or video_result.content or video_result.title
|
|
)
|
|
result["extracted_text"] = extracted_text
|
|
logger.info(f"AnyToCopy extracted text length: {len(extracted_text)}")
|
|
else:
|
|
result["error"] = video_result.error_message or "视频文案提取失败"
|
|
|
|
except Exception as e:
|
|
logger.error(f"AnyToCopy extract_text_from_input error: {e}")
|
|
result["error"] = f"提取失败: {str(e)}"
|
|
|
|
return result
|
|
|
|
|
|
# Module-level singleton, created lazily by get_anytocopy_service().
_anytocopy_service: AnyToCopyService | None = None


def get_anytocopy_service() -> AnyToCopyService:
    """Return the shared AnyToCopyService, building it from Settings on first use."""
    global _anytocopy_service

    # Already built: reuse it.
    if _anytocopy_service is not None:
        return _anytocopy_service

    # First call: read the credentials and base URL from the application settings.
    settings = get_settings()
    _anytocopy_service = AnyToCopyService(
        {
            "api_key": settings.ANYTOCOPY_API_KEY or "",
            "api_secret": settings.ANYTOCOPY_API_SECRET or "",
            "base_url": settings.ANYTOCOPY_BASE_URL,
        }
    )
    return _anytocopy_service
|