# File: meijiaka-zy/python-api/app/services/anytocopy_service.py
# (Python source, 351 lines, 11 KiB)
"""
AnyToCopy 视频文案提取服务
============================
支持 50+ 平台视频文案提取、视频去水印
文档: https://www.anytocopy.com/account/api/docs
"""
import asyncio
import logging
import re
import aiohttp
from pydantic import BaseModel, Field
from app.config import get_settings
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class AnyToCopyConfig(BaseModel):
    """Configuration values for the AnyToCopy API.

    Every field has a default, so the model can be built with no arguments.
    """

    # Credential pair; both default to an empty string.
    api_key: str = Field(default="", description="API Key")
    api_secret: str = Field(default="", description="API Secret")

    # Base URL of the API.
    base_url: str = Field(
        description="API Base URL",
        default="https://api.anytocopy.com/vip/open-api/v1",
    )
class VideoExtractResult(BaseModel):
    """Outcome of a video extraction task.

    Only ``task_id`` is required; every other field falls back to an empty
    string (or ``0.0`` for ``duration``).
    """

    task_id: str

    # Textual fields.
    title: str = ""
    content: str = ""
    text_content: str = ""  # speech-to-text transcript

    # Media fields.
    video_url: str = ""
    audio_url: str = ""
    cover: str = ""
    platform: str = ""
    duration: float = 0.0

    # Task state: WAITING, SUCCESS or FAILURE; the polling code in this file
    # also sets "TIMEOUT" when it gives up waiting.
    status: str = ""
    error_message: str = ""
class AnyToCopyService:
"""
AnyToCopy 视频文案提取服务
支持平台:
- 小红书 (xhs)
- 抖音 (douyin)
- 快手 (kuaishou)
- 等 50+ 平台
"""
# 支持的视频平台链接正则
PLATFORM_PATTERNS = {
"xiaohongshu": [
r"https?://(www\.)?xiaohongshu\.com/.*",
r"https?://xhslink\.com/[a-zA-Z0-9_-]+",
r"https?://(www\.)?xhs\.cn/.*",
],
"douyin": [
r"https?://(www\.)?douyin\.com/.*",
r"https?://v\.douyin\.com/[a-zA-Z0-9_-]+",
r"https?://(www\.)?iesdouyin\.com/.*",
],
"kuaishou": [
r"https?://(www\.)?kuaishou\.com/.*",
r"https?://v\.kuaishou\.com/[a-zA-Z0-9_-]+",
],
"bilibili": [
r"https?://(www\.)?bilibili\.com/.*",
r"https?://b23\.tv/[a-zA-Z0-9_-]+",
],
"weibo": [
r"https?://(www\.)?weibo\.com/.*",
r"https?://m\.weibo\.cn/.*",
],
}
def __init__(self, config: dict | None = None):
self.config = config or {}
self.api_key = self.config.get("api_key", "")
self.api_secret = self.config.get("api_secret", "")
self.base_url = self.config.get("base_url", "https://api.anytocopy.com/vip/open-api/v1")
def _get_headers(self) -> dict[str, str]:
"""获取请求头"""
return {
"X-API-Key": self.api_key,
"X-API-Secret": self.api_secret,
"Content-Type": "application/json",
}
@classmethod
def is_video_url(cls, text: str) -> bool:
"""
检测文本是否为视频链接
Args:
text: 输入文本
Returns:
bool: 是否为视频链接
"""
if not text or not isinstance(text, str):
return False
text = text.strip()
# 检查是否匹配任一平台链接模式
for platform, patterns in cls.PLATFORM_PATTERNS.items():
for pattern in patterns:
if re.match(pattern, text, re.IGNORECASE):
return True
return False
@classmethod
def extract_url_from_text(cls, text: str) -> str | None:
"""
从文本中提取视频链接
Args:
text: 可能包含链接的文本
Returns:
str | None: 提取的链接或 None
"""
if not text or not isinstance(text, str):
return None
# URL 正则匹配(排除中文标点和常见标点)
url_pattern = r"https?://[a-zA-Z0-9._~:/?#\[\]@!$&'()*+,;=%-]+"
urls = re.findall(url_pattern, text)
for url in urls:
# 清理尾部标点
url = url.rstrip("。,!?;:" "''()【】、")
if cls.is_video_url(url):
return url
return None
async def submit_task(self, work_url: str, task_type: str = "TEXT") -> dict:
"""
提交视频文案提取任务
Args:
work_url: 作品链接
task_type: 任务类型,默认 TEXT
Returns:
dict: 包含 taskId 或错误信息
"""
if not self.api_key or not self.api_secret:
return {"code": 500, "msg": "AnyToCopy API Key 未配置"}
url = f"{self.base_url}/video/extract"
params = {"workUrl": work_url, "taskType": task_type}
try:
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=self._get_headers(), params=params) as resp:
data = await resp.json()
logger.info(f"AnyToCopy submit_task response: {data}")
return data
except Exception as e:
logger.error(f"AnyToCopy submit_task error: {e}")
return {"code": 500, "msg": f"请求失败: {str(e)}"}
async def query_task(self, task_id: str) -> dict:
"""
查询任务状态和结果
Args:
task_id: 任务 ID
Returns:
dict: 任务状态和结果
"""
url = f"{self.base_url}/video/query"
params = {"taskId": task_id}
try:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=self._get_headers(), params=params) as resp:
data = await resp.json()
return data
except Exception as e:
logger.error(f"AnyToCopy query_task error: {e}")
return {"code": 500, "msg": f"请求失败: {str(e)}"}
async def extract_video_content(
self,
work_url: str,
max_retries: int = 60,
poll_interval: float = 3.0,
) -> VideoExtractResult:
"""
完整的视频提取流程(提交 + 轮询)
Args:
work_url: 作品链接
max_retries: 最大轮询次数
poll_interval: 轮询间隔(秒)
Returns:
VideoExtractResult: 提取结果
"""
# 1. 提交任务
submit_result = await self.submit_task(work_url)
if submit_result.get("code") != 200:
return VideoExtractResult(
task_id="",
status="FAILURE",
error_message=submit_result.get("msg", "提交任务失败"),
)
task_id = submit_result["data"]
logger.info(f"AnyToCopy task submitted, taskId: {task_id}")
# 2. 轮询查询
for i in range(max_retries):
await asyncio.sleep(poll_interval)
query_result = await self.query_task(task_id)
if query_result.get("code") != 200:
continue
data = query_result.get("data", {})
status = data.get("status")
if status == "SUCCESS":
logger.info(f"AnyToCopy task {task_id} completed successfully")
return VideoExtractResult(
task_id=task_id,
title=data.get("title", ""),
content=data.get("content", ""),
text_content=data.get("textContent", ""),
video_url=data.get("videoUrl", ""),
audio_url=data.get("audioUrl", ""),
cover=data.get("cover", ""),
platform=data.get("platform", ""),
duration=data.get("duration", 0.0),
status="SUCCESS",
error_message=data.get("errorMessage", ""),
)
elif status in ("FAILURE", "FAILED"):
logger.error(f"AnyToCopy task {task_id} failed: {data.get('errorMessage')}")
return VideoExtractResult(
task_id=task_id,
status="FAILURE",
error_message=data.get("errorMessage", "任务执行失败"),
)
else:
logger.debug(
f"AnyToCopy task {task_id} status: {status}, retry {i+1}/{max_retries}"
)
# 轮询超时
logger.warning(f"AnyToCopy task {task_id} polling timeout")
return VideoExtractResult(
task_id=task_id,
status="TIMEOUT",
error_message="轮询超时,任务未完成",
)
async def extract_text_from_input(self, user_input: str) -> dict:
"""
智能提取输入中的文案
- 如果是视频链接,提取视频文案
- 如果不是链接,返回原文
Args:
user_input: 用户输入(可能是链接或文案)
Returns:
dict: {
"is_video_url": bool,
"original_input": str,
"extracted_text": str,
"video_info": VideoExtractResult | None,
"error": str | None,
}
"""
result = {
"is_video_url": False,
"original_input": user_input,
"extracted_text": user_input,
"video_info": None,
"error": None,
}
# 检查是否为视频链接
url = self.extract_url_from_text(user_input)
if not url:
# 不是链接,直接返回原文
return result
# 是视频链接,提取文案
result["is_video_url"] = True
if not self.api_key or not self.api_secret:
result["error"] = "AnyToCopy API Key 未配置,无法提取视频文案"
return result
try:
video_result = await self.extract_video_content(url)
result["video_info"] = video_result
if video_result.status == "SUCCESS":
# 优先使用语音转文字文案,其次使用正文内容
extracted_text = (
video_result.text_content or video_result.content or video_result.title
)
result["extracted_text"] = extracted_text
logger.info(f"AnyToCopy extracted text length: {len(extracted_text)}")
else:
result["error"] = video_result.error_message or "视频文案提取失败"
except Exception as e:
logger.error(f"AnyToCopy extract_text_from_input error: {e}")
result["error"] = f"提取失败: {str(e)}"
return result
# Module-level shared instance; stays None until get_anytocopy_service()
# is called for the first time.
_anytocopy_service: AnyToCopyService | None = None


def get_anytocopy_service() -> AnyToCopyService:
    """Return the shared AnyToCopyService, creating it on first use.

    The first call reads the AnyToCopy settings via ``get_settings()`` and
    stores the new instance in the module-level ``_anytocopy_service``;
    later calls return that same instance.
    """
    global _anytocopy_service
    if _anytocopy_service is not None:
        return _anytocopy_service

    # First call: build the service from application settings.
    settings = get_settings()
    _anytocopy_service = AnyToCopyService(
        {
            "api_key": settings.ANYTOCOPY_API_KEY or "",
            "api_secret": settings.ANYTOCOPY_API_SECRET or "",
            "base_url": settings.ANYTOCOPY_BASE_URL,
        }
    )
    return _anytocopy_service