# 炼妖壶调用OpenManus集成方案

## 🎯 架构设计

```
炼妖壶 (Cauldron)  ←→  OpenManus (爬虫服务)
       ↓                      ↓
太公心易分析系统        Playwright爬虫引擎
       ↓                      ↓
  八仙论道辩论          Seeking Alpha数据
```

## 🔌 集成方式

### 1. **HTTP API调用** (推荐)

#### OpenManus端提供RESTful API
```python
# OpenManus项目中
from fastapi import FastAPI
from playwright.async_api import async_playwright

app = FastAPI()

@app.post("/scrape/seekingalpha")
async def scrape_seeking_alpha(request: ScrapeRequest):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        # 设置反检测
        await page.set_extra_http_headers({
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)'
        })

        await page.goto(request.url)
        content = await page.content()
        await browser.close()

        return {"content": content, "status": "success"}
```

#### 炼妖壶端调用
```python
# 在你的炼妖壶项目中
import httpx

class OpenManusClient:
    def __init__(self, base_url: str, api_key: str = None):
        self.base_url = base_url
        self.api_key = api_key
        self.client = httpx.AsyncClient()

    async def scrape_seeking_alpha(self, url: str):
        """调用OpenManus爬取Seeking Alpha"""
        headers = {}
        if self.api_key:
            headers['Authorization'] = f'Bearer {self.api_key}'

        response = await self.client.post(
            f"{self.base_url}/scrape/seekingalpha",
            json={"url": url},
            headers=headers
        )
        return response.json()

# 使用示例
openmanus = OpenManusClient("https://openmanus.your-domain.com")
result = await openmanus.scrape_seeking_alpha(
    "https://seekingalpha.com/pr/20162773-ai-device-startup..."
)
```

### 2. **MCP协议集成** (最优雅)

#### OpenManus作为MCP服务
```python
# OpenManus项目中实现MCP服务器
from mcp import MCPServer

class OpenManusMCPServer(MCPServer):
    def __init__(self):
        super().__init__("openmanus-scraper")
        self.register_tool("scrape_seeking_alpha", self.scrape_seeking_alpha)

    async def scrape_seeking_alpha(self, url: str, extract_type: str = "article"):
        """MCP工具:爬取Seeking Alpha内容"""
        # Playwright爬虫逻辑
        return {
            "url": url,
            "title": extracted_title,
            "content": extracted_content,
            "metadata": metadata
        }
```

#### 炼妖壶端配置
```yaml
# mcp_services.yml中添加
services:
  - name: openmanus-scraper
    type: stdio  # 或 http
    command: python
    args: ["-m", "openmanus.mcp_server"]
    env:
      OPENMANUS_API_URL: "https://openmanus.your-domain.com"
      OPENMANUS_API_KEY: "${OPENMANUS_API_KEY}"
    dependencies: ["python>=3.9", "playwright"]
    description: "OpenManus网页爬虫服务"
```

### 3. **消息队列异步调用**

#### 使用Redis/RabbitMQ
```python
# 炼妖壶端发送任务
import redis
import json

class OpenManusQueue:
    def __init__(self, redis_url: str):
        self.redis = redis.from_url(redis_url)

    async def submit_scrape_task(self, url: str, callback_url: str = None):
        """提交爬虫任务到队列"""
        task = {
            "id": generate_task_id(),
            "url": url,
            "type": "seeking_alpha",
            "callback_url": callback_url,
            "timestamp": datetime.utcnow().isoformat()
        }

        self.redis.lpush("openmanus:tasks", json.dumps(task))
        return task["id"]

    async def get_result(self, task_id: str):
        """获取爬虫结果"""
        result = self.redis.get(f"openmanus:result:{task_id}")
        return json.loads(result) if result else None
```

### 4. **gRPC高性能调用**

#### OpenManus gRPC服务
```protobuf
// openmanus.proto
service OpenManusService {
  rpc ScrapeSeekingAlpha(ScrapeRequest) returns (ScrapeResponse);
  rpc GetTaskStatus(TaskRequest) returns (TaskResponse);
}

message ScrapeRequest {
  string url = 1;
  string extract_type = 2;
  map<string, string> options = 3;
}
```

#### 炼妖壶gRPC客户端
```python
import grpc
from openmanus_pb2_grpc import OpenManusServiceStub

class OpenManusGRPCClient:
    def __init__(self, server_address: str):
        self.channel = grpc.aio.insecure_channel(server_address)
        self.stub = OpenManusServiceStub(self.channel)

    async def scrape_seeking_alpha(self, url: str):
        request = ScrapeRequest(url=url, extract_type="article")
        response = await self.stub.ScrapeSeekingAlpha(request)
        return response
```

## 🔧 炼妖壶中的具体集成

### 1. **在N8N工作流中集成**
```javascript
// N8N自定义节点
{
  "name": "OpenManus Scraper",
  "type": "http-request",
  "url": "https://openmanus.your-domain.com/scrape/seekingalpha",
  "method": "POST",
  "body": {
    "url": "{{$json.article_url}}",
    "extract_type": "full_article"
  }
}
```

### 2. **在八仙论道中使用**
```python
# jixia_academy_clean/core/enhanced_jixia_agents.py
from openmanus_client import OpenManusClient

class EnhancedJixiaAgent:
    def __init__(self):
        self.openmanus = OpenManusClient(
            base_url=os.getenv("OPENMANUS_API_URL"),
            api_key=os.getenv("OPENMANUS_API_KEY")
        )

    async def research_topic(self, topic: str):
        """研究特定话题,使用OpenManus获取最新资讯"""
        # 搜索相关文章
        search_urls = await self.search_seeking_alpha(topic)

        # 批量爬取内容
        articles = []
        for url in search_urls[:5]:  # 限制数量
            content = await self.openmanus.scrape_seeking_alpha(url)
            articles.append(content)

        # 分析内容并生成辩论观点
        return self.generate_debate_points(articles)
```

### 3. **在太公心易系统中集成**
```python
# src/core/xinyi_system.py
class XinyiAnalysisEngine:
    def __init__(self):
        self.openmanus = OpenManusClient(
            base_url=os.getenv("OPENMANUS_API_URL")
        )

    async def analyze_market_sentiment(self, symbol: str):
        """分析市场情绪,结合爬虫数据"""
        # 获取Seeking Alpha上的相关分析
        articles = await self.get_symbol_analysis(symbol)

        # 结合太公心易的卦象分析
        sentiment_score = self.calculate_sentiment(articles)
        hexagram = self.generate_hexagram(sentiment_score)

        return {
            "symbol": symbol,
            "sentiment": sentiment_score,
            "hexagram": hexagram,
            "articles": articles
        }
```

## 🚀 部署和配置

### 1. **环境变量配置**
```bash
# .env文件中添加
OPENMANUS_API_URL=https://openmanus.your-domain.com
OPENMANUS_API_KEY=your-secret-api-key
OPENMANUS_TIMEOUT=30
OPENMANUS_RETRY_COUNT=3
```

### 2. **Docker Compose集成**
```yaml
# docker-compose.yml
version: '3.8'
services:
  cauldron:
    build: .
    environment:
      - OPENMANUS_API_URL=http://openmanus:8000
    depends_on:
      - openmanus

  openmanus:
    image: your-registry/openmanus:latest
    ports:
      - "8001:8000"
    environment:
      - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
```

### 3. **监控和日志**
```python
# 添加监控
import logging
from prometheus_client import Counter, Histogram

openmanus_requests = Counter('openmanus_requests_total', 'Total OpenManus requests')
openmanus_duration = Histogram('openmanus_request_duration_seconds', 'OpenManus request duration')

class MonitoredOpenManusClient(OpenManusClient):
    async def scrape_seeking_alpha(self, url: str):
        openmanus_requests.inc()

        with openmanus_duration.time():
            try:
                result = await super().scrape_seeking_alpha(url)
                logging.info(f"Successfully scraped: {url}")
                return result
            except Exception as e:
                logging.error(f"Failed to scrape {url}: {e}")
                raise
```

## 💡 推荐方案

基于你的项目特点,我推荐:

1. **主要方案**: HTTP API + MCP协议
2. **备用方案**: 消息队列(处理大量任务时)
3. **监控**: Prometheus + Grafana
4. **缓存**: Redis缓存爬虫结果

这样既保持了架构的清晰分离,又能充分利用OpenManus的爬虫能力!