Initial commit: 炼妖壶 (Lianyaohu) - 稷下学宫 (Jixia Academy) AI debate system
- 🏛️ 稷下学宫 (Jixia Academy) "Eight Immortals" AI debate system
- 🌍 天下 (Tianxia) system capital-ecosystem analysis
- 🔒 Secure configuration management (Doppler integration)
- 📊 RapidAPI "perpetual motion" data engine
- 🎨 Modern Streamlit interface
- ✅ Removed all leaked sensitive information
61
scripts/add_sequence_ids.py
Normal file
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""
Add sequence numbers to existing articles
"""

import os
from pymongo import MongoClient


def add_sequence_ids():
    """Add sequence numbers to existing articles"""
    # Connect to MongoDB; credentials come from the MONGODB_URI environment variable
    mongo_uri = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    client = MongoClient(mongo_uri)
    db = client['taigong']
    collection = db['articles']

    print("开始为现有文章添加流水号...")

    # Find all articles that do not have a sequence_id yet
    articles_without_seq = list(collection.find(
        {"sequence_id": {"$exists": False}},
        {"_id": 1, "title": 1, "created_at": 1}
    ).sort("created_at", 1))  # sorted by creation time

    print(f"找到 {len(articles_without_seq)} 篇文章需要添加流水号")

    if len(articles_without_seq) == 0:
        print("所有文章都已有流水号")
        return

    # Assign sequence numbers starting from 1
    for i, article in enumerate(articles_without_seq, 1):
        sequence_id = i
        article_id = f"NEWS_{sequence_id:08d}"  # NEWS_00000001 format

        collection.update_one(
            {"_id": article["_id"]},
            {
                "$set": {
                    "sequence_id": sequence_id,
                    "article_id": article_id,
                    "batch_id": "migration_batch",
                    "last_updated": "2025-02-08T00:00:00Z"
                }
            }
        )

        print(f" {sequence_id:3d}: {article['title'][:50]}...")

    print(f"流水号添加完成,共处理 {len(articles_without_seq)} 篇文章")

    # Verify the result
    total_with_seq = collection.count_documents({"sequence_id": {"$exists": True}})
    max_seq = collection.find_one({}, sort=[("sequence_id", -1)])

    print(f"验证结果:")
    print(f" 有流水号的文章: {total_with_seq} 篇")
    print(f" 最大流水号: {max_seq['sequence_id'] if max_seq else 0}")


if __name__ == "__main__":
    add_sequence_ids()
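Note on the migration above: add_sequence_ids.py assigns sequence_id and article_id purely in application code, so nothing in the collection itself prevents a later workflow run from reusing a number. A minimal pymongo sketch of enforcing uniqueness with indexes is shown below; the taigong/articles names follow the script, while the sparse-index choice and the standalone helper are assumptions, not part of this commit.

```python
import os
from pymongo import MongoClient, ASCENDING


def ensure_unique_indexes():
    """Sketch: enforce uniqueness of sequence_id / article_id at the database level."""
    client = MongoClient(os.getenv('MONGODB_URI', 'mongodb://localhost:27017/'))
    collection = client['taigong']['articles']

    # create_index is a no-op if an identical index already exists;
    # it fails if duplicate values are already present in the collection.
    # sparse=True skips documents that do not carry the field yet.
    collection.create_index([("sequence_id", ASCENDING)], unique=True, sparse=True)
    collection.create_index([("article_id", ASCENDING)], unique=True, sparse=True)


if __name__ == "__main__":
    ensure_unique_indexes()
```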
105
scripts/cleanup_duplicates.py
Normal file
@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""
Clean up duplicate article records in MongoDB
"""

import os
import sys
from pymongo import MongoClient
from collections import defaultdict
import hashlib


def generate_stable_id(title, pub_date, content):
    """Generate a stable article ID"""
    normalized_title = title.strip().lower()
    content_hash = content[:100] if content else ''
    date_str = pub_date or ''

    combined = f"{normalized_title}|{date_str}|{content_hash}"
    return hashlib.md5(combined.encode()).hexdigest()[:16]


def cleanup_duplicates():
    """Remove duplicate records"""
    # Connect to MongoDB; credentials come from the MONGODB_URI environment variable
    mongo_uri = os.getenv('MONGODB_URI', 'mongodb://localhost:27017/')
    client = MongoClient(mongo_uri)
    db = client['taigong']
    collection = db['articles']

    print("开始清理重复数据...")

    # 1. Fetch all articles
    articles = list(collection.find({}))
    print(f"总共找到 {len(articles)} 篇文章")

    # 2. Group by title to find duplicates
    title_groups = defaultdict(list)
    for article in articles:
        title_groups[article['title']].append(article)

    # 3. Handle the duplicates
    duplicates_removed = 0
    articles_updated = 0

    for title, group in title_groups.items():
        if len(group) > 1:
            print(f"发现重复标题: {title} ({len(group)} 篇)")

            # Keep the earliest record and delete the rest
            group.sort(key=lambda x: x.get('created_at', ''))
            keep_article = group[0]

            # Switch the kept article over to the stable ID
            stable_id = generate_stable_id(
                keep_article['title'],
                keep_article.get('published_time', ''),
                keep_article.get('content', '')
            )

            collection.update_one(
                {'_id': keep_article['_id']},
                {
                    '$set': {
                        'article_id': stable_id,
                        'content_hash': generate_stable_id(keep_article.get('content', ''), '', ''),
                        'last_updated': '2025-02-08T00:00:00Z'
                    }
                }
            )
            articles_updated += 1

            # Delete the duplicates
            for duplicate in group[1:]:
                collection.delete_one({'_id': duplicate['_id']})
                duplicates_removed += 1
                print(f" 删除重复项: {duplicate.get('article_id', 'unknown')}")

    # 4. Refresh the IDs of articles without duplicates
    single_articles = [group[0] for group in title_groups.values() if len(group) == 1]
    for article in single_articles:
        if not article.get('article_id') or len(article.get('article_id', '')) > 20:
            stable_id = generate_stable_id(
                article['title'],
                article.get('published_time', ''),
                article.get('content', '')
            )

            collection.update_one(
                {'_id': article['_id']},
                {
                    '$set': {
                        'article_id': stable_id,
                        'content_hash': generate_stable_id(article.get('content', ''), '', ''),
                        'last_updated': '2025-02-08T00:00:00Z'
                    }
                }
            )
            articles_updated += 1

    print(f"清理完成:")
    print(f" 删除重复文章: {duplicates_removed} 篇")
    print(f" 更新文章ID: {articles_updated} 篇")
    print(f" 最终文章数: {collection.count_documents({})} 篇")


if __name__ == "__main__":
    cleanup_duplicates()
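cleanup_duplicates.py fetches every document and groups them by title in Python, which is fine for a small collection. A sketch of the same duplicate detection done server-side with an aggregation is shown below (assuming pymongo and the same taigong/articles collection; the helper name is illustrative, not part of this commit).

```python
from pymongo import MongoClient


def find_duplicate_titles(collection):
    """Sketch: return (title, ids) pairs for titles that occur more than once."""
    pipeline = [
        {"$group": {"_id": "$title", "ids": {"$push": "$_id"}, "count": {"$sum": 1}}},
        {"$match": {"count": {"$gt": 1}}},
    ]
    return [(doc["_id"], doc["ids"]) for doc in collection.aggregate(pipeline)]


if __name__ == "__main__":
    articles = MongoClient()["taigong"]["articles"]
    for title, ids in find_duplicate_titles(articles):
        print(f"{title}: {len(ids)} copies")
```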
73
scripts/install_swarm.py
Normal file
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
Installer script for OpenAI Swarm
"""

import subprocess
import sys


def install_swarm():
    """Install OpenAI Swarm"""
    print("🚀 正在安装OpenAI Swarm...")

    try:
        # Install Swarm
        result = subprocess.run([
            sys.executable, "-m", "pip", "install",
            "git+https://github.com/openai/swarm.git"
        ], check=True, capture_output=True, text=True)

        print("✅ OpenAI Swarm安装成功!")
        print(result.stdout)

        # Verify the installation
        try:
            import swarm
            print("✅ Swarm导入测试成功")
            print(f"📦 Swarm版本: {getattr(swarm, '__version__', '未知')}")
        except ImportError as e:
            print(f"❌ Swarm导入失败: {e}")
            return False

        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ 安装失败: {e}")
        print(f"错误输出: {e.stderr}")
        return False
    except Exception as e:
        print(f"❌ 未知错误: {e}")
        return False


def main():
    """Entry point"""
    print("🏛️ 稷下学宫Swarm环境安装")
    print("=" * 40)

    # Check whether Swarm is already installed
    try:
        import swarm
        print("✅ OpenAI Swarm已安装")
        print(f"📦 版本: {getattr(swarm, '__version__', '未知')}")

        choice = input("是否重新安装?(y/N): ").strip().lower()
        if choice not in ['y', 'yes']:
            print("🎉 安装检查完成")
            return
    except ImportError:
        print("📦 OpenAI Swarm未安装,开始安装...")

    # Install Swarm
    success = install_swarm()

    if success:
        print("\n🎉 安装完成!现在可以使用Swarm八仙论道了")
        print("💡 使用方法:")
        print(" python src/jixia/debates/swarm_debate.py")
        print(" 或在Streamlit应用中选择'Swarm模式'")
    else:
        print("\n❌ 安装失败,请手动安装:")
        print(" pip install git+https://github.com/openai/swarm.git")


if __name__ == "__main__":
    main()
148
scripts/n8n_combined_dedup_insert.js
Normal file
@@ -0,0 +1,148 @@
const items = $input.all();
const results = [];

// Improved hash function - generates a stable ID from the content
function generateStableId(title, pubDate, content) {
  const normalizedTitle = title.trim().toLowerCase();
  const contentHash = content ? content.substring(0, 100) : '';
  const dateStr = pubDate || '';

  const combined = normalizedTitle + '|' + dateStr + '|' + contentHash;

  let hash = 0;
  for (let i = 0; i < combined.length; i++) {
    const char = combined.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

console.log(`开始处理 ${items.length} 条RSS数据`);

// Dedup within this execution
const processedInThisRun = new Set();

// Process each RSS item
for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) {
    console.log('跳过无标题数据');
    continue;
  }

  // Generate a stable article ID
  const stableId = generateStableId(
    data.title,
    data.isoDate || data.pubDate,
    data['content:encodedSnippet'] || data.contentSnippet || ''
  );

  // Generate a content hash
  const contentHash = generateStableId(
    data['content:encodedSnippet'] || data.contentSnippet || '',
    '',
    ''
  );

  // Prepare the article document
  const articleData = {
    article_id: stableId,
    title: data.title,
    content: data['content:encodedSnippet'] || data.contentSnippet || '',
    content_hash: contentHash,
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || '',
    processed: false,
    created_at: new Date().toISOString(),
    last_updated: new Date().toISOString()
  };

  try {
    // Upsert so duplicate inserts are avoided
    const result = await mongoClient.db('taigong').collection('articles').updateOne(
      {
        $or: [
          { article_id: stableId },
          { title: data.title }
        ]
      },
      {
        $setOnInsert: {
          article_id: stableId,
          title: data.title,
          content: articleData.content,
          content_hash: contentHash,
          published_time: articleData.published_time,
          source_url: articleData.source_url,
          processed: false,
          created_at: articleData.created_at
        },
        $set: {
          last_updated: new Date().toISOString()
        }
      },
      { upsert: true }
    );

    if (result.upsertedCount > 0) {
      console.log('✅ 新增文章:', data.title);
      results.push({
        json: {
          action: 'inserted',
          article_id: stableId,
          title: data.title,
          status: 'success'
        }
      });
    } else if (result.modifiedCount > 0) {
      console.log('🔄 更新文章:', data.title);
      results.push({
        json: {
          action: 'updated',
          article_id: stableId,
          title: data.title,
          status: 'success'
        }
      });
    } else {
      console.log('⏭️ 文章已存在,跳过:', data.title);
    }

  } catch (error) {
    console.error('❌ 处理文章失败:', data.title, error.message);
    results.push({
      json: {
        action: 'error',
        title: data.title,
        error: error.message,
        status: 'failed'
      }
    });
  }
}

console.log(`处理完成: 原始${items.length}条, 成功处理${results.length}条`);

// Aggregate the results
const stats = results.reduce((acc, item) => {
  acc[item.json.action] = (acc[item.json.action] || 0) + 1;
  return acc;
}, {});

console.log('处理统计:', stats);

// If nothing was produced, return an empty success status
if (results.length === 0) {
  return [{
    json: {
      message: '没有新数据需要处理',
      total_processed: items.length,
      status: 'completed'
    }
  }];
}

return results;
85
scripts/n8n_deduplication_fix.js
Normal file
@@ -0,0 +1,85 @@
const items = $input.all();
const processedItems = [];

// Improved hash function - generates a stable ID from the content
function generateStableId(title, pubDate, content) {
  const normalizedTitle = title.trim().toLowerCase();
  const contentHash = content ? content.substring(0, 100) : '';
  const dateStr = pubDate || '';

  const combined = normalizedTitle + '|' + dateStr + '|' + contentHash;

  let hash = 0;
  for (let i = 0; i < combined.length; i++) {
    const char = combined.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

// 1. Load the IDs and titles of articles that already exist in the database
const existingArticles = new Set();
try {
  const existing = await mongoClient.db('taigong').collection('articles')
    .find({}, { projection: { article_id: 1, title: 1, content_hash: 1 } })
    .toArray();

  existing.forEach(article => {
    existingArticles.add(article.article_id);
    // Also index by title as a fallback check
    existingArticles.add(article.title);
  });

  console.log(`数据库中已有 ${existing.length} 篇文章`);
} catch (error) {
  console.log('查询现有文章失败:', error);
}

// 2. Process the incoming data
for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) continue;

  // Generate a stable article ID
  const stableId = generateStableId(
    data.title,
    data.isoDate || data.pubDate,
    data['content:encodedSnippet'] || data.contentSnippet || ''
  );

  // Check whether it already exists (double check by ID and by title)
  if (existingArticles.has(stableId) || existingArticles.has(data.title)) {
    console.log('跳过重复文章:', data.title);
    continue;
  }

  // Content hash used for later dedup checks
  const contentHash = generateStableId(
    data['content:encodedSnippet'] || data.contentSnippet || '',
    '',
    ''
  );

  const processedItem = {
    article_id: stableId, // stable ID
    title: data.title,
    content: data['content:encodedSnippet'] || data.contentSnippet || '',
    content_hash: contentHash, // new: content hash
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || '', // new: source link
    processed: false,
    created_at: new Date().toISOString(),
    last_updated: new Date().toISOString() // new: update timestamp
  };

  processedItems.push({ json: processedItem });
  // Add to the existing set to avoid duplicates within this run
  existingArticles.add(stableId);
  existingArticles.add(data.title);
}

console.log(`处理完成: 原始${items.length}条, 去重后${processedItems.length}条`);
return processedItems;
85
scripts/n8n_direct_insert.js
Normal file
@@ -0,0 +1,85 @@
const items = $input.all();
const results = [];

// Nothing to insert
if (items.length === 0 || (items.length === 1 && items[0].json.status === 'no_new_data')) {
  console.log('没有新数据需要插入');
  return items;
}

console.log(`准备插入 ${items.length} 条新文章`);

// Prepare the documents for a bulk insert
const documentsToInsert = items.map(item => item.json);

try {
  // Bulk insert; uniqueness was already ensured upstream, so insert directly
  const result = await mongoClient.db('taigong').collection('articles').insertMany(
    documentsToInsert,
    { ordered: false } // keep inserting the rest even if one document fails
  );

  console.log(`✅ 成功插入 ${result.insertedCount} 条文章`);

  // Report the inserted documents
  for (let i = 0; i < documentsToInsert.length; i++) {
    const doc = documentsToInsert[i];
    const insertedId = result.insertedIds[i];

    results.push({
      json: {
        action: 'inserted',
        sequence_id: doc.sequence_id,
        article_id: doc.article_id,
        title: doc.title,
        mongodb_id: insertedId,
        status: 'success'
      }
    });
  }

} catch (error) {
  console.error('❌ 批量插入失败:', error.message);

  // If the bulk insert fails, fall back to inserting one document at a time
  console.log('尝试逐条插入...');

  for (const doc of documentsToInsert) {
    try {
      const result = await mongoClient.db('taigong').collection('articles').insertOne(doc);

      console.log(`✅ 单条插入成功: ${doc.article_id}`);
      results.push({
        json: {
          action: 'inserted',
          sequence_id: doc.sequence_id,
          article_id: doc.article_id,
          title: doc.title,
          mongodb_id: result.insertedId,
          status: 'success'
        }
      });

    } catch (singleError) {
      console.error(`❌ 单条插入失败 ${doc.article_id}:`, singleError.message);
      results.push({
        json: {
          action: 'error',
          sequence_id: doc.sequence_id,
          article_id: doc.article_id,
          title: doc.title,
          error: singleError.message,
          status: 'failed'
        }
      });
    }
  }
}

// Aggregate the results
const successCount = results.filter(r => r.json.status === 'success').length;
const failCount = results.filter(r => r.json.status === 'failed').length;

console.log(`插入完成: 成功 ${successCount} 条, 失败 ${failCount} 条`);

return results;
39
scripts/n8n_minimal_news.js
Normal file
@@ -0,0 +1,39 @@
const items = $input.all();

console.log(`原始数据: ${items.length} 条`);

// Dedup within this batch
const seenTitles = new Set();
const uniqueItems = [];

// Starting ID derived from the timestamp so every run gets a fresh range
let nextId = Math.floor(Date.now() / 1000);

for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) continue;

  // Dedup within this batch
  if (seenTitles.has(data.title)) {
    console.log('⏭️ 本批次重复,跳过:', data.title);
    continue;
  }

  const newsItem = {
    id: nextId,
    title: data.title,
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || ''
  };

  uniqueItems.push({ json: newsItem });
  seenTitles.add(data.title);

  console.log(`✅ ID ${nextId}: ${data.title}`);
  nextId++;
}

console.log(`本批次去重后: ${uniqueItems.length} 条`);
return uniqueItems;
54
scripts/n8n_safe_insert.js
Normal file
@@ -0,0 +1,54 @@
// n8n MongoDB insert node code
const items = $input.all();
const results = [];

for (const item of items) {
  const data = item.json;

  try {
    // Upsert so duplicate inserts are avoided
    const result = await mongoClient.db('taigong').collection('articles').updateOne(
      {
        $or: [
          { article_id: data.article_id },
          { title: data.title }
        ]
      },
      {
        $setOnInsert: {
          article_id: data.article_id,
          title: data.title,
          content: data.content,
          content_hash: data.content_hash,
          published_time: data.published_time,
          source_url: data.source_url,
          processed: data.processed,
          created_at: data.created_at
        },
        $set: {
          last_updated: new Date().toISOString()
        }
      },
      { upsert: true }
    );

    if (result.upsertedCount > 0) {
      console.log('新增文章:', data.title);
      results.push({
        json: {
          action: 'inserted',
          article_id: data.article_id,
          title: data.title
        }
      });
    } else {
      console.log('文章已存在,跳过:', data.title);
    }

  } catch (error) {
    console.error('插入文章失败:', data.title, error);
  }
}

console.log(`成功处理 ${results.length} 篇新文章`);
return results;
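For exercising the same upsert logic outside n8n, a hedged pymongo equivalent of the filter / $setOnInsert / $set pattern used in n8n_safe_insert.js might look like the sketch below. The taigong/articles names come from the script; the safe_insert helper and the demo document are illustrative, not part of the commit.

```python
from datetime import datetime, timezone
from pymongo import MongoClient


def safe_insert(collection, article):
    """Sketch: insert an article only if neither its ID nor its title exists yet."""
    now = datetime.now(timezone.utc).isoformat()
    result = collection.update_one(
        {"$or": [{"article_id": article["article_id"]}, {"title": article["title"]}]},
        {
            # $setOnInsert only applies when the upsert creates a new document
            "$setOnInsert": {**article, "created_at": now},
            "$set": {"last_updated": now},
        },
        upsert=True,
    )
    return "inserted" if result.upserted_id else "skipped"


if __name__ == "__main__":
    articles = MongoClient()["taigong"]["articles"]
    print(safe_insert(articles, {"article_id": "demo", "title": "demo title"}))
```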
119
scripts/n8n_sequential_id_system.js
Normal file
@@ -0,0 +1,119 @@
const items = $input.all();
const processedItems = [];

// Fetch the current maximum sequence number
async function getCurrentMaxId() {
  try {
    const result = await mongoClient.db('taigong').collection('articles')
      .findOne({}, {
        sort: { sequence_id: -1 },
        projection: { sequence_id: 1 }
      });

    return result ? result.sequence_id : 0;
  } catch (error) {
    console.log('获取最大流水号失败,从1开始:', error.message);
    return 0;
  }
}

// Fetch the set of existing article titles (used for dedup checks)
async function getExistingTitles() {
  try {
    const existing = await mongoClient.db('taigong').collection('articles')
      .find({}, { projection: { title: 1 } })
      .toArray();

    return new Set(existing.map(doc => doc.title));
  } catch (error) {
    console.log('获取已存在标题失败:', error.message);
    return new Set();
  }
}

// Generate a content hash (used to detect content changes)
function generateContentHash(content) {
  if (!content) return '';

  let hash = 0;
  const str = content.substring(0, 200); // first 200 characters
  for (let i = 0; i < str.length; i++) {
    const char = str.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

console.log(`开始处理 ${items.length} 条RSS数据`);

// 1. Get the current maximum sequence number
const currentMaxId = await getCurrentMaxId();
console.log(`当前数据库最大流水号: ${currentMaxId}`);

// 2. Get the titles that already exist
const existingTitles = await getExistingTitles();
console.log(`数据库中已有 ${existingTitles.size} 篇文章`);

// 3. Process the new data and assign sequence numbers
let nextSequenceId = currentMaxId + 1;
const seenTitlesInBatch = new Set(); // dedup within this batch

for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) {
    console.log('跳过无标题数据');
    continue;
  }

  // Check for duplicates (database + this batch)
  if (existingTitles.has(data.title) || seenTitlesInBatch.has(data.title)) {
    console.log('⏭️ 跳过重复文章:', data.title);
    continue;
  }

  // Assign the next sequence number
  const sequenceId = nextSequenceId++;

  // Build the article document
  const articleData = {
    sequence_id: sequenceId, // primary key: sequence number
    article_id: `NEWS_${sequenceId.toString().padStart(8, '0')}`, // formatted ID: NEWS_00000001
    title: data.title,
    content: data['content:encodedSnippet'] || data.contentSnippet || '',
    content_hash: generateContentHash(data['content:encodedSnippet'] || data.contentSnippet || ''),
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || '',
    rss_source: data.meta?.title || 'unknown', // RSS feed name
    processed: false,
    created_at: new Date().toISOString(),
    batch_id: Date.now().toString() // batch ID, used for tracing
  };

  processedItems.push({ json: articleData });
  seenTitlesInBatch.add(data.title);

  console.log(`✅ 分配流水号 ${sequenceId}: ${data.title}`);
}

console.log(`流水号分配完成:`);
console.log(` 原始数据: ${items.length} 条`);
console.log(` 跳过重复: ${items.length - processedItems.length} 条`);
console.log(` 新增数据: ${processedItems.length} 条`);
console.log(` 流水号范围: ${currentMaxId + 1} - ${nextSequenceId - 1}`);

// If there is no new data, return an empty result
if (processedItems.length === 0) {
  return [{
    json: {
      message: '没有新数据需要处理',
      current_max_id: currentMaxId,
      total_articles_in_db: existingTitles.size,
      status: 'no_new_data'
    }
  }];
}

return processedItems;
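One caveat with n8n_sequential_id_system.js: deriving the next sequence_id from the current maximum can hand out the same number twice if two workflow executions overlap. A common alternative is an atomic counter document updated with $inc; a pymongo sketch follows (the counters collection and its _id value are assumptions, not something this commit creates).

```python
from pymongo import MongoClient, ReturnDocument


def next_sequence_id(db, name="articles"):
    """Sketch: atomically increment and return a per-collection counter."""
    counter = db["counters"].find_one_and_update(
        {"_id": name},              # one counter document per sequence
        {"$inc": {"value": 1}},     # atomic increment, safe under concurrency
        upsert=True,                # create the counter on first use
        return_document=ReturnDocument.AFTER,
    )
    return counter["value"]


if __name__ == "__main__":
    db = MongoClient()["taigong"]
    seq = next_sequence_id(db)
    print(f"NEWS_{seq:08d}")
```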
52
scripts/n8n_simple_dedup.js
Normal file
@@ -0,0 +1,52 @@
const items = $input.all();

// Simple hash function
function simpleHash(str) {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    const char = str.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

console.log(`原始数据: ${items.length} 条`);

// Dedup by title
const seenTitles = new Set();
const uniqueItems = [];

for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) continue;

  // Dedup within this batch
  if (seenTitles.has(data.title)) {
    console.log('跳过重复:', data.title);
    continue;
  }

  // Generate a stable ID
  const stableId = simpleHash(data.title + (data.isoDate || data.pubDate || ''));

  const processedItem = {
    article_id: stableId,
    title: data.title,
    content: data['content:encodedSnippet'] || data.contentSnippet || '',
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || '',
    processed: false,
    created_at: new Date().toISOString()
  };

  uniqueItems.push({ json: processedItem });
  seenTitles.add(data.title);

  console.log(`✅ 处理: ${data.title}`);
}

console.log(`去重后: ${uniqueItems.length} 条`);
return uniqueItems;
163
scripts/n8n_universal_mongo.js
Normal file
@@ -0,0 +1,163 @@
const items = $input.all();
const results = [];

// Generic helper to locate the MongoDB connection object
function getMongoConnection() {
  // Try the different connection variable names n8n may expose
  if (typeof mongoClient !== 'undefined') return mongoClient;
  if (typeof mongo !== 'undefined') return mongo;
  if (typeof db !== 'undefined') return db;
  if (typeof $mongo !== 'undefined') return $mongo;
  if (typeof client !== 'undefined') return client;

  throw new Error('找不到MongoDB连接对象,请检查n8n MongoDB节点配置');
}

// Improved hash function - generates a stable ID from the content
function generateStableId(title, pubDate, content) {
  const normalizedTitle = title.trim().toLowerCase();
  const contentHash = content ? content.substring(0, 100) : '';
  const dateStr = pubDate || '';

  const combined = normalizedTitle + '|' + dateStr + '|' + contentHash;

  let hash = 0;
  for (let i = 0; i < combined.length; i++) {
    const char = combined.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(16);
}

console.log(`开始处理 ${items.length} 条RSS数据`);

// Obtain the MongoDB connection
let mongoConnection;
try {
  mongoConnection = getMongoConnection();
  console.log('✅ MongoDB连接获取成功');
} catch (error) {
  console.error('❌ MongoDB连接失败:', error.message);
  return [{
    json: {
      error: 'MongoDB连接失败',
      message: error.message,
      status: 'connection_failed'
    }
  }];
}

// Dedup within this execution
const processedInThisRun = new Set();

// Process each RSS item
for (const item of items) {
  const data = item.json;

  // Skip invalid entries
  if (!data.title) {
    console.log('跳过无标题数据');
    continue;
  }

  // Dedup check within this execution
  if (processedInThisRun.has(data.title)) {
    console.log('⏭️ 本次执行内重复,跳过:', data.title);
    continue;
  }

  // Generate a stable article ID
  const stableId = generateStableId(
    data.title,
    data.isoDate || data.pubDate,
    data['content:encodedSnippet'] || data.contentSnippet || ''
  );

  // Generate a content hash
  const contentHash = generateStableId(
    data['content:encodedSnippet'] || data.contentSnippet || '',
    '',
    ''
  );

  // Prepare the article document
  const articleData = {
    article_id: stableId,
    title: data.title,
    content: data['content:encodedSnippet'] || data.contentSnippet || '',
    content_hash: contentHash,
    published_time: data.isoDate || data.pubDate || new Date().toISOString(),
    source_url: data.link || '',
    rss_source: data.meta?.title || 'unknown',
    processed: false,
    created_at: new Date().toISOString(),
    last_updated: new Date().toISOString()
  };

  try {
    // Check whether it already exists in the database
    const existing = await mongoConnection.db('taigong').collection('articles').findOne({
      $or: [
        { article_id: stableId },
        { title: data.title }
      ]
    });

    if (existing) {
      console.log('⏭️ 数据库中已存在,跳过:', data.title);
      continue;
    }

    // Insert the new article
    const result = await mongoConnection.db('taigong').collection('articles').insertOne(articleData);

    console.log('✅ 新增文章:', data.title);
    results.push({
      json: {
        action: 'inserted',
        article_id: stableId,
        title: data.title,
        mongodb_id: result.insertedId,
        status: 'success'
      }
    });

    // Remember the title for dedup within this execution
    processedInThisRun.add(data.title);

  } catch (error) {
    console.error('❌ 处理文章失败:', data.title, error.message);
    results.push({
      json: {
        action: 'error',
        title: data.title,
        error: error.message,
        status: 'failed'
      }
    });
  }
}

console.log(`处理完成: 原始${items.length}条, 成功处理${results.length}条`);

// Aggregate the results
const stats = results.reduce((acc, item) => {
  acc[item.json.action] = (acc[item.json.action] || 0) + 1;
  return acc;
}, {});

console.log('处理统计:', stats);

// If nothing was produced, return an empty success status
if (results.length === 0) {
  return [{
    json: {
      message: '没有新数据需要处理',
      total_processed: items.length,
      status: 'completed'
    }
  }];
}

return results;
163
scripts/test_openrouter_api.py
Normal file
@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Test the OpenRouter API connection
Refactored version: uses the unified configuration management
"""

import requests
from typing import Dict, Any


def test_openrouter_api() -> bool:
    """
    Test the OpenRouter API connection

    Returns:
        Whether the test succeeded
    """
    # Use the unified configuration management
    try:
        from config.doppler_config import get_openrouter_key
        api_key = get_openrouter_key()
    except ImportError:
        # Fall back to the environment variable if the config module is unavailable
        import os
        api_key = os.getenv('OPENROUTER_API_KEY_1')
    except Exception as e:
        print(f"❌ 无法获取API密钥: {e}")
        return False

    if not api_key:
        print("❌ 未找到OpenRouter API密钥")
        print("请确保已配置 OPENROUTER_API_KEY_1 环境变量")
        return False

    print(f"🔑 使用API密钥: {api_key[:20]}...")

    # Test an API call
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://github.com/ben/liurenchaxin",
        "X-Title": "Jixia Academy Debate System",
        "Content-Type": "application/json"
    }

    data = {
        "model": "openai/gpt-3.5-turbo",
        "messages": [
            {"role": "user", "content": "你好,请简单回复一下测试连接"}
        ],
        "max_tokens": 50
    }

    try:
        print("📡 正在测试API连接...")
        response = requests.post(url, headers=headers, json=data, timeout=30)
        print(f"📡 响应状态码: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            print("✅ OpenRouter API连接成功!")
            if 'choices' in result and len(result['choices']) > 0:
                content = result['choices'][0]['message']['content']
                print(f"📝 AI回复: {content}")
            else:
                print("📝 API响应格式异常,但连接成功")
            return True
        else:
            print(f"❌ API调用失败: HTTP {response.status_code}")
            print(f"错误详情: {response.text}")
            return False

    except requests.exceptions.Timeout:
        print("❌ 请求超时,请检查网络连接")
        return False
    except requests.exceptions.RequestException as e:
        print(f"❌ 网络请求异常: {e}")
        return False
    except Exception as e:
        print(f"❌ 未知异常: {e}")
        return False


def test_rapidapi_connection() -> bool:
    """
    Test the RapidAPI connection

    Returns:
        Whether the test succeeded
    """
    try:
        from config.doppler_config import get_rapidapi_key
        api_key = get_rapidapi_key()
    except ImportError:
        import os
        api_key = os.getenv('RAPIDAPI_KEY')
    except Exception as e:
        print(f"❌ 无法获取RapidAPI密钥: {e}")
        return False

    if not api_key:
        print("❌ 未找到RapidAPI密钥")
        return False

    print(f"🔑 测试RapidAPI连接...")

    # Hit a simple API endpoint
    url = "https://yahoo-finance15.p.rapidapi.com/api/yahoo/qu/quote/AAPL"
    headers = {
        'X-RapidAPI-Key': api_key,
        'X-RapidAPI-Host': 'yahoo-finance15.p.rapidapi.com'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            print("✅ RapidAPI连接成功!")
            return True
        else:
            print(f"❌ RapidAPI连接失败: HTTP {response.status_code}")
            return False
    except Exception as e:
        print(f"❌ RapidAPI连接异常: {e}")
        return False


def main():
    """Entry point - run all API connection tests"""
    print("🧪 API连接测试套件")
    print("=" * 50)

    # Validate the configuration
    try:
        from config.doppler_config import validate_config
        print("\n🔧 验证配置...")
        config_valid = validate_config()
    except ImportError:
        print("⚠️ 配置模块不可用,跳过配置验证")
        config_valid = True

    # Test the OpenRouter API
    print("\n🤖 测试OpenRouter API...")
    openrouter_success = test_openrouter_api()

    # Test RapidAPI
    print("\n📊 测试RapidAPI...")
    rapidapi_success = test_rapidapi_connection()

    # Summarize the test results
    print("\n" + "=" * 50)
    print("📋 测试结果总结:")
    print(f" 配置验证: {'✅ 通过' if config_valid else '❌ 失败'}")
    print(f" OpenRouter API: {'✅ 通过' if openrouter_success else '❌ 失败'}")
    print(f" RapidAPI: {'✅ 通过' if rapidapi_success else '❌ 失败'}")

    all_passed = config_valid and openrouter_success and rapidapi_success
    if all_passed:
        print("\n🎉 所有API连接测试通过!系统已准备就绪。")
    else:
        print("\n⚠️ 部分测试失败,请检查配置和网络连接。")

    return all_passed


if __name__ == "__main__":
    success = main()
    exit(0 if success else 1)
297
scripts/test_rapidapi_inventory.py
Normal file
@@ -0,0 +1,297 @@
#!/usr/bin/env python3
"""
RapidAPI inventory test script
Automatically tests every subscribed API service and generates an availability report
"""

import requests
import json
import time
from datetime import datetime
from typing import Dict, List, Any
import os


class RapidAPITester:
    """RapidAPI tester"""

    def __init__(self):
        """Initialize the tester"""
        # Read the API key from the environment
        self.api_key = os.getenv('RAPIDAPI_KEY')
        if not self.api_key:
            raise ValueError("RAPIDAPI_KEY环境变量未设置")

        # API configuration - based on the perpetual-motion engine setup
        self.api_configs = {
            'alpha_vantage': 'alpha-vantage.p.rapidapi.com',
            'yahoo_finance_1': 'yahoo-finance15.p.rapidapi.com',
            'yh_finance_complete': 'yh-finance.p.rapidapi.com',
            'yahoo_finance_api_data': 'yahoo-finance-api1.p.rapidapi.com',
            'yahoo_finance_realtime': 'yahoo-finance-low-latency.p.rapidapi.com',
            'yh_finance': 'yh-finance-complete.p.rapidapi.com',
            'yahoo_finance_basic': 'yahoo-finance127.p.rapidapi.com',
            'seeking_alpha': 'seeking-alpha.p.rapidapi.com',
            'webull': 'webull.p.rapidapi.com',
            'morning_star': 'morningstar1.p.rapidapi.com',
            'tradingview': 'tradingview-ta.p.rapidapi.com',
            'investing_com': 'investing-cryptocurrency-markets.p.rapidapi.com',
            'finance_api': 'real-time-finance-data.p.rapidapi.com',
            'ms_finance': 'ms-finance.p.rapidapi.com',
            'sec_filings': 'sec-filings.p.rapidapi.com',
            'exchangerate_api': 'exchangerate-api.p.rapidapi.com',
            'crypto_news': 'cryptocurrency-news2.p.rapidapi.com'
        }

        # Test endpoint configuration
        self.test_endpoints = {
            'alpha_vantage': '/query?function=GLOBAL_QUOTE&symbol=AAPL',
            'yahoo_finance_1': '/api/yahoo/qu/quote/AAPL',
            'yh_finance_complete': '/stock/v2/get-summary?symbol=AAPL',
            'yahoo_finance_api_data': '/v8/finance/chart/AAPL',
            'yahoo_finance_realtime': '/stock/v2/get-summary?symbol=AAPL',
            'yh_finance': '/stock/v2/get-summary?symbol=AAPL',
            'yahoo_finance_basic': '/api/yahoo/qu/quote/AAPL',
            'seeking_alpha': '/symbols/get-profile?symbols=AAPL',
            'webull': '/stock/search?keyword=AAPL',
            'morning_star': '/market/v2/get-movers?performanceId=0P0000OQN8',
            'tradingview': '/get-analysis?symbol=AAPL&screener=america&exchange=NASDAQ',
            'investing_com': '/coins/get-overview',
            'finance_api': '/stock-price?symbol=AAPL',
            'ms_finance': '/stock/v2/get-summary?symbol=AAPL',
            'sec_filings': '/search?query=AAPL',
            'exchangerate_api': '/latest?base=USD',
            'crypto_news': '/v1/cryptonews'
        }

        self.results = {}

    def test_api(self, api_name: str) -> Dict[str, Any]:
        """
        Test a single API

        Args:
            api_name: API name

        Returns:
            Test result
        """
        if api_name not in self.api_configs:
            return {
                'success': False,
                'error': 'API not configured',
                'status_code': None,
                'response_time': 0
            }

        host = self.api_configs[api_name]
        endpoint = self.test_endpoints.get(api_name, '/')

        headers = {
            'X-RapidAPI-Key': self.api_key,
            'X-RapidAPI-Host': host,
            'Content-Type': 'application/json'
        }

        url = f"https://{host}{endpoint}"

        print(f"🧪 测试 {api_name} ({host})")
        print(f" URL: {url}")

        start_time = time.time()

        try:
            response = requests.get(url, headers=headers, timeout=10)
            response_time = time.time() - start_time

            result = {
                'success': response.status_code == 200,
                'status_code': response.status_code,
                'response_time': round(response_time, 2),
                'response_size': len(response.text),
                'error': None if response.status_code == 200 else response.text[:200]
            }

            if response.status_code == 200:
                print(f" ✅ 成功 - {response_time:.2f}s - {len(response.text)} bytes")
                # Try to parse the JSON body
                try:
                    data = response.json()
                    result['has_data'] = bool(data)
                    result['data_keys'] = list(data.keys()) if isinstance(data, dict) else []
                except ValueError:
                    result['has_data'] = False
                    result['data_keys'] = []
            else:
                print(f" ❌ 失败 - HTTP {response.status_code}")
                print(f" 错误: {response.text[:100]}...")

            return result

        except requests.exceptions.Timeout:
            print(f" ⏰ 超时")
            return {
                'success': False,
                'error': 'Request timeout',
                'status_code': None,
                'response_time': 10.0
            }
        except requests.exceptions.RequestException as e:
            print(f" ❌ 请求异常: {str(e)}")
            return {
                'success': False,
                'error': f'Request error: {str(e)}',
                'status_code': None,
                'response_time': time.time() - start_time
            }
        except Exception as e:
            print(f" ❌ 未知异常: {str(e)}")
            return {
                'success': False,
                'error': f'Unexpected error: {str(e)}',
                'status_code': None,
                'response_time': time.time() - start_time
            }

    def test_all_apis(self) -> Dict[str, Any]:
        """Test all APIs"""
        print("🚀 开始测试所有RapidAPI服务")
        print("=" * 60)

        for api_name in self.api_configs.keys():
            result = self.test_api(api_name)
            self.results[api_name] = result
            time.sleep(1)  # avoid sending requests too quickly
            print()

        return self.results

    def generate_report(self) -> str:
        """Generate the test report"""
        if not self.results:
            return "没有测试结果"

        # Totals
        total_apis = len(self.results)
        successful_apis = len([r for r in self.results.values() if r['success']])
        failed_apis = total_apis - successful_apis

        # Group by outcome
        success_list = []
        failed_list = []

        for api_name, result in self.results.items():
            if result['success']:
                success_list.append({
                    'name': api_name,
                    'host': self.api_configs[api_name],
                    'response_time': result['response_time'],
                    'data_keys': result.get('data_keys', [])
                })
            else:
                failed_list.append({
                    'name': api_name,
                    'host': self.api_configs[api_name],
                    'error': result['error'],
                    'status_code': result['status_code']
                })

        # Build the report
        report = f"""# RapidAPI 测试报告

## 📊 测试概览

- **测试时间**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **总API数**: {total_apis}
- **成功数**: {successful_apis} ({successful_apis/total_apis*100:.1f}%)
- **失败数**: {failed_apis} ({failed_apis/total_apis*100:.1f}%)

## ✅ 可用的API ({len(success_list)}个)

"""

        for api in sorted(success_list, key=lambda x: x['response_time']):
            report += f"### {api['name']}\n"
            report += f"- **主机**: `{api['host']}`\n"
            report += f"- **响应时间**: {api['response_time']}s\n"
            if api['data_keys']:
                report += f"- **数据字段**: {', '.join(api['data_keys'][:5])}\n"
            report += "\n"

        report += f"## ❌ 失败的API ({len(failed_list)}个)\n\n"

        for api in failed_list:
            report += f"### {api['name']}\n"
            report += f"- **主机**: `{api['host']}`\n"
            report += f"- **状态码**: {api['status_code']}\n"
            report += f"- **错误**: {api['error'][:100] if api['error'] else 'Unknown'}...\n"
            report += "\n"

        # Recommendations
        report += """## 🔧 优化建议

### 立即可用的API
"""

        fast_apis = [api for api in success_list if api['response_time'] < 2.0]
        if fast_apis:
            report += "以下API响应快速,建议优先使用:\n"
            for api in fast_apis:
                report += f"- **{api['name']}**: {api['response_time']}s\n"

        report += """
### 需要修复的API
"""

        if failed_list:
            report += "以下API需要检查端点配置或权限:\n"
            for api in failed_list[:5]:  # show only the first 5
                report += f"- **{api['name']}**: {api['error'][:50] if api['error'] else 'Unknown error'}...\n"

        return report

    def save_report(self, filename: str = None):
        """Save the report to a file"""
        if not filename:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"docs/rapidapi/test_report_{timestamp}.md"

        report = self.generate_report()

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)

        print(f"📄 报告已保存到: {filename}")
        return filename


def main():
    """Entry point"""
    print("🧪 RapidAPI库存测试工具")
    print("=" * 40)

    try:
        tester = RapidAPITester()

        # Test all APIs
        results = tester.test_all_apis()

        # Generate and display the report
        print("\n" + "=" * 60)
        print("📊 测试完成,生成报告...")

        report = tester.generate_report()
        print(report)

        # Save the report
        filename = tester.save_report()

        # Point at the inventory document to update
        print(f"\n💡 建议更新 docs/rapidapi/api_inventory.md")
        print(f"📁 详细报告: {filename}")

    except Exception as e:
        print(f"❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()