Initial commit for TTS project

2026-01-19 10:27:41 +08:00
commit a9abd3913d
160 changed files with 11031 additions and 0 deletions
--- a/scripts/initialize_characters.py
+++ b/scripts/initialize_characters.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+播客角色初始化脚本
+根据 chapter8.md 文件中的角色定义进行初始化
+"""
+
+import os
+import re
+from datetime import datetime
+
+def parse_characters_from_md(file_path):
+    """Parse podcast character definitions from a Markdown file.
+
+    A definition is a line of the form
+    ``Name (Role)：风格：<style> 推荐语音：<voice>``. The separator after the
+    name may be an ASCII or a full-width colon, the parentheses may be ASCII
+    or full-width, and the detail text may sit on the line directly below
+    the name. Lines that start with a bare field label (``风格`` or
+    ``推荐语音``) are treated as fields, not as characters, and are skipped.
+
+    Args:
+        file_path: Path of the UTF-8 Markdown file to read.
+
+    Returns:
+        A list of dicts with the keys ``name``, ``role``, ``accent`` and
+        ``voice_recommendation``. The built-in "Sonia" and "Author" entries
+        are appended when the file does not define a character with that
+        name. If the file does not exist, an error is printed and an empty
+        list is returned.
+    """
+    if not os.path.exists(file_path):
+        print(f"错误: 文件 {file_path} 不存在")
+        return []
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # Group 1 is "Name (Role)"; group 2 is the detail text after the colon.
+    # The name class includes parentheses so that "Name (Role)" can match at
+    # all, and uses only horizontal whitespace so that a name can never
+    # swallow the preceding lines.
+    pattern = (
+        r'^([A-Za-z\u4e00-\u9fa5 \t()（）]+?)[ \t]*[:：][ \t]*\n?'
+        r'([^#\n]*?)(?:\n|$)'
+    )
+    # Labels that introduce a field of a character rather than a character.
+    field_labels = {"风格", "推荐语音"}
+
+    characters = []
+    for raw_name, raw_details in re.findall(pattern, content, re.MULTILINE):
+        role_desc = raw_name.strip()
+        details = raw_details.strip()
+
+        # Split "Sonia (Host)" / "Graham（硅谷）" into name and role.
+        paren = re.search(r'[(（]([^()（）]*)[)）]', role_desc)
+        if paren:
+            name = role_desc[:paren.start()].strip()
+            role = paren.group(1).strip()
+        else:
+            name = role_desc
+            role = "未知角色"
+
+        # Ignore whitespace-only names and stand-alone field lines.
+        if not name or name in field_labels:
+            continue
+
+        # Each field runs from its own label up to the other label (or the
+        # end of the text), so the two are parsed independently and in
+        # either order.
+        accent = ""
+        if "风格：" in details:
+            accent = details.split("风格：", 1)[1].split("推荐语音：", 1)[0].strip()
+        voice_rec = ""
+        if "推荐语音：" in details:
+            voice_rec = details.split("推荐语音：", 1)[1].split("风格：", 1)[0].strip()
+
+        characters.append({
+            "name": name,
+            "role": role,
+            "accent": accent,
+            "voice_recommendation": voice_rec
+        })
+
+    # Characters that are always present, added only when the file did not
+    # already define a character with the same name.
+    additional_chars = [
+        {
+            "name": "Sonia",
+            "role": "Host (主持人)",
+            "accent": "冷静、客观、甚至带点冷幽默",
+            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural（男）或 en-US-JennyNeural（女）"
+        },
+        {
+            "name": "Author",
+            "role": "作者",
+            "accent": "分析性，权威性",
+            "voice_recommendation": "en-US-GuyNeural"
+        }
+    ]
+
+    known_names = {c["name"] for c in characters}
+    characters.extend(
+        char for char in additional_chars if char["name"] not in known_names
+    )
+
+    return characters
+
+def initialize_characters(md_path="/root/tts/plan/chapter8.md",
+                          output_dir="output/characters"):
+    """Initialize all podcast characters and write their config files.
+
+    Characters are parsed from ``md_path`` with ``parse_characters_from_md``;
+    when that yields nothing, a built-in default cast is used. For every
+    character a ``<name>_config.txt`` file is written into ``output_dir``,
+    followed by a ``character_summary.txt`` listing all of them. Progress is
+    printed to stdout.
+
+    Args:
+        md_path: Markdown file holding the character definitions. Defaults
+            to the path that used to be hard-coded here.
+        output_dir: Directory that receives the generated files; it is
+            created if it does not exist.
+
+    Returns:
+        The list of character dicts that was initialized.
+    """
+    # One timestamp for the whole run so that every generated file agrees.
+    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    print("=== 播客角色初始化 ===")
+    print(f"时间: {timestamp}")
+    print()
+
+    characters = parse_characters_from_md(md_path)
+
+    if not characters:
+        print("未找到角色定义，使用默认角色...")
+        characters = [
+            {
+                "name": "Sonia",
+                "role": "Host (主持人)",
+                "accent": "冷静、客观、甚至带点冷幽默",
+                "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural（男）或 en-US-JennyNeural（女）"
+            },
+            {
+                "name": "Graham",
+                "role": "硅谷",
+                "accent": "典型的 American Tech Bro，语速快，自信",
+                "voice_recommendation": "Edge TTS 的 en-US-GuyNeural 或 en-US-ChristopherNeural"
+            },
+            {
+                "name": "Dmitri",
+                "role": "俄罗斯",
+                "accent": "深沉，重音在后",
+                "voice_recommendation": "en-IE-ConnorNeural（爱尔兰音，稍微带点卷舌和厚重感）"
+            },
+            {
+                "name": "Amita",
+                "role": "印度",
+                "accent": "语速快，清晰的印度口音",
+                "voice_recommendation": "en-IN-NeerjaNeural（女）或 en-IN-PrabhatNeural（男）"
+            },
+            {
+                "name": "穆罕默德",
+                "role": "中东",
+                "accent": "沧桑，缓慢",
+                "voice_recommendation": "en-EG-SalmaNeural（埃及英语）"
+            },
+            {
+                "name": "Author",
+                "role": "作者",
+                "accent": "分析性，权威性",
+                "voice_recommendation": "en-US-GuyNeural"
+            }
+        ]
+
+    print(f"找到 {len(characters)} 个角色:")
+    print()
+
+    os.makedirs(output_dir, exist_ok=True)
+
+    for i, char in enumerate(characters, 1):
+        print(f"{i}. {char['name']} ({char['role']})")
+        print(f"   风格: {char['accent']}")
+        print(f"   推荐语音: {char['voice_recommendation']}")
+        print()
+
+        config_content = f"""角色配置文件
+名称: {char['name']}
+角色: {char['role']}
+风格: {char['accent']}
+推荐语音: {char['voice_recommendation']}
+初始化时间: {timestamp}
+状态: 已初始化
+"""
+        # The name comes from parsed text, so collapse anything that is not
+        # a word character or hyphen (whitespace, path separators, ...)
+        # before using it as a file name. \w is Unicode-aware, so CJK names
+        # are kept as they are.
+        safe_name = re.sub(r'[^\w-]+', '_', char['name'].lower()).strip('_')
+        config_path = os.path.join(output_dir, f"{safe_name or 'unnamed'}_config.txt")
+        with open(config_path, 'w', encoding='utf-8') as f:
+            f.write(config_content)
+
+    print(f"✓ 所有 {len(characters)} 个角色已初始化完成")
+    print(f"✓ 配置文件已保存到 {output_dir}/ 目录")
+
+    # Write the overall character summary in a single call.
+    summary_path = os.path.join(output_dir, "character_summary.txt")
+    summary_lines = [
+        "播客角色清单\n",
+        "=" * 50 + "\n",
+        f"生成时间: {timestamp}\n\n",
+    ]
+    for i, char in enumerate(characters, 1):
+        summary_lines.append(f"{i}. {char['name']} ({char['role']})\n")
+        summary_lines.append(f"   风格: {char['accent']}\n")
+        summary_lines.append(f"   推荐语音: {char['voice_recommendation']}\n\n")
+    with open(summary_path, 'w', encoding='utf-8') as f:
+        f.writelines(summary_lines)
+
+    print(f"✓ 角色清单已保存到: {summary_path}")
+
+    return characters
+
+# Run the initialization only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    initialize_characters()