tts/scripts/initialize_characters.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
播客角色初始化脚本
根据 chapter8.md 文件中的角色定义进行初始化
"""

import os
import re
from datetime import datetime

def parse_characters_from_md(file_path):
    """Parse podcast character definitions from a markdown file.

    A definition is a line shaped like ``Name (Role)：details``. The
    separator after the name may be an ASCII or a full-width colon, and the
    parentheses around the role may be ASCII or full-width. Within
    ``details`` the text following ``风格：`` is taken as the speaking style
    and the text following ``推荐语音：`` as the recommended voice.

    Two built-in characters ("Sonia" and "Author") are appended whenever no
    parsed character already uses the same name.

    Args:
        file_path: Path of the markdown file; it is read as UTF-8.

    Returns:
        A list of dicts with the keys ``name``, ``role``, ``accent`` and
        ``voice_recommendation``. If the file does not exist an error is
        printed and an empty list is returned.
    """
    if not os.path.exists(file_path):
        print(f"错误: 文件 {file_path} 不存在")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Group 1 is the "Name (Role)" part, group 2 the details up to the end of
    # the line (or up to a '#'). The name class deliberately contains both
    # ASCII and full-width parentheses so "Sonia (Host)" can match at all,
    # and only space/tab as whitespace so a name can never span several lines.
    pattern = (
        r'^([A-Za-z\u4e00-\u9fa5 \t()（）]+?)'
        r'[ \t]*[:：]\s*\n?'
        r'([^#\n]*?)(?:\n|$)'
    )
    # Map full-width parentheses to ASCII so one code path handles both.
    paren_table = str.maketrans('（）', '()')

    characters = []
    for raw_name, raw_details in re.findall(pattern, content, re.MULTILINE):
        role_desc = raw_name.strip().translate(paren_table)
        details = raw_details.strip()

        # Split e.g. "Sonia (Host)" into the name and the role in parentheses.
        if '(' in role_desc and ')' in role_desc:
            name, _, after_open = role_desc.partition('(')
            name = name.strip()
            role = re.split(r'[()]', after_open, maxsplit=1)[0].strip()
        else:
            name = role_desc
            role = "未知角色"

        # str.partition yields '' for the tail when the marker is missing, so
        # both fields default to an empty string. The two markers are parsed
        # independently: a line may carry only a voice recommendation.
        accent = details.partition("风格：")[2].partition("推荐语音：")[0].strip()
        voice_rec = details.partition("推荐语音：")[2].strip()

        characters.append({
            "name": name,
            "role": role,
            "accent": accent,
            "voice_recommendation": voice_rec
        })

    # Characters that are always expected to exist, even when the markdown
    # does not define them explicitly.
    additional_chars = [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural（男）或 en-US-JennyNeural（女）"
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性，权威性",
            "voice_recommendation": "en-US-GuyNeural"
        }
    ]

    # Only add a built-in character when its name was not parsed already.
    known_names = {c["name"] for c in characters}
    for char in additional_chars:
        if char["name"] not in known_names:
            characters.append(char)
            known_names.add(char["name"])

    return characters

def _default_characters():
    """Return the built-in character list used when nothing could be parsed."""
    return [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural（男）或 en-US-JennyNeural（女）"
        },
        {
            "name": "Graham",
            "role": "硅谷",
            "accent": "典型的 American Tech Bro，语速快，自信",
            "voice_recommendation": "Edge TTS 的 en-US-GuyNeural 或 en-US-ChristopherNeural"
        },
        {
            "name": "Dmitri",
            "role": "俄罗斯",
            "accent": "深沉，重音在后",
            "voice_recommendation": "en-IE-ConnorNeural（爱尔兰音，稍微带点卷舌和厚重感）"
        },
        {
            "name": "Amita",
            "role": "印度",
            "accent": "语速快，清晰的印度口音",
            "voice_recommendation": "en-IN-NeerjaNeural（女）或 en-IN-PrabhatNeural（男）"
        },
        {
            "name": "穆罕默德",
            "role": "中东",
            "accent": "沧桑，缓慢",
            "voice_recommendation": "en-EG-SalmaNeural（埃及英语）"
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性，权威性",
            "voice_recommendation": "en-US-GuyNeural"
        }
    ]


def initialize_characters(md_path="/root/tts/plan/chapter8.md",
                          output_dir="output/characters"):
    """Initialize all podcast characters and write their config files.

    The characters are parsed from ``md_path``; when that yields nothing the
    built-in default list is used. For every character a
    ``<lowercased name>_config.txt`` file is written into ``output_dir``,
    followed by a ``character_summary.txt`` that lists all characters.
    Progress is printed to stdout.

    Args:
        md_path: Markdown file holding the character definitions. Defaults
            to the location that was previously hard-coded.
        output_dir: Directory that receives the generated files; it is
            created when missing. Defaults to ``output/characters``.

    Returns:
        The list of character dicts that were initialized.
    """
    # One timestamp per run keeps the console output and every generated
    # file consistent with each other.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    print("=== 播客角色初始化 ===")
    print(f"时间: {timestamp}")
    print()

    characters = parse_characters_from_md(md_path)
    if not characters:
        print("未找到角色定义，使用默认角色...")
        characters = _default_characters()

    print(f"找到 {len(characters)} 个角色:")
    print()

    os.makedirs(output_dir, exist_ok=True)

    for i, char in enumerate(characters, 1):
        print(f"{i}. {char['name']} ({char['role']})")
        print(f"   风格: {char['accent']}")
        print(f"   推荐语音: {char['voice_recommendation']}")
        print()

        # Per-character config file, named after the lowercased name.
        config_content = f"""角色配置文件
名称: {char['name']}
角色: {char['role']}
风格: {char['accent']}
推荐语音: {char['voice_recommendation']}
初始化时间: {timestamp}
状态: 已初始化
"""
        config_path = os.path.join(
            output_dir, f"{char['name'].lower()}_config.txt"
        )
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_content)

    print(f"✓ 所有 {len(characters)} 个角色已初始化完成")
    # Joining with '' appends the trailing separator shown in the message.
    print(f"✓ 配置文件已保存到 {os.path.join(output_dir, '')} 目录")

    # Overall roster of every character in one file.
    summary_path = os.path.join(output_dir, "character_summary.txt")
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("播客角色清单\n")
        f.write("=" * 50 + "\n")
        f.write(f"生成时间: {timestamp}\n\n")
        for i, char in enumerate(characters, 1):
            f.write(f"{i}. {char['name']} ({char['role']})\n")
            f.write(f"   风格: {char['accent']}\n")
            f.write(f"   推荐语音: {char['voice_recommendation']}\n\n")

    print(f"✓ 角色清单已保存到: {summary_path}")

    return characters

# Run the initialization only when executed as a script, not on import.
if __name__ == "__main__":
    initialize_characters()