Files
tts/scripts/initialize_characters.py
2026-01-19 10:27:41 +08:00

175 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
播客角色初始化脚本
根据 chapter8.md 文件中的角色定义进行初始化
"""
import os
import re
from datetime import datetime
def parse_characters_from_md(file_path):
    """Parse podcast character definitions from a Markdown file.

    A definition is a line shaped like ``Name (Role): details``; the details
    may also sit on the line that follows the colon.  Within the details, the
    text after ``风格:`` becomes the accent/style and the text after
    ``推荐语音:`` becomes the recommended voice.  ASCII and full-width colons
    and parentheses are both accepted.

    Two built-in characters ("Sonia" and "Author") are appended whenever the
    file did not already yield a character with the same name.

    Args:
        file_path: Path of the UTF-8 encoded Markdown file to read.

    Returns:
        A list of dicts with the keys ``name``, ``role``, ``accent`` and
        ``voice_recommendation``.  When ``file_path`` does not exist an error
        message is printed and an empty list is returned.
    """
    if not os.path.exists(file_path):
        print(f"错误: 文件 {file_path} 不存在")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Header line: "Name (Role):" followed by the details.
    # - The name class includes ASCII and full-width parentheses so that the
    #   "Name (Role)" form can actually match.
    # - Only horizontal whitespace is allowed inside the name, so a name can
    #   never swallow preceding lines.
    # - \uff08 / \uff09 / \uff1a are the full-width "(", ")" and ":".
    header_re = re.compile(
        r'^([A-Za-z\u4e00-\u9fa5 \t\u3000()\uff08\uff09]+?)[ \t\u3000]*[:\uff1a]'
        r'\s*\n?([^#\n]*?)(?:\n|$)',
        re.MULTILINE,
    )
    style_key = re.compile(r'风格[:\uff1a]')
    voice_key = re.compile(r'推荐语音[:\uff1a]')
    # Normalise full-width parentheses so one code path splits name and role.
    paren_map = str.maketrans('\uff08\uff09', '()')

    characters = []
    for raw_name, raw_details in header_re.findall(content):
        role_desc = raw_name.strip().translate(paren_map)
        details = raw_details.strip()

        # Split a description such as "Sonia (Host)" or "Graham (硅谷)".
        if '(' in role_desc and ')' in role_desc:
            name = role_desc.split('(')[0].strip()
            role = role_desc.split('(')[1].split(')')[0].strip()
        else:
            name = role_desc
            role = "未知角色"

        # A whitespace-only or "(Role)"-only header carries no usable name.
        if not name:
            continue

        # Style is the text between the style key and the voice key (if any).
        accent = ""
        style_parts = style_key.split(details)
        if len(style_parts) > 1:
            accent = voice_key.split(style_parts[1])[0].strip()

        # Recommended voice is the text right after the voice key.
        voice_rec = ""
        voice_parts = voice_key.split(details)
        if len(voice_parts) > 1:
            voice_rec = voice_parts[1].strip()

        characters.append({
            "name": name,
            "role": role,
            "accent": accent,
            "voice_recommendation": voice_rec
        })

    # Characters that are always part of the cast, added by hand.
    additional_chars = [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural或 en-US-JennyNeural"
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性,权威性",
            "voice_recommendation": "en-US-GuyNeural"
        }
    ]

    # Do not duplicate a character the file already defined.
    known_names = {c["name"] for c in characters}
    for extra in additional_chars:
        if extra["name"] not in known_names:
            characters.append(extra)
            known_names.add(extra["name"])

    return characters
def _default_characters():
    """Return the built-in cast used when no character definitions are found."""
    return [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural或 en-US-JennyNeural"
        },
        {
            "name": "Graham",
            "role": "硅谷",
            "accent": "典型的 American Tech Bro语速快自信",
            "voice_recommendation": "Edge TTS 的 en-US-GuyNeural 或 en-US-ChristopherNeural"
        },
        {
            "name": "Dmitri",
            "role": "俄罗斯",
            "accent": "深沉,重音在后",
            "voice_recommendation": "en-IE-ConnorNeural爱尔兰音稍微带点卷舌和厚重感"
        },
        {
            "name": "Amita",
            "role": "印度",
            "accent": "语速快,清晰的印度口音",
            "voice_recommendation": "en-IN-NeerjaNeural或 en-IN-PrabhatNeural"
        },
        {
            "name": "穆罕默德",
            "role": "中东",
            "accent": "沧桑,缓慢",
            "voice_recommendation": "en-EG-SalmaNeural埃及英语"
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性,权威性",
            "voice_recommendation": "en-US-GuyNeural"
        }
    ]


def initialize_characters(md_path="/root/tts/plan/chapter8.md",
                          output_dir="output/characters"):
    """Initialize every podcast character and write its files to disk.

    The characters are read with ``parse_characters_from_md(md_path)``; when
    that yields nothing, a built-in default cast is used instead.  For each
    character a ``<lowercased name>_config.txt`` file is written into
    ``output_dir``, followed by a ``character_summary.txt`` listing all of
    them.  Progress is reported with ``print``.

    Args:
        md_path: Markdown file holding the character definitions.  Defaults
            to the path the script has always used.
        output_dir: Directory that receives the generated files; it is
            created if missing.  Defaults to ``output/characters``.

    Returns:
        The list of character dicts that were initialized.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    print("=== 播客角色初始化 ===")
    print(f"时间: {datetime.now().strftime(time_format)}")
    print()

    # Read the characters from the Markdown source, falling back to defaults.
    characters = parse_characters_from_md(md_path)
    if not characters:
        print("未找到角色定义,使用默认角色...")
        characters = _default_characters()

    print(f"找到 {len(characters)} 个角色:")
    print()

    # Make sure the output directory exists before writing anything.
    os.makedirs(output_dir, exist_ok=True)

    for i, char in enumerate(characters, 1):
        print(f"{i}. {char['name']} ({char['role']})")
        print(f" 风格: {char['accent']}")
        print(f" 推荐语音: {char['voice_recommendation']}")
        print()

        # One plain-text config file per character; the timestamp is taken
        # at the moment this particular character is written.
        config_content = (
            "角色配置文件\n"
            f"名称: {char['name']}\n"
            f"角色: {char['role']}\n"
            f"风格: {char['accent']}\n"
            f"推荐语音: {char['voice_recommendation']}\n"
            f"初始化时间: {datetime.now().strftime(time_format)}\n"
            "状态: 已初始化\n"
        )
        config_path = f"{output_dir}/{char['name'].lower()}_config.txt"
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_content)

    print(f"✓ 所有 {len(characters)} 个角色已初始化完成")
    print(f"✓ 配置文件已保存到 {output_dir}/ 目录")

    # Write the overall character list.
    summary_path = f"{output_dir}/character_summary.txt"
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("播客角色清单\n")
        f.write("=" * 50 + "\n")
        f.write(f"生成时间: {datetime.now().strftime(time_format)}\n\n")
        for i, char in enumerate(characters, 1):
            f.write(f"{i}. {char['name']} ({char['role']})\n")
            f.write(f" 风格: {char['accent']}\n")
            f.write(f" 推荐语音: {char['voice_recommendation']}\n\n")

    print(f"✓ 角色清单已保存到: {summary_path}")
    return characters
# Run the initialization only when this file is executed as a script.
if __name__ == "__main__":
    initialize_characters()