Initial commit for TTS project
This commit is contained in:
175
scripts/initialize_characters.py
Normal file
175
scripts/initialize_characters.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
播客角色初始化脚本
|
||||
根据 chapter8.md 文件中的角色定义进行初始化
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
def parse_characters_from_md(file_path):
    """Parse podcast character definitions from a Markdown file.

    A definition is a header of the form ``Name (Role): details`` where the
    details may sit on the same line or on the line following the colon.
    Inside the details, the text after the ``风格:`` label becomes the accent
    and the text after the ``推荐语音:`` label becomes the voice
    recommendation. Both ASCII and full-width colons/parentheses are
    accepted.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        A list of dicts with the keys ``name``, ``role``, ``accent`` and
        ``voice_recommendation``. The built-in "Sonia" and "Author" entries
        are appended when the file does not define characters with those
        names. An empty list is returned (after printing an error) when the
        file does not exist.
    """
    if not os.path.exists(file_path):
        print(f"错误: 文件 {file_path} 不存在")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Header: letters, CJK characters, parentheses and *horizontal*
    # whitespace only, so a name can neither span several lines nor silently
    # drop the "(Role)" part. \uff08/\uff09/\uff1a are the full-width forms
    # of "(", ")" and ":".
    header_re = re.compile(
        r'^((?:[A-Za-z\u4e00-\u9fa5()\uff08\uff09]|[^\S\n])+?)'
        r'[:\uff1a]\s*\n?([^#\n]*?)(?:\n|$)',
        re.MULTILINE,
    )
    open_paren_re = re.compile(r'[(\uff08]')
    close_paren_re = re.compile(r'[)\uff09]')
    style_label_re = re.compile(r'风格[:\uff1a]')
    voice_label_re = re.compile(r'推荐语音[:\uff1a]')

    characters = []
    for raw_header, raw_details in header_re.findall(content):
        role_desc = raw_header.strip()
        details = raw_details.strip()

        # Split a header such as "Sonia (Host)" into its name and role.
        if open_paren_re.search(role_desc) and close_paren_re.search(role_desc):
            pieces = open_paren_re.split(role_desc)
            name = pieces[0].strip()
            role = close_paren_re.split(pieces[1])[0].strip()
        else:
            name = role_desc
            role = "未知角色"

        # A nameless entry cannot be addressed by later steps; skip it.
        if not name:
            continue

        # Accent: text after the style label, up to the voice label if any.
        accent = ""
        style_parts = style_label_re.split(details)
        if len(style_parts) > 1:
            accent = voice_label_re.split(style_parts[1])[0].strip()

        # Voice recommendation: text after the voice label.
        voice_rec = ""
        voice_parts = voice_label_re.split(details)
        if len(voice_parts) > 1:
            voice_rec = voice_parts[1].strip()

        characters.append({
            "name": name,
            "role": role,
            "accent": accent,
            "voice_recommendation": voice_rec,
        })

    # Characters that are always expected to exist, whether or not the
    # Markdown file defines them.
    additional_chars = [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural(男)或 en-US-JennyNeural(女)",
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性,权威性",
            "voice_recommendation": "en-US-GuyNeural",
        },
    ]

    # Only add a built-in character when the file did not already define it.
    known_names = {c["name"] for c in characters}
    for char in additional_chars:
        if char["name"] not in known_names:
            characters.append(char)
            known_names.add(char["name"])

    return characters
|
||||
|
||||
def _character_file_stem(name):
    """Return a lowercase, filesystem-safe stem for a character's config file."""
    # Path separators and control characters (e.g. newlines) would either
    # escape the output directory or produce unusable file names.
    stem = re.sub(r'[\\/\x00-\x1f]', '_', name.lower()).strip()
    return stem or "unnamed"


def initialize_characters(md_path="/root/tts/plan/chapter8.md",
                          output_dir="output/characters"):
    """Initialize all podcast characters and write their config files.

    Characters are read with ``parse_characters_from_md``; when that yields
    nothing (it returns an empty list if the file is missing) a built-in
    default cast is used. One ``<name>_config.txt`` file per character and a
    ``character_summary.txt`` overview are written to ``output_dir``, and
    progress is printed to stdout.

    Args:
        md_path: Markdown file holding the character definitions.
        output_dir: Directory receiving the generated files; created if it
            does not exist.

    Returns:
        The list of character dicts that were initialized.
    """
    # Take the timestamp once so every generated file reports the same run.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    print("=== 播客角色初始化 ===")
    print(f"时间: {timestamp}")
    print()

    characters = parse_characters_from_md(md_path)

    if not characters:
        print("未找到角色定义,使用默认角色...")
        characters = [
            {
                "name": "Sonia",
                "role": "Host (主持人)",
                "accent": "冷静、客观、甚至带点冷幽默",
                "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural(男)或 en-US-JennyNeural(女)",
            },
            {
                "name": "Graham",
                "role": "硅谷",
                "accent": "典型的 American Tech Bro,语速快,自信",
                "voice_recommendation": "Edge TTS 的 en-US-GuyNeural 或 en-US-ChristopherNeural",
            },
            {
                "name": "Dmitri",
                "role": "俄罗斯",
                "accent": "深沉,重音在后",
                "voice_recommendation": "en-IE-ConnorNeural(爱尔兰音,稍微带点卷舌和厚重感)",
            },
            {
                "name": "Amita",
                "role": "印度",
                "accent": "语速快,清晰的印度口音",
                "voice_recommendation": "en-IN-NeerjaNeural(女)或 en-IN-PrabhatNeural(男)",
            },
            {
                "name": "穆罕默德",
                "role": "中东",
                "accent": "沧桑,缓慢",
                "voice_recommendation": "en-EG-SalmaNeural(埃及英语)",
            },
            {
                "name": "Author",
                "role": "作者",
                "accent": "分析性,权威性",
                "voice_recommendation": "en-US-GuyNeural",
            },
        ]

    print(f"找到 {len(characters)} 个角色:")
    print()

    os.makedirs(output_dir, exist_ok=True)

    for i, char in enumerate(characters, 1):
        print(f"{i}. {char['name']} ({char['role']})")
        print(f" 风格: {char['accent']}")
        print(f" 推荐语音: {char['voice_recommendation']}")
        print()

        config_content = (
            "角色配置文件\n"
            f"名称: {char['name']}\n"
            f"角色: {char['role']}\n"
            f"风格: {char['accent']}\n"
            f"推荐语音: {char['voice_recommendation']}\n"
            f"初始化时间: {timestamp}\n"
            "状态: 已初始化\n"
        )
        # "/" is used instead of os.path.join so the default paths (and the
        # messages that print them) stay identical on every platform.
        config_path = f"{output_dir}/{_character_file_stem(char['name'])}_config.txt"
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_content)

    print(f"✓ 所有 {len(characters)} 个角色已初始化完成")
    print(f"✓ 配置文件已保存到 {output_dir}/ 目录")

    # Write a single overview file listing every character.
    summary_path = f"{output_dir}/character_summary.txt"
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write("播客角色清单\n")
        f.write("=" * 50 + "\n")
        f.write(f"生成时间: {timestamp}\n\n")
        for i, char in enumerate(characters, 1):
            f.write(f"{i}. {char['name']} ({char['role']})\n")
            f.write(f" 风格: {char['accent']}\n")
            f.write(f" 推荐语音: {char['voice_recommendation']}\n\n")

    print(f"✓ 角色清单已保存到: {summary_path}")

    return characters
|
||||
|
||||
# Run the initialization only when executed as a script, not when imported.
if __name__ == "__main__":
    initialize_characters()
|
||||
Reference in New Issue
Block a user