Initial commit for TTS project

This commit is contained in:
Ben
2026-01-19 10:27:41 +08:00
commit a9abd3913d
160 changed files with 11031 additions and 0 deletions

View File

@@ -0,0 +1,175 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
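"""
Podcast character initialization script.

Reads the character (role) definitions from chapter8.md and writes one
configuration file per character, plus a summary list, under
output/characters/.
"""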
import os
import re
from datetime import datetime
def parse_characters_from_md(file_path):
    """Parse podcast character definitions from a markdown file.

    A definition is a line of the form ``Name (Role):details``. The
    parenthesised role is optional, and the details may instead sit on the
    line following the colon. Inside the details, the text after ``风格:``
    is taken as the speaking style and the text after ``推荐语音:`` as the
    recommended voice.

    Two built-in characters ("Sonia" and "Author") are appended whenever no
    parsed character already carries that name.

    Args:
        file_path: Path of the UTF-8 encoded markdown file to read.

    Returns:
        A list of dicts with the keys ``name``, ``role``, ``accent`` and
        ``voice_recommendation``. An empty list is returned (after printing
        an error message) when the file does not exist.
    """
    # EAFP: open directly instead of checking existence first, so there is no
    # window between the check and the read.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (FileNotFoundError, NotADirectoryError):
        print(f"错误: 文件 {file_path} 不存在")
        return []

    # Header: letters / CJK characters / blanks / ASCII parentheses, then a colon.
    # - Parentheses are allowed so that "Sonia (Host):" is recognised.
    # - Only space and tab are allowed inside the name, so a name can never
    #   swallow the preceding line.
    # - After the colon, `\s*` may cross a line break, so the details can be
    #   on the next line; a '#' stops the details (markdown headings).
    # NOTE(review): only ASCII ':' and '()' are recognised; full-width
    # punctuation in the markdown would not match - confirm the file format.
    header_pattern = re.compile(
        r'^([A-Za-z\u4e00-\u9fa5 \t()]+?)[ \t]*[:]\s*([^#\n]*?)(?:\n|$)',
        re.MULTILINE,
    )

    characters = []
    for raw_header, raw_details in header_pattern.findall(content):
        role_desc = raw_header.strip()
        details = raw_details.strip()

        # Split a header such as "Sonia (Host)" or "Graham (硅谷)".
        if '(' in role_desc and ')' in role_desc:
            before_paren, _, after_paren = role_desc.partition('(')
            name = before_paren.strip()
            role = after_paren.split('(')[0].split(')')[0].strip()
        else:
            name = role_desc
            role = "未知角色"

        # A blank or parenthesis-only header carries no usable name.
        if not name:
            continue

        # Style is the text between "风格:" and "推荐语音:" (or the end).
        accent = ""
        if "风格:" in details:
            accent = details.split("风格:")[1].split("推荐语音:")[0].strip()

        # Recommended voice is whatever follows "推荐语音:".
        voice_rec = ""
        if "推荐语音:" in details:
            voice_rec = details.split("推荐语音:")[1].strip()

        characters.append({
            "name": name,
            "role": role,
            "accent": accent,
            "voice_recommendation": voice_rec,
        })

    # Characters that are always part of the cast, added by hand.
    additional_chars = [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural或 en-US-JennyNeural",
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性,权威性",
            "voice_recommendation": "en-US-GuyNeural",
        },
    ]

    # Avoid duplicates: a parsed definition wins over the built-in one.
    known_names = {entry["name"] for entry in characters}
    for extra in additional_chars:
        if extra["name"] not in known_names:
            characters.append(extra)
            known_names.add(extra["name"])

    return characters
def _default_characters():
    """Return the built-in cast used when no definitions could be parsed."""
    return [
        {
            "name": "Sonia",
            "role": "Host (主持人)",
            "accent": "冷静、客观、甚至带点冷幽默",
            "voice_recommendation": "Edge TTS 的 en-GB-RyanNeural或 en-US-JennyNeural",
        },
        {
            "name": "Graham",
            "role": "硅谷",
            "accent": "典型的 American Tech Bro语速快自信",
            "voice_recommendation": "Edge TTS 的 en-US-GuyNeural 或 en-US-ChristopherNeural",
        },
        {
            "name": "Dmitri",
            "role": "俄罗斯",
            "accent": "深沉,重音在后",
            "voice_recommendation": "en-IE-ConnorNeural爱尔兰音稍微带点卷舌和厚重感",
        },
        {
            "name": "Amita",
            "role": "印度",
            "accent": "语速快,清晰的印度口音",
            "voice_recommendation": "en-IN-NeerjaNeural或 en-IN-PrabhatNeural",
        },
        {
            "name": "穆罕默德",
            "role": "中东",
            "accent": "沧桑,缓慢",
            "voice_recommendation": "en-EG-SalmaNeural埃及英语",
        },
        {
            "name": "Author",
            "role": "作者",
            "accent": "分析性,权威性",
            "voice_recommendation": "en-US-GuyNeural",
        },
    ]


def _render_character_config(char, timestamp):
    """Build the text of the per-character configuration file."""
    return (
        "角色配置文件\n"
        f"名称: {char['name']}\n"
        f"角色: {char['role']}\n"
        f"风格: {char['accent']}\n"
        f"推荐语音: {char['voice_recommendation']}\n"
        f"初始化时间: {timestamp}\n"
        "状态: 已初始化\n"
    )


def initialize_characters(source_path="/root/tts/plan/chapter8.md",
                          output_dir="output/characters"):
    """Initialize all podcast characters and write their config files.

    Parses the character definitions from ``source_path``; when none are
    returned, a built-in default cast is used instead. For every character a
    ``<lowercased name>_config.txt`` file is written into ``output_dir``,
    followed by a ``character_summary.txt`` listing the whole cast. Progress
    is reported on stdout.

    Args:
        source_path: Markdown file holding the character definitions.
            Defaults to the path that was previously hard-coded.
        output_dir: Directory that receives the generated files; created if
            missing. Defaults to the directory that was previously hard-coded.

    Returns:
        The list of character dicts that were initialized.
    """
    # One timestamp for the whole run, so the console output and every
    # generated file agree on the initialization time.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    print("=== 播客角色初始化 ===")
    print(f"时间: {timestamp}")
    print()

    # Parse the characters from the markdown source.
    characters = parse_characters_from_md(source_path)
    if not characters:
        print("未找到角色定义,使用默认角色...")
        characters = _default_characters()

    print(f"找到 {len(characters)} 个角色:")
    print()

    # Create the character directory.
    os.makedirs(output_dir, exist_ok=True)

    for index, char in enumerate(characters, 1):
        print(f"{index}. {char['name']} ({char['role']})")
        print(f" 风格: {char['accent']}")
        print(f" 推荐语音: {char['voice_recommendation']}")
        print()

        # Write the per-character configuration file.
        # NOTE(review): names differing only by case share one file name.
        config_path = f"{output_dir}/{char['name'].lower()}_config.txt"
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(_render_character_config(char, timestamp))

    print(f"✓ 所有 {len(characters)} 个角色已初始化完成")
    print(f"✓ 配置文件已保存到 {output_dir}/ 目录")

    # Write the overall cast list.
    summary_path = f"{output_dir}/character_summary.txt"
    summary_lines = [
        "播客角色清单\n",
        "=" * 50 + "\n",
        f"生成时间: {timestamp}\n\n",
    ]
    for index, char in enumerate(characters, 1):
        summary_lines.append(f"{index}. {char['name']} ({char['role']})\n")
        summary_lines.append(f" 风格: {char['accent']}\n")
        summary_lines.append(f" 推荐语音: {char['voice_recommendation']}\n\n")
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.writelines(summary_lines)

    print(f"✓ 角色清单已保存到: {summary_path}")
    return characters
# Script entry point: run the initialization only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    initialize_characters()