Initial commit for TTS project

This commit is contained in:
Ben
2026-01-19 10:27:41 +08:00
commit a9abd3913d
160 changed files with 11031 additions and 0 deletions

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
MOSS-TTSD podcast generator - simplified version.

Writes the generated audio directly into /root/tts/podcast_audios/.
"""
import json
import os
import shutil
import subprocess
import sys
import tempfile
# Configuration: fixed filesystem locations used by this script.
# Directory that receives the final "<output_name>.wav" file.
OUTPUT_DIR = "/root/tts/podcast_audios"
# Directory holding inference.py and the "MOSS-TTSD-v0.7" model folder.
MODEL_DIR = "/root/tts/MOSS-TTSD"
def _probe_duration(audio_path):
    """Return the first ``duration=`` value ffprobe reports, formatted "N.N".

    Falls back to "未知" (the label the summary prints for an unknown
    duration) when ffprobe cannot be launched, exits non-zero, or prints no
    parsable ``duration=`` line, so a missing or unhappy ffprobe never turns
    an already finished render into a crash.
    """
    unknown = "未知"
    try:
        probe = subprocess.run(
            ["ffprobe", audio_path, "-v", "quiet", "-show_streams"],
            capture_output=True,
            text=True,
        )
    except OSError:
        # ffprobe is not installed or cannot be executed.
        return unknown
    if probe.returncode != 0:
        return unknown
    for line in probe.stdout.splitlines():
        if line.startswith("duration="):
            try:
                return f"{float(line.split('=', 1)[1]):.1f}"
            except ValueError:
                # Non-numeric value (ffprobe can print "N/A").
                return unknown
    return unknown


def generate_podcast(script_file, output_name):
    """Render a dialogue script to ``OUTPUT_DIR/<output_name>.wav``.

    The script text is wrapped in a single-record JSONL job and handed to
    ``MODEL_DIR/inference.py`` in a child process. The job file and the raw
    inference output live in a private temporary directory that is removed
    on every exit path. Progress and errors are printed to stdout.

    Args:
        script_file: Path to the dialogue script (.txt with [S1]/[S2] tags).
        output_name: Output file name, without the ``.wav`` suffix.

    Returns:
        True when the audio was generated and stored; False when the model
        folder or the script file is missing, the inference process exits
        non-zero, or no audio file was produced.
    """
    print(f"🎙️ 生成播客: {output_name}")
    print("=" * 50)

    # The model folder must already have been downloaded.
    if not os.path.exists(f"{MODEL_DIR}/MOSS-TTSD-v0.7"):
        print("❌ MOSS-TTSD模型未下载")
        return False

    if not os.path.exists(script_file):
        print(f"❌ 脚本文件不存在: {script_file}")
        return False

    with open(script_file, 'r', encoding='utf-8') as f:
        script_text = f.read().strip()

    # Single dialogue record: the script plus one reference clip and its
    # transcript for each of the two speakers.
    dialogue_data = {
        "id": 1,
        "base_path": "/root/tts/hosts",
        "text": script_text,
        "prompt_audio_speaker1": "ben_guanquelou.wav",
        "prompt_text_speaker1": "白日依山尽,黄河入海流,欲穷千里目,更上一层楼。",
        "prompt_audio_speaker2": "judy_dalingtaohua_trim.wav",
        "prompt_text_speaker2": "大林寺桃花,人间四月芳菲尽,山寺桃花始盛开。",
    }

    output_path = f"{OUTPUT_DIR}/{output_name}.wav"

    # A per-run scratch directory keeps concurrent runs from clobbering each
    # other's output, prevents a stale file from an earlier run being taken
    # for this run's result, and guarantees cleanup even if the child process
    # call raises.
    with tempfile.TemporaryDirectory(prefix="moss_ttsd_") as work_dir:
        jsonl_path = os.path.join(work_dir, "dialogue.jsonl")
        with open(jsonl_path, 'w', encoding='utf-8') as f:
            json.dump(dialogue_data, f, ensure_ascii=False)
            f.write('\n')
        print(f"✅ 脚本加载成功: {len(script_text)} 字符")

        print("🎬 正在生成音频...")
        cmd = [
            sys.executable, f"{MODEL_DIR}/inference.py",
            "--jsonl", jsonl_path,
            "--output_dir", work_dir,
            "--attn_implementation", "sdpa",
            "--use_normalize",
            "--silence_duration", "0.12",
            "--seed", "42",
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            print("❌ 音频生成失败")
            print(result.stderr)
            return False

        # NOTE(review): assumes inference.py names the single result
        # "output_0.wav" inside --output_dir, as the previous code did.
        temp_audio = os.path.join(work_dir, "output_0.wav")
        if not os.path.exists(temp_audio):
            print("❌ 音频文件未生成")
            return False

        # Create the destination on demand so a long render is not lost to
        # a missing directory, then move the file out of the scratch dir.
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        shutil.move(temp_audio, output_path)

    duration = _probe_duration(output_path)
    file_size = os.path.getsize(output_path) / (1024 * 1024)

    print("✅ 生成成功!")
    print(f"📁 文件位置: {output_path}")
    print(f"📊 文件大小: {file_size:.1f}MB")
    print(f"⏱️ 音频时长: {duration}")
    print()
    print("🎧 播放命令:")
    print(f" ffplay {output_path}")
    print(" # 或")
    print(f" aplay {output_path}")
    return True
def main():
    """Command-line entry point: ``<prog> <script_file> <output_name>``.

    Prints usage and exits with status 1 unless exactly two arguments are
    supplied. Also exits with status 1 when generation reports failure, so
    shell callers and pipelines can detect it; on success it returns
    normally.
    """
    if len(sys.argv) != 3:
        print("用法:")
        print(f" {sys.argv[0]} <脚本文件> <输出名称>")
        print()
        print("示例:")
        print(f" {sys.argv[0]} chapter8_script.txt chapter8_demo")
        print()
        print("脚本文件格式: 纯文本,包含[S1] [S2]标签")
        print("输出名称: 不需要加.wav后缀")
        sys.exit(1)

    script_file = sys.argv[1]
    output_name = sys.argv[2]
    # Propagate failure through the exit status instead of dropping it.
    if not generate_podcast(script_file, output_name):
        sys.exit(1)


if __name__ == "__main__":
    main()