143 lines
4.1 KiB
Python
Executable File
143 lines
4.1 KiB
Python
Executable File
#!/usr/bin/env python3
"""
MOSS-TTSD podcast generator - simplified version.
Writes the generated audio directly to /root/tts/podcast_audios/
"""
|
||
import json
import os
import shutil
import subprocess
import sys
import tempfile
|
||
# Configuration.
# Both locations default to the original fixed paths; the environment
# variables allow running the script outside of /root without editing it.
OUTPUT_DIR = os.environ.get("PODCAST_OUTPUT_DIR", "/root/tts/podcast_audios")
MODEL_DIR = os.environ.get("MOSS_TTSD_DIR", "/root/tts/MOSS-TTSD")
|
||
def _probe_duration(audio_path: str) -> str:
    """Return the duration of *audio_path* formatted for display.

    Falls back to the "unknown" label when ffprobe is not installed, exits
    with an error, or reports a value that is not a number.
    """
    unknown = "未知"
    try:
        probe = subprocess.run(
            ["ffprobe", audio_path, "-v", "quiet", "-show_streams"],
            capture_output=True, text=True,
        )
    except OSError:
        # The duration is purely informational; a missing ffprobe binary
        # must not turn a finished render into a crash.
        return unknown

    if probe.returncode != 0:
        return unknown

    for line in probe.stdout.splitlines():
        if line.startswith("duration="):
            try:
                return f"{float(line.split('=', 1)[1]):.1f}秒"
            except ValueError:
                # Non-numeric value (e.g. a placeholder such as "N/A").
                return unknown
    return unknown


def generate_podcast(script_file: str, output_name: str) -> bool:
    """Generate a podcast from a dialogue script and save it under OUTPUT_DIR.

    The script text is wrapped into a one-record JSONL file, handed to
    ``{MODEL_DIR}/inference.py`` in a subprocess, and the resulting
    ``output_0.wav`` is moved to ``{OUTPUT_DIR}/{output_name}.wav``.

    Args:
        script_file: Path to the dialogue script (.txt containing
            [S1] / [S2] speaker tags).
        output_name: Output file name, without the ``.wav`` suffix.

    Returns:
        True when the audio file was written. False when the model
        directory or the script file is missing, when inference exits with
        a non-zero status, or when inference produced no audio file.
    """
    print(f"🎙️ 生成播客: {output_name}")
    print("=" * 50)

    # The model weights must already be downloaded.
    if not os.path.exists(f"{MODEL_DIR}/MOSS-TTSD-v0.7"):
        print("❌ MOSS-TTSD模型未下载")
        return False

    if not os.path.exists(script_file):
        print(f"❌ 脚本文件不存在: {script_file}")
        return False

    with open(script_file, 'r', encoding='utf-8') as f:
        script_text = f.read().strip()

    # One dialogue record: the script plus the two reference voices
    # (prompt audio and its transcript) used for the speakers.
    dialogue_data = {
        "id": 1,
        "base_path": "/root/tts/hosts",
        "text": script_text,
        "prompt_audio_speaker1": "ben_guanquelou.wav",
        "prompt_text_speaker1": "白日依山尽,黄河入海流,欲穷千里目,更上一层楼。",
        "prompt_audio_speaker2": "judy_dalingtaohua_trim.wav",
        "prompt_text_speaker2": "大林寺桃花,人间四月芳菲尽,山寺桃花始盛开。",
    }

    output_path = f"{OUTPUT_DIR}/{output_name}.wav"

    # A private scratch directory holds both the JSONL input and the raw
    # inference output. It avoids a shared, fixed /tmp/output_0.wav (which
    # concurrent runs would overwrite and stale files could impersonate) and
    # is removed on every exit path, including exceptions.
    with tempfile.TemporaryDirectory(prefix="moss_ttsd_") as work_dir:
        temp_jsonl = os.path.join(work_dir, "dialogue.jsonl")
        with open(temp_jsonl, 'w', encoding='utf-8') as f:
            json.dump(dialogue_data, f, ensure_ascii=False)
            f.write('\n')

        print(f"✅ 脚本加载成功: {len(script_text)} 字符")

        print("🎬 正在生成音频...")
        cmd = [
            sys.executable, f"{MODEL_DIR}/inference.py",
            "--jsonl", temp_jsonl,
            "--output_dir", work_dir,
            "--attn_implementation", "sdpa",
            "--use_normalize",
            "--silence_duration", "0.12",
            "--seed", "42",
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            print("❌ 音频生成失败")
            print(result.stderr)
            return False

        # inference.py is expected to name the first record's audio
        # output_0.wav inside --output_dir.
        temp_audio = os.path.join(work_dir, "output_0.wav")
        if not os.path.exists(temp_audio):
            print("❌ 音频文件未生成")
            return False

        # Create the destination on demand (also covers an output_name that
        # contains a sub-directory) and move the file without spawning `cp`.
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
        shutil.move(temp_audio, output_path)

    duration = _probe_duration(output_path)
    file_size = os.path.getsize(output_path) / (1024 * 1024)

    print("✅ 生成成功!")
    print(f"📁 文件位置: {output_path}")
    print(f"📊 文件大小: {file_size:.1f}MB")
    print(f"⏱️ 音频时长: {duration}")
    print()
    print("🎧 播放命令:")
    print(f" ffplay {output_path}")
    print(" # 或")
    print(f" aplay {output_path}")

    return True
|
||
def main():
    """Command-line entry point.

    Expects exactly two arguments: the dialogue script path and the output
    name (without ``.wav``). Prints usage and exits with status 1 when the
    argument count is wrong, and also exits with status 1 when
    ``generate_podcast`` reports failure.
    """
    if len(sys.argv) != 3:
        print("用法:")
        print(f" {sys.argv[0]} <脚本文件> <输出名称>")
        print()
        print("示例:")
        print(f" {sys.argv[0]} chapter8_script.txt chapter8_demo")
        print()
        print("脚本文件格式: 纯文本,包含[S1] [S2]标签")
        print("输出名称: 不需要加.wav后缀")
        sys.exit(1)

    script_file, output_name = sys.argv[1], sys.argv[2]

    # Surface a failed generation as a non-zero exit status so shell
    # callers and pipelines can detect it.
    if not generate_podcast(script_file, output_name):
        sys.exit(1)
|
||
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()