#!/usr/bin/env python3
"""MOSS-TTSD podcast generator (simplified version).

Renders a dialogue script to a WAV file and saves it directly under
/root/tts/podcast_audios/.
"""

import json
import os
import shutil
import subprocess
import sys
import tempfile

# Configuration
OUTPUT_DIR = "/root/tts/podcast_audios"
MODEL_DIR = "/root/tts/MOSS-TTSD"


def _probe_duration(audio_path):
    """Return the audio duration as a display string, or "未知" if unavailable.

    Runs ``ffprobe -show_streams`` and formats the first ``duration=`` line.
    This is best-effort: a missing ffprobe binary, a non-zero exit status, or
    a non-numeric duration value all yield the "未知" placeholder instead of
    raising, so an already-generated podcast is never reported as a crash.
    """
    unknown = "未知"
    try:
        probe = subprocess.run(
            ["ffprobe", audio_path, "-v", "quiet", "-show_streams"],
            capture_output=True,
            text=True,
        )
    except OSError:
        # ffprobe is not installed or not executable.
        return unknown

    if probe.returncode != 0:
        return unknown

    for line in probe.stdout.split("\n"):
        if line.startswith("duration="):
            try:
                return f"{float(line.split('=', 1)[1]):.1f}秒"
            except ValueError:
                # Non-numeric duration value (e.g. "N/A").
                return unknown
    return unknown


def generate_podcast(script_file, output_name):
    """Generate a podcast and save it directly into ``OUTPUT_DIR``.

    Args:
        script_file: Path to the dialogue script (.txt, containing
            [S1] / [S2] speaker tags).
        output_name: Output file name, without the ``.wav`` suffix.

    Returns:
        True when the audio was generated and copied to
        ``OUTPUT_DIR/<output_name>.wav``; False when the model directory or
        the script file is missing, the inference process exits with a
        non-zero status, or the expected audio file was not produced. The
        reason is printed in each failure case.

    Raises:
        OSError: If the script cannot be read or the output file cannot be
            written.
    """
    print(f"🎙️ 生成播客: {output_name}")
    print("=" * 50)

    # Check that the model has been downloaded.
    if not os.path.exists(f"{MODEL_DIR}/MOSS-TTSD-v0.7"):
        print("❌ MOSS-TTSD模型未下载")
        return False

    # Check that the script file exists.
    if not os.path.exists(script_file):
        print(f"❌ 脚本文件不存在: {script_file}")
        return False

    # Read the script.
    with open(script_file, "r", encoding="utf-8") as f:
        script_text = f.read().strip()

    # Build the single dialogue record passed to the inference script.
    dialogue_data = {
        "id": 1,
        "base_path": "/root/tts/hosts",
        "text": script_text,
        "prompt_audio_speaker1": "ben_guanquelou.wav",
        "prompt_text_speaker1": "白日依山尽,黄河入海流,欲穷千里目,更上一层楼。",
        "prompt_audio_speaker2": "judy_dalingtaohua_trim.wav",
        "prompt_text_speaker2": "大林寺桃花,人间四月芳菲尽,山寺桃花始盛开。",
    }

    output_path = f"{OUTPUT_DIR}/{output_name}.wav"

    # A private scratch directory holds both the JSONL input and the generated
    # audio. This avoids clashing with other runs (or stale files) on a shared
    # fixed path, and guarantees cleanup even if a step below raises.
    with tempfile.TemporaryDirectory(prefix="moss_ttsd_") as work_dir:
        jsonl_path = os.path.join(work_dir, "dialogue.jsonl")
        with open(jsonl_path, "w", encoding="utf-8") as f:
            json.dump(dialogue_data, f, ensure_ascii=False)
            f.write("\n")

        print(f"✅ 脚本加载成功: {len(script_text)} 字符")

        # Run the MOSS-TTSD inference script, writing into the scratch dir.
        print("🎬 正在生成音频...")
        cmd = [
            sys.executable, f"{MODEL_DIR}/inference.py",
            "--jsonl", jsonl_path,
            "--output_dir", work_dir,
            "--attn_implementation", "sdpa",
            "--use_normalize",
            "--silence_duration", "0.12",
            "--seed", "42",
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            print("❌ 音频生成失败")
            print(result.stderr)
            return False

        # The generated file is expected to be named output_0.wav
        # (presumably after the record index; confirm against inference.py).
        temp_audio = os.path.join(work_dir, "output_0.wav")
        if not os.path.exists(temp_audio):
            print("❌ 音频文件未生成")
            return False

        # Copy to the destination, creating the directory on first use.
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        shutil.copyfile(temp_audio, output_path)

    # Report information about the generated audio.
    duration = _probe_duration(output_path)
    file_size = os.path.getsize(output_path) / (1024 * 1024)

    print("✅ 生成成功!")
    print(f"📁 文件位置: {output_path}")
    print(f"📊 文件大小: {file_size:.1f}MB")
    print(f"⏱️ 音频时长: {duration}")
    print()
    print("🎧 播放命令:")
    print(f" ffplay {output_path}")
    print(" # 或")
    print(f" aplay {output_path}")

    return True


def main():
    """Command-line entry point: ``<script_file> <output_name>``.

    Prints usage and exits with status 1 when the argument count is wrong.
    Also exits with status 1 when generation fails, so shell callers can
    detect the failure.
    """
    if len(sys.argv) != 3:
        print("用法:")
        print(f" {sys.argv[0]} <脚本文件> <输出名称>")
        print()
        print("示例:")
        print(f" {sys.argv[0]} chapter8_script.txt chapter8_demo")
        print()
        print("脚本文件格式: 纯文本,包含[S1] [S2]标签")
        print("输出名称: 不需要加.wav后缀")
        sys.exit(1)

    script_file = sys.argv[1]
    output_name = sys.argv[2]

    if not generate_podcast(script_file, output_name):
        sys.exit(1)


if __name__ == "__main__":
    main()