Initial commit for TTS project
This commit is contained in:
255
scripts/generate/fish_speech_cli.py
Executable file
255
scripts/generate/fish_speech_cli.py
Executable file
@@ -0,0 +1,255 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fish Speech 命令行语音克隆脚本
|
||||
无需 Web UI,纯命令行控制
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
import signal
|
||||
from pathlib import Path
|
||||
|
||||
class FishSpeechCLI:
    """Run a Fish Speech voice-cloning job from the command line.

    The workflow implemented by :meth:`run` is: verify that the model files
    and the reference audio exist, launch ``tools/api_server.py`` from the
    Fish Speech checkout, wait until a health probe succeeds, call
    ``tools/api_client.py`` to synthesize ``target_text`` in the voice of
    ``reference_audio``, and finally look for the generated audio file.

    All paths and texts are plain instance attributes set in ``__init__`` and
    may be overridden on the instance before calling :meth:`run`.
    """

    # Ports probed (in order) when waiting for the API server to come up.
    CANDIDATE_PORTS = (8080, 7860, 5000)

    # File name (inside ``output_dir``) that receives the server's output.
    SERVER_LOG_NAME = "fish_speech_api_server.log"

    def __init__(self):
        """Set default paths and texts and make sure the output directory exists."""
        self.fish_speech_dir = Path("/root/tts/fish-speech")
        checkpoint_dir = self.fish_speech_dir / "checkpoints/fish-speech-1.5"
        self.model_path = checkpoint_dir / "model.pth"
        self.decoder_path = checkpoint_dir / "firefly-gan-vq-fsq-8x1024-21hz-generator.pth"
        self.reference_audio = Path("/root/tts/ben_guanquelou.wav")
        self.output_dir = Path("/root/tts/audio_files")
        # parents=True so a missing intermediate directory is not fatal.
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Default parameters: transcript of the reference audio and the text
        # to be synthesized.
        self.reference_text = "登鹳雀楼,白日依山尽,黄河入海流。欲穷千里目,更上一层楼。"
        self.target_text = """我们习惯于赞美黄河之水天上来,习惯于歌颂大地的厚德载物。教科书告诉我们,河流是水循环的恩赐,大陆是漂浮在岩浆上的方舟。这是一个完美的、闭环的、温情脉脉的解释。但如果,这一切都是关于"摩擦力"的谎言呢?请试着像挤压一个注满水的海绵球一样,去想象我们脚下的这颗星球。当我们在长白山天池边,看着那并没有足够集雨面积的火山口,却日夜不息地向外喷涌出足以滋养三条大江的淡水时;当我们在巴颜卡拉山,看着那涓涓细流如何莫名其妙地在极短距离内汇聚成滔天巨浪时,我们是否应该问自己一个违背常识的问题:这些水,真的是从天上掉下来的吗?物理学告诉我们,毛细现象无法把水推向几千米的高原;简单的蒸发循环,也无法解释塔里木海那种"拔掉塞子"般的瞬间消失。这背后,一定存在一个"第一推动"。它不是温柔的渗透,它是暴力的"挤压"。"""

        # Handle of the API server started by start_api_server(), if any.
        self.server_process = None
        # Base URL of the running server; set once the health probe succeeds.
        self.server_url = None
        # Open file object receiving the server's stdout/stderr.
        self._server_log = None

    def check_files(self):
        """Check that the model, decoder and reference audio files exist.

        Prints one status line per file and stops at the first missing one.

        Returns:
            True if every required file exists, False otherwise.
        """
        print("📦 检查文件...")

        required = [
            (self.model_path, "主模型"),
            (self.decoder_path, "解码器"),
            (self.reference_audio, "参考音频"),
        ]

        for file_path, name in required:
            if not file_path.exists():
                print(f" ❌ {name}: {file_path.name} (缺失)")
                return False
            size_mb = file_path.stat().st_size / (1024 * 1024)
            print(f" ✅ {name}: {file_path.name} ({size_mb:.1f}MB)")

        return True

    def _find_healthy_server(self):
        """Return the base URL of the first candidate port answering the health probe.

        Uses only the standard library so that a missing third-party HTTP
        package cannot silently turn every probe into a failure.

        NOTE(review): the probe path is ``/health`` while synthesis targets
        ``/v1/tts`` -- confirm the server really exposes ``/health``.

        Returns:
            ``"http://127.0.0.1:<port>"`` for the first port that answers
            with HTTP 200, or None if none does.
        """
        import http.client
        import urllib.request

        for port in self.CANDIDATE_PORTS:
            base_url = f"http://127.0.0.1:{port}"
            try:
                with urllib.request.urlopen(f"{base_url}/health", timeout=2) as response:
                    if response.status == 200:
                        return base_url
            except (OSError, http.client.HTTPException):
                # Connection refused, timeout, non-2xx status, malformed
                # reply: this port is not (yet) serving; try the next one.
                continue
        return None

    @staticmethod
    def _read_log_tail(log_path, max_chars=4000):
        """Return the last ``max_chars`` characters of the server log for diagnostics."""
        try:
            text = Path(log_path).read_text(encoding="utf-8", errors="replace")
        except OSError as exc:
            return f"(无法读取服务器日志: {exc})"
        return text[-max_chars:].strip()

    def start_api_server(self, max_wait=120, poll_interval=2):
        """Launch the Fish Speech API server and wait until it is healthy.

        The server's stdout and stderr are redirected to a log file in
        ``output_dir`` instead of unread pipes: an undrained pipe fills up
        and blocks the server as soon as it has logged enough output.

        Args:
            max_wait: Maximum number of seconds (wall clock) to wait for the
                health probe to succeed.
            poll_interval: Seconds to sleep between probe rounds.

        Returns:
            True once a health probe succeeds (``server_url`` is then set),
            False if the server exits early or the wait times out.
        """
        print("🚀 启动 Fish Speech API 服务器...")

        # Clear out stale server processes. An argument list avoids the
        # shell entirely (whose own command line would match the pattern).
        try:
            subprocess.run(["pkill", "-f", "api_server"], check=False)
        except FileNotFoundError:
            # pkill is not installed; there is nothing we can clean up.
            pass
        time.sleep(2)

        cmd = [
            sys.executable, "tools/api_server.py",
            "--llama-checkpoint-path", str(self.model_path),
            "--decoder-checkpoint-path", str(self.decoder_path),
            "--device", "cpu",
        ]
        print(f"执行命令: {' '.join(cmd)}")

        # Drop a handle left over from an earlier start attempt.
        if self._server_log is not None:
            self._server_log.close()
            self._server_log = None

        log_path = self.output_dir / self.SERVER_LOG_NAME
        self._server_log = log_path.open("wb")

        # cwd= makes the relative script path resolve inside the Fish Speech
        # checkout without changing this process's own working directory.
        self.server_process = subprocess.Popen(
            cmd,
            cwd=str(self.fish_speech_dir),
            stdout=self._server_log,
            stderr=subprocess.STDOUT,
        )

        print("⏳ 等待服务器启动...")
        started = time.monotonic()
        deadline = started + max_wait

        # A monotonic deadline bounds the real elapsed time; counting only
        # the sleeps would ignore the seconds spent inside slow probes.
        while time.monotonic() < deadline:
            if self.server_process.poll() is not None:
                print("❌ 服务器启动失败")
                print(f"错误: {self._read_log_tail(log_path)}")
                return False

            healthy_url = self._find_healthy_server()
            if healthy_url is not None:
                print(f"✅ 服务器已启动: {healthy_url}")
                self.server_url = healthy_url
                return True

            time.sleep(poll_interval)
            elapsed = int(time.monotonic() - started)
            print(f" 等待中... ({elapsed}s)")

        print("⏰ 服务器启动超时")
        return False

    def synthesize_speech(self, output_filename="fish_speech_cli_output"):
        """Call the Fish Speech API client to synthesize ``target_text``.

        Args:
            output_filename: Output file name without extension; passed to
                the client as a path inside ``output_dir``.

        Returns:
            True if the client process exits with status 0. False if no
            server URL is known yet, the client times out, or it exits with
            a non-zero status.
        """
        if not self.server_url:
            # Without this guard an unset URL would surface as an
            # AttributeError / a request to "None/v1/tts".
            print("❌ 服务器尚未启动,无法进行语音合成")
            return False

        print("🎙️ 开始语音合成...")
        print(f"📝 参考文本: {self.reference_text}")
        print(f"📝 目标文本长度: {len(self.target_text)} 字符")

        client_cmd = [
            sys.executable, "tools/api_client.py",
            "--text", self.target_text,
            "--reference_audio", str(self.reference_audio),
            "--reference_text", self.reference_text,
            "--output", str(self.output_dir / output_filename),
            "--no-play",
            "--max_new_tokens", "2048",
            "--chunk_length", "300",
            "--top_p", "0.8",
            "--temperature", "0.8",
            "--repetition_penalty", "1.1",
            "--url", f"{self.server_url}/v1/tts",
            "--format", "wav",
        ]
        print(f"执行命令: {' '.join(client_cmd)}")

        try:
            result = subprocess.run(
                client_cmd,
                cwd=str(self.fish_speech_dir),  # relative script path lives here
                capture_output=True,
                text=True,
                timeout=600,  # 10 minutes
            )
        except subprocess.TimeoutExpired:
            print("❌ 语音合成超时")
            return False

        print("🎙️ 合成结果:")
        if result.stdout:
            print("输出:", result.stdout.strip())
        if result.stderr:
            print("错误:", result.stderr.strip())

        return result.returncode == 0

    def check_output(self, output_filename):
        """Look for the generated audio file and report its properties.

        Candidates are ``<output_filename>.wav``, ``.mp3`` and ``.flac`` in
        ``output_dir``, checked in that order. For the first one that exists
        the file is loaded with ``torchaudio`` to print size, duration and
        sample rate. A failure to load it is reported but still counts as
        success, because the file itself was produced.

        Args:
            output_filename: Output file name without extension.

        Returns:
            ``(True, path_as_str)`` for the first existing candidate, or
            ``(False, None)`` if none exists.
        """
        for extension in ("wav", "mp3", "flac"):
            output_file = self.output_dir / f"{output_filename}.{extension}"
            if not output_file.exists():
                continue

            try:
                import torchaudio

                waveform, sample_rate = torchaudio.load(str(output_file))
                duration = waveform.shape[1] / sample_rate

                print("\n✅ 音频生成成功!")
                print(f"📁 文件: {output_file}")
                print(f"📊 大小: {output_file.stat().st_size:,} bytes")
                print(f"🎵 时长: {duration:.2f} 秒")
                print(f"🎵 采样率: {sample_rate:,} Hz")

                if duration >= 25:
                    print("🎉 时长符合30秒要求!")
                else:
                    print(f"⚠️ 时长为 {duration:.2f} 秒")
            except Exception as e:
                # Best effort: inspection is informational only.
                print(f"⚠️ 读取音频失败: {e}")

            return True, str(output_file)

        print("❌ 未找到生成的音频文件")
        return False, None

    def cleanup(self):
        """Stop the API server (if one was started) and close its log file.

        The process is asked to terminate and then reaped with ``wait`` so
        it does not linger as a zombie; if it ignores the request for ten
        seconds it is killed. Safe to call more than once.
        """
        process = self.server_process
        if process is not None:
            print("🧹 停止服务器...")
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
            self.server_process = None

        log_handle = self._server_log
        if log_handle is not None:
            log_handle.close()
            self._server_log = None

    def run(self, output_filename="fish_speech_cli_output"):
        """Run the complete command-line synthesis workflow.

        Steps: check files, start the server, synthesize, check the output.
        :meth:`cleanup` always runs afterwards, including on Ctrl-C or an
        unexpected exception.

        Args:
            output_filename: Output file name without extension.

        Returns:
            True if an output audio file was produced, False otherwise.
        """
        print("🎊 Fish Speech 命令行语音克隆")
        print("=" * 60)

        try:
            # 1. Check required files.
            if not self.check_files():
                print("❌ 文件检查失败")
                return False

            # 2. Start the server.
            if not self.start_api_server():
                print("❌ 服务器启动失败")
                return False

            # 3. Synthesize.
            if not self.synthesize_speech(output_filename):
                print("❌ 语音合成失败")
                return False

            # 4. Check the result.
            success, output_file = self.check_output(output_filename)
            if not success:
                print("❌ 未找到输出文件")
                return False

            print("\n🎉 命令行语音合成完成!")
            print(f"📁 输出文件: {output_file}")
            return True

        except KeyboardInterrupt:
            print("\n🛑 用户中断操作")
            return False
        except Exception as e:
            # Top-level boundary: report and signal failure to the caller.
            print(f"❌ 执行失败: {e}")
            return False
        finally:
            self.cleanup()
|
||||
|
||||
def main():
    """Run the CLI workflow and return a process exit status.

    The optional first command-line argument is the output file name
    (without extension); it defaults to ``fish_speech_cli_output``.

    Returns:
        0 if synthesis succeeded, 1 otherwise, so that shell scripts and
        CI jobs can detect a failed run.
    """
    output_filename = sys.argv[1] if len(sys.argv) > 1 else "fish_speech_cli_output"

    cli = FishSpeechCLI()
    success = cli.run(output_filename)

    if not success:
        print("\n💔 失败,请检查错误信息")
        return 1

    print("\n🎊 成功! 使用命令播放音频:")
    print(f" aplay {cli.output_dir}/{output_filename}.wav")
    print(f" 或使用文件管理器打开: {cli.output_dir}/")
    return 0


if __name__ == "__main__":
    # Propagate success/failure to the calling shell.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user