# Source file: tts/scripts/test_emotion_fixed.py
# Retrieved: 2026-01-19 10:27:41 +08:00 (128 lines, 4.0 KiB, Python)
#!/usr/bin/env python3
"""
Fixed emotion test for VoxCPM
Using proper parameter format
"""
import os
import sys
import soundfile as sf
import numpy as np
# Workspace layout: everything lives under /root/tts
WORKSPACE = "/root/tts"
VOXCPM_DIR = os.path.join(WORKSPACE, "VoxCPM")
OUTPUT_DIR = os.path.join(WORKSPACE, "accent_verification")

# Make the vendored VoxCPM package importable
sys.path.insert(0, os.path.join(VOXCPM_DIR, "src"))

# Import VoxCPM; abort with a readable message rather than a raw traceback
try:
    from voxcpm.core import VoxCPM
except Exception as e:
    print(f"❌ Failed to import VoxCPM: {e}")
    sys.exit(1)

# Use the LOCAL model: prefer the hub-style directory name, then the plain
# name; if neither exists there is nothing to run, so exit early.
LOCAL_MODEL_PATH = os.path.join(VOXCPM_DIR, "models", "openbmb__VoxCPM1.5")
if not os.path.exists(LOCAL_MODEL_PATH):
    LOCAL_MODEL_PATH = os.path.join(VOXCPM_DIR, "models", "VoxCPM1.5")
if not os.path.exists(LOCAL_MODEL_PATH):
    print("❌ Local model path not found")
    sys.exit(1)

# Initialize VoxCPM from the local checkpoint (no denoiser, no optimizer pass)
model = VoxCPM(
    voxcpm_model_path=LOCAL_MODEL_PATH,
    enable_denoiser=False,
    optimize=False
)

# Sentence synthesized once per emotion so outputs are directly comparable
test_sentence = "Hello everyone! I'm speaking with different emotion today. How does it sound to you?"
def create_emotion_reference(emotion):
    """Create (or reuse) a reference WAV carrying the requested emotion.

    The reference is generated once and cached on disk; later calls for the
    same emotion reuse the existing file.

    Args:
        emotion: one of "happy", "sad", "angry", "calm".

    Returns:
        Tuple of (path to the reference WAV, the text spoken in it).

    Raises:
        ValueError: if no reference text is defined for *emotion*.
    """
    ref_file = os.path.join(WORKSPACE, f"reference_{emotion}.wav")
    # Emotion-specific reference texts
    emotion_texts = {
        "happy": "Wow! I'm so excited and happy today! Everything is going great! I can't believe how wonderful this day is!",
        "sad": "I'm feeling very sad and lonely today. Nothing seems to be going right. Everything feels so overwhelming.",
        "angry": "I'm really angry and frustrated! This is completely unacceptable! I can't believe what just happened!",
        "calm": "I'm feeling very calm and peaceful today. Everything is quiet and serene. I feel so relaxed and at ease."
    }
    ref_text = emotion_texts.get(emotion)
    if ref_text is None:
        # Fail fast with a clear message instead of crashing on ref_text[:50]
        raise ValueError(f"Unknown emotion: {emotion!r}")
    if not os.path.exists(ref_file):
        print(f"🎙️ Creating {emotion} emotion reference...")
        print(f"Reference text: {ref_text[:50]}...")
        # Generate reference audio with emotion
        audio = model.generate(
            text=ref_text,
            cfg_value=2.5,
            inference_timesteps=20,
            normalize=True
        )
        sf.write(ref_file, audio, model.tts_model.sample_rate)
        print(f"✅ Created {emotion} reference: {ref_file}")
    return ref_file, ref_text
def test_emotion(emotion):
    """Synthesize the shared test sentence in the style of *emotion*.

    Uses the emotion's reference audio/text as a voice-cloning prompt so the
    generated speech inherits its emotional delivery, then saves the result
    under OUTPUT_DIR as "<emotion>_emotion_test.wav".

    Args:
        emotion: one of "happy", "sad", "angry", "calm".
    """
    ref_audio, ref_text = create_emotion_reference(emotion)
    output_file = os.path.join(OUTPUT_DIR, f"{emotion}_emotion_test.wav")
    print(f"\n😊 Testing {emotion} emotion...")
    print(f"Test sentence: {test_sentence}")
    try:
        # Prompt with the emotion reference so the output inherits its style
        audio = model.generate(
            text=test_sentence,
            prompt_wav_path=ref_audio,
            prompt_text=ref_text,
            cfg_value=2.0,
            inference_timesteps=20,
            normalize=True,
            retry_badcase=True
        )
        # Save audio
        sf.write(output_file, audio, model.tts_model.sample_rate)
        if os.path.exists(output_file):
            duration = len(audio) / model.tts_model.sample_rate
            print(f"✅ Generated {emotion} emotion: {output_file}")
            print(f" Duration: {duration:.2f} seconds")
        else:
            print("❌ Failed to save")
    except Exception as e:
        # Keep the loop going for the remaining emotions; report the failure.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    print(f"{'='*70}")
    print("FIXED EMOTION EXPRESSION TEST")
    print(f"{'='*70}")
    # Ensure the output directory exists before any sf.write targets it
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    emotions = ["happy", "sad", "angry", "calm"]
    for emotion in emotions:
        test_emotion(emotion)
    print(f"\n{'='*70}")
    print("EMOTION TEST COMPLETE")
    print(f"{'='*70}")
    print(f"Output directory: {OUTPUT_DIR}")
    print("\n📋 Generated emotion files:")
    for emotion in emotions:
        print(f" - {emotion}_emotion_test.wav")
    print("\n🎧 Please listen to the files to verify emotion differences!")