#!/usr/bin/env python3
"""
Fixed emotion test for VoxCPM
Using proper parameter format
"""
import os
import sys

import soundfile as sf
import numpy as np

# Paths
WORKSPACE = "/root/tts"
VOXCPM_DIR = os.path.join(WORKSPACE, "VoxCPM")
OUTPUT_DIR = os.path.join(WORKSPACE, "accent_verification")

# Add VoxCPM to path
sys.path.insert(0, os.path.join(VOXCPM_DIR, "src"))

# Import VoxCPM
try:
    from voxcpm.core import VoxCPM
except Exception as e:
    print(f"āŒ Failed to import VoxCPM: {e}")
    sys.exit(1)

# Use LOCAL model; fall back to the alternate on-disk layout before giving up.
LOCAL_MODEL_PATH = os.path.join(VOXCPM_DIR, "models", "openbmb__VoxCPM1.5")
if not os.path.exists(LOCAL_MODEL_PATH):
    LOCAL_MODEL_PATH = os.path.join(VOXCPM_DIR, "models", "VoxCPM1.5")
if not os.path.exists(LOCAL_MODEL_PATH):
    print(f"āŒ Local model path not found")
    sys.exit(1)

# BUG FIX: ensure the output directory exists before sf.write targets it;
# the original crashed if accent_verification/ was missing.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Initialize VoxCPM
model = VoxCPM(
    voxcpm_model_path=LOCAL_MODEL_PATH,
    enable_denoiser=False,
    optimize=False
)

# One sentence synthesized for every emotion so outputs are directly comparable.
test_sentence = (
    "Hello everyone! I'm speaking with different emotion today. "
    "How does it sound to you?"
)

# Emotion-specific reference texts, hoisted to a module constant so the dict
# is built once and the supported-emotion set is visible at module level.
EMOTION_TEXTS = {
    "happy": (
        "Wow! I'm so excited and happy today! Everything is going great! "
        "I can't believe how wonderful this day is!"
    ),
    "sad": (
        "I'm feeling very sad and lonely today. Nothing seems to be going "
        "right. Everything feels so overwhelming."
    ),
    "angry": (
        "I'm really angry and frustrated! This is completely unacceptable! "
        "I can't believe what just happened!"
    ),
    "calm": (
        "I'm feeling very calm and peaceful today. Everything is quiet and "
        "serene. I feel so relaxed and at ease."
    ),
}


def create_emotion_reference(emotion):
    """Create (or reuse) a reference clip carrying the requested emotion.

    The reference wav is cached under WORKSPACE; if it already exists the
    expensive generation step is skipped.

    Args:
        emotion: Key into EMOTION_TEXTS ("happy", "sad", "angry", "calm").

    Returns:
        Tuple of (path to reference wav, reference transcript).

    Raises:
        KeyError: If *emotion* has no reference text defined.
    """
    ref_file = os.path.join(WORKSPACE, f"reference_{emotion}.wav")
    # BUG FIX: the original used .get() and either crashed later on
    # ref_text[:50] (TypeError) or silently passed prompt_text=None for an
    # unknown emotion; an indexed lookup fails fast with a clear KeyError.
    ref_text = EMOTION_TEXTS[emotion]

    if not os.path.exists(ref_file):
        print(f"šŸŽ™ļø Creating {emotion} emotion reference...")
        print(f"Reference text: {ref_text[:50]}...")
        # Generate reference audio with emotion
        audio = model.generate(
            text=ref_text,
            cfg_value=2.5,
            inference_timesteps=20,
            normalize=True
        )
        sf.write(ref_file, audio, model.tts_model.sample_rate)
        print(f"āœ… Created {emotion} reference: {ref_file}")

    return ref_file, ref_text


def test_emotion(emotion):
    """Synthesize the shared test sentence using *emotion* as voice prompt.

    Writes "<emotion>_emotion_test.wav" into OUTPUT_DIR and reports the
    resulting duration. Generation errors are caught and printed with a
    traceback rather than aborting the remaining emotions.
    """
    ref_audio, ref_text = create_emotion_reference(emotion)
    output_file = os.path.join(OUTPUT_DIR, f"{emotion}_emotion_test.wav")

    print(f"\n😊 Testing {emotion} emotion...")
    print(f"Test sentence: {test_sentence}")

    try:
        # Generate audio conditioned on the emotion reference (voice cloning).
        audio = model.generate(
            text=test_sentence,
            prompt_wav_path=ref_audio,
            prompt_text=ref_text,
            cfg_value=2.0,
            inference_timesteps=20,
            normalize=True,
            retry_badcase=True
        )

        # Save audio; bind the sample rate once for both the write and the
        # duration computation.
        sample_rate = model.tts_model.sample_rate
        sf.write(output_file, audio, sample_rate)

        if os.path.exists(output_file):
            duration = len(audio) / sample_rate
            print(f"āœ… Generated {emotion} emotion: {output_file}")
            print(f"   Duration: {duration:.2f} seconds")
        else:
            print(f"āŒ Failed to save")
    except Exception as e:
        print(f"āŒ Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    print(f"{'='*70}")
    print(f"FIXED EMOTION EXPRESSION TEST")
    print(f"{'='*70}")

    emotions = ["happy", "sad", "angry", "calm"]
    for emotion in emotions:
        test_emotion(emotion)

    print(f"\n{'='*70}")
    print(f"EMOTION TEST COMPLETE")
    print(f"{'='*70}")
    print(f"Output directory: {OUTPUT_DIR}")
    print(f"\nšŸ“‹ Generated emotion files:")
    for emotion in emotions:
        print(f"   - {emotion}_emotion_test.wav")
    print(f"\nšŸŽ§ Please listen to the files to verify emotion differences!")