Files
tts/scripts/test_accent_verification.py
2026-01-19 10:27:41 +08:00

206 lines
7.2 KiB
Python

#!/usr/bin/env python3
"""
Accent verification test for VoxCPM
Using different reference audios for different accents
"""
import os
import sys
import soundfile as sf
import numpy as np
# Filesystem layout: everything lives under one fixed workspace root.
WORKSPACE = "/root/tts"
VOXCPM_DIR = os.path.join(WORKSPACE, "VoxCPM")
OUTPUT_DIR = os.path.join(WORKSPACE, "accent_verification")

# Put the VoxCPM checkout's "src" directory first on the import path so the
# `voxcpm` package below resolves to it.
sys.path.insert(0, os.path.join(VOXCPM_DIR, "src"))
print("✅ Added VoxCPM path")

# Import VoxCPM; any failure here is fatal for the script.
try:
    from voxcpm.core import VoxCPM
    print("✅ VoxCPM imported successfully")
except Exception as exc:
    print(f"❌ Failed to import VoxCPM: {exc}")
    sys.exit(1)

# Resolve the LOCAL model directory: try each known directory name in order
# and keep the first one that exists on disk.
_MODELS_DIR = os.path.join(VOXCPM_DIR, "models")
for _candidate in ("openbmb__VoxCPM1.5", "VoxCPM1.5"):
    LOCAL_MODEL_PATH = os.path.join(_MODELS_DIR, _candidate)
    if os.path.exists(LOCAL_MODEL_PATH):
        break
else:
    # Neither candidate directory exists.
    print("❌ Local model path not found")
    sys.exit(1)
print(f"✅ Using local model: {LOCAL_MODEL_PATH}")

# Ensure the output directory exists before any audio is written.
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"✅ Output directory: {OUTPUT_DIR}")

# Initialize the shared VoxCPM instance used by every test below.
print("\n🚀 Initializing VoxCPM...")
try:
    _model_options = {
        "voxcpm_model_path": LOCAL_MODEL_PATH,
        "enable_denoiser": False,
        "optimize": False,
    }
    model = VoxCPM(**_model_options)
    print("✅ VoxCPM initialized successfully")
except Exception as exc:
    print(f"❌ VoxCPM initialization failed: {exc}")
    sys.exit(1)

# Sentence synthesized by the accent tests.
test_sentence = "Hello everyone! I'm speaking with a different accent today. How does it sound to you?"
# Create accent-specific reference audios
def create_accent_reference(accent_name, description):
    """Return the reference audio path and transcript for an accent.

    The reference file is ``reference_<accent_name>.wav`` under ``WORKSPACE``.
    If that file does not exist yet, it is synthesized from the accent's
    reference sentence with the module-level ``model`` and written with
    ``sf.write``; an existing file is reused as-is.

    Args:
        accent_name: Key selecting the reference sentence. Unknown names
            fall back to the "american" sentence.
        description: Accepted for the caller's convenience; not used here.

    Returns:
        A ``(ref_file, ref_text)`` tuple: the wav path and the sentence
        associated with it.
    """
    sentences_by_accent = {
        "indian": "Namaste! How are you doing today? I'm from India. The weather here is quite warm and sunny.",
        "british": "Hello there! How are you today? I'm from London. The weather here is quite rainy and cold.",
        "american": "Hey! What's up? I'm from New York. The weather here is pretty nice today.",
        "australian": "G'day mate! How ya goin'? I'm from Sydney. The weather here is bloody fantastic!",
        "russian": "Privet! Kak dela? I'm from Moscow. The weather here is very cold with snow.",
        "singaporean": "Hi there! How's it going? I'm from Singapore. We have delicious hawker food here.",
        "hongkong": "Nei ho! How are you? I'm from Hong Kong. It's a busy city with great food.",
    }

    try:
        transcript = sentences_by_accent[accent_name]
    except KeyError:
        # Unknown accent: use the American sentence as the default.
        transcript = sentences_by_accent["american"]

    wav_path = os.path.join(WORKSPACE, f"reference_{accent_name}.wav")

    # Reuse a previously generated reference instead of synthesizing again.
    if os.path.exists(wav_path):
        return wav_path, transcript

    print(f"🎙️ Creating {accent_name} accent reference...")
    print(f"Reference text: {transcript}")

    # Synthesize the reference sentence without any prompt audio.
    samples = model.generate(
        text=transcript,
        cfg_value=2.5,
        inference_timesteps=20,
        normalize=True,
    )
    sf.write(wav_path, samples, model.tts_model.sample_rate)
    print(f"✅ Created {accent_name} reference: {wav_path}")

    return wav_path, transcript
# Test different accents
def test_accent(accent_name, description):
    """Synthesize ``test_sentence`` prompted by one accent's reference audio.

    Obtains the reference wav and transcript from
    ``create_accent_reference``, generates the test sentence with them as
    the prompt, and writes ``<accent_name>_accent_test.wav`` into
    ``OUTPUT_DIR``. Progress, file size and duration are printed; any
    exception is reported with a traceback rather than propagated.

    Args:
        accent_name: Accent key, also used in the output file name.
        description: Passed through to ``create_accent_reference``.
    """
    prompt_wav, prompt_transcript = create_accent_reference(accent_name, description)
    wav_path = os.path.join(OUTPUT_DIR, f"{accent_name}_accent_test.wav")

    print(f"\n🎯 Testing {accent_name} accent...")
    print(f"Test sentence: {test_sentence}")

    try:
        # Generate the test sentence conditioned on the accent reference.
        generation_options = {
            "text": test_sentence,
            "prompt_wav_path": prompt_wav,
            "prompt_text": prompt_transcript,
            "cfg_value": 2.0,
            "inference_timesteps": 20,
            "normalize": True,
            "retry_badcase": True,
        }
        samples = model.generate(**generation_options)

        # Save the audio at the model's sample rate.
        rate = model.tts_model.sample_rate
        sf.write(wav_path, samples, rate)

        # Verify the file landed on disk before reporting success.
        if not os.path.exists(wav_path):
            print("❌ Failed to save")
            return

        size_bytes = os.path.getsize(wav_path)
        seconds = len(samples) / rate
        print("✅ Generated successfully!")
        print(f" File: {wav_path}")
        print(f" Size: {size_bytes} bytes")
        print(f" Duration: {seconds:.2f} seconds")
    except Exception as exc:
        print(f"❌ Error: {exc}")
        import traceback
        traceback.print_exc()
# Test emotion capability
def test_emotion():
    """Synthesize one emotionally worded sentence per emotion.

    For each emotion, the emotion's own sentence is generated with the
    module-level ``model`` and no prompt, so any emotional colouring has to
    come from the text itself. The result is written to
    ``<emotion>_emotion_test.wav`` in ``OUTPUT_DIR``. Failures are printed
    with a traceback and do not stop the remaining emotions.

    The earlier version passed the emotional sentence as ``prompt_text``
    while leaving ``prompt_wav_path`` as ``None`` and synthesized the neutral
    ``test_sentence``. A prompt transcript without prompt audio is not a
    usable prompt, and the emotional wording never reached the synthesized
    text, so the outputs could not differ by emotion.
    NOTE(review): VoxCPM's ``generate`` is expected to reject a lone
    ``prompt_text`` outright — confirm against the installed version.
    """
    emotions = {
        "happy": "Wow! I'm so excited and happy today! Everything is going great!",
        "sad": "I'm feeling very sad and lonely today. Nothing seems to be going right.",
        "angry": "I'm really angry and frustrated! This is completely unacceptable!",
        "calm": "I'm feeling very calm and peaceful today. Everything is quiet and serene.",
    }

    for emotion, emotion_text in emotions.items():
        output_file = os.path.join(OUTPUT_DIR, f"{emotion}_emotion_test.wav")
        print(f"\n😊 Testing {emotion} emotion...")

        try:
            # Speak the emotional sentence itself; no prompt audio/text pair
            # is supplied, so the model works from the text alone.
            audio = model.generate(
                text=emotion_text,
                cfg_value=2.5,
                inference_timesteps=20,
                normalize=True,
            )

            # Save audio at the model's sample rate.
            sample_rate = model.tts_model.sample_rate
            sf.write(output_file, audio, sample_rate)

            if os.path.exists(output_file):
                duration = len(audio) / sample_rate
                print(f"✅ Generated {emotion} emotion: {output_file}")
                print(f" Duration: {duration:.2f} seconds")
            else:
                print("❌ Failed to save")
        except Exception as e:
            # Best-effort: report and move on to the next emotion. The
            # traceback matches what test_accent prints on failure.
            print(f"❌ Error: {e}")
            import traceback
            traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: run every accent case, then the emotion cases, and
    # finish by listing the file names the run is expected to produce.
    rule = "=" * 70

    print(rule)
    print("VOXCPM ACCENT AND EMOTION VERIFICATION TEST")
    print(rule)

    # (accent key, human-readable description) pairs to exercise.
    accents = [
        ("indian", "Indian English accent"),
        ("british", "British English accent"),
        ("american", "American English accent"),
        ("australian", "Australian English accent"),
        ("russian", "Russian English accent"),
        ("singaporean", "Singaporean English accent"),
        ("hongkong", "Hong Kong English accent"),
    ]
    for accent_key, accent_blurb in accents:
        test_accent(accent_key, accent_blurb)

    # Emotion cases.
    print(f"\n{rule}")
    print("TESTING EMOTION EXPRESSION CAPABILITY")
    print(rule)
    test_emotion()

    # Closing summary.
    print(f"\n{rule}")
    print("VERIFICATION TEST COMPLETE")
    print(rule)
    print(f"Output directory: {OUTPUT_DIR}")

    print("\n📋 Generated files:")
    expected_files = [f"{key}_accent_test.wav" for key, _ in accents]
    expected_files += [
        f"{emotion}_emotion_test.wav"
        for emotion in ["happy", "sad", "angry", "calm"]
    ]
    for filename in expected_files:
        print(f" - {filename}")

    print("\n🎧 Please listen to the files to verify accent and emotion differences!")