#!/usr/bin/env python3
"""
Accent demo generator using LOCAL VoxCPM model
Using the same successful parameters as the Ben voice cloning

Module-level setup performed on import/run, in order:
  1. put the bundled VoxCPM sources on ``sys.path`` and import ``VoxCPM``;
  2. locate a local VoxCPM 1.5 model directory (exit with status 1 if none);
  3. create the output directory;
  4. instantiate the model (exit with status 1 on failure);
  5. define the reference audio paths and their matching transcripts.
"""

import os
import sys
import soundfile as sf
import numpy as np

# Paths
WORKSPACE = "/root/tts"
VOXCPM_DIR = os.path.join(WORKSPACE, "VoxCPM")
OUTPUT_DIR = os.path.join(WORKSPACE, "accent_demos_local")

# Add VoxCPM to path (front of the list so the local checkout wins)
sys.path.insert(0, os.path.join(VOXCPM_DIR, "src"))
print("✅ Added VoxCPM path")

# Import VoxCPM
try:
    from voxcpm.core import VoxCPM
    print("✅ VoxCPM imported successfully")
except Exception as e:
    # Broad on purpose: this is the script's top-level boundary and any
    # import-time failure is fatal for the run.
    print(f"❌ Failed to import VoxCPM: {e}")
    sys.exit(1)

# Use LOCAL model (same as successful Ben voice cloning).
# Candidate directories are tried in order; the first existing one is used.
_MODEL_CANDIDATES = (
    os.path.join(VOXCPM_DIR, "models", "openbmb__VoxCPM1.5"),
    os.path.join(VOXCPM_DIR, "models", "VoxCPM1.5"),
)
LOCAL_MODEL_PATH = next(
    (path for path in _MODEL_CANDIDATES if os.path.exists(path)),
    None,
)
if LOCAL_MODEL_PATH is None:
    print("❌ Local model path not found")
    # Say which locations were checked so the failure is actionable.
    for _candidate in _MODEL_CANDIDATES:
        print(f"   tried: {_candidate}")
    sys.exit(1)

print(f"✅ Using local model: {LOCAL_MODEL_PATH}")

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"✅ Output directory: {OUTPUT_DIR}")

# Initialize VoxCPM with the SAME parameters as successful Ben voice cloning
print("\n🚀 Initializing VoxCPM with successful parameters...")
try:
    model = VoxCPM(
        voxcpm_model_path=LOCAL_MODEL_PATH,
        enable_denoiser=False,  # Disable denoiser for better quality
        optimize=False,  # Disable optimization to avoid issues
    )
    print("✅ VoxCPM initialized successfully")
except Exception as e:
    print(f"❌ VoxCPM initialization failed: {e}")
    sys.exit(1)

# Use REAL reference audio files (the ones that worked for Ben)
REAL_BEN_REF = os.path.join(WORKSPACE, "hosts", "ben_guanquelou.wav")
REAL_JUDY_REF = os.path.join(WORKSPACE, "hosts", "judy_tixilingbi.MP3")

# Only report a reference as OK when the file is actually present; a missing
# file is reported as a warning rather than a hard failure so the run can
# continue as it did before.
for _label, _ref_path in (("Ben", REAL_BEN_REF), ("Judy", REAL_JUDY_REF)):
    if os.path.isfile(_ref_path):
        print(f"✅ {_label} reference audio: {_ref_path}")
    else:
        print(f"⚠️ {_label} reference audio not found: {_ref_path}")

# Reference texts that MATCH the audio
REFERENCE_TEXTS = {
    "ben": "白日依山尽,黄河入海流。欲穷千里目,更上一层楼。",
    "judy": "题西林壁,横看成岭侧成峰,远近高低各不同。不识庐山真面目,只缘身在此山中。",
}

def _preview(text, limit=50):
    """Return *text* cut to *limit* characters, with "..." only if truncated."""
    return text if len(text) <= limit else f"{text[:limit]}..."


def _synthesize_to_file(text, output_file):
    """Synthesize *text* with Ben's reference voice and write it to *output_file*.

    Uses the module-level ``model``, ``REAL_BEN_REF`` and
    ``REFERENCE_TEXTS["ben"]`` with the fixed generation parameters below.
    Prints a short report (path, size in bytes, duration in seconds).

    Returns:
        ``output_file`` on success, ``None`` if generation or saving failed.
        Exceptions are caught, printed with a traceback, and reported as
        ``None`` so one failed demo does not abort the remaining ones.
    """
    try:
        # A caller may pass an output_dir other than the one created at
        # module level, so make sure the target directory exists.
        target_dir = os.path.dirname(output_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Generate audio with the SAME parameters as successful Ben voice cloning
        audio = model.generate(
            text=text,
            prompt_wav_path=REAL_BEN_REF,
            prompt_text=REFERENCE_TEXTS["ben"],
            cfg_value=2.0,  # Same as successful Ben
            inference_timesteps=20,  # Same as successful Ben
            normalize=True,  # Enable text normalization
            denoise=False,  # Disable denoise
            retry_badcase=True,  # Enable retry for bad cases
        )

        # Save audio
        sample_rate = model.tts_model.sample_rate
        sf.write(output_file, audio, sample_rate)

        # Verify
        if not os.path.exists(output_file):
            print("❌ Failed to save")
            return None

        file_size = os.path.getsize(output_file)
        duration = len(audio) / sample_rate
        print("✅ Generated successfully!")
        print(f" File: {output_file}")
        print(f" Size: {file_size} bytes")
        print(f" Duration: {duration:.2f} seconds")
        return output_file
    except Exception as e:
        # Top-level boundary for a single demo: report and keep going.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


def generate_accent_demo_with_real_reference(text, accent_name, output_dir=OUTPUT_DIR):
    """Generate accent demo using REAL reference audio.

    Args:
        text: Text to synthesize.
        accent_name: Label used in the progress message and as the output
            file stem (``<accent_name>_demo.wav``).
        output_dir: Directory the WAV file is written to.

    Returns:
        Path of the written file, or ``None`` if generation failed.

    NOTE(review): ``accent_name`` is not passed to the model. Every accent is
    synthesized from the same text, reference audio and parameters, so the
    outputs differ only by filename (plus whatever variation the model itself
    introduces) -- confirm whether accent-specific references were intended.
    """
    output_file = os.path.join(output_dir, f"{accent_name}_demo.wav")

    print(f"\n🎙️ Generating {accent_name} accent demo...")
    print(f"Text: {_preview(text)}")

    # Use Ben's reference audio as base (since it worked well)
    return _synthesize_to_file(text, output_file)


def generate_cantonese_pinyin_demo(text, pinyin, output_dir=OUTPUT_DIR):
    """Generate Cantonese pinyin demo.

    Args:
        text: Original sentence; only shown in the progress message.
        pinyin: The string that is actually sent to the model for synthesis.
        output_dir: Directory ``cantonese_pinyin_demo.wav`` is written to.

    Returns:
        Path of the written file, or ``None`` if generation failed.
    """
    output_file = os.path.join(output_dir, "cantonese_pinyin_demo.wav")

    print("\n🎙️ Generating Cantonese pinyin demo...")
    print(f"Text: {_preview(text)}")

    return _synthesize_to_file(pinyin, output_file)


if __name__ == "__main__":
    # Test sentence (same as before)
    test_text = "Hello everyone! Welcome to our podcast. I hope you enjoy this episode!"

    # Demo name -> output path (None when that demo failed)
    results = {}

    # Generate accent demos using REAL reference audio
    accents = ["indian", "russian", "singaporean", "hongkong"]
    for accent in accents:
        results[accent] = generate_accent_demo_with_real_reference(test_text, accent)

    # Generate Cantonese pinyin demo
    cantonese_text = "张学友是香港著名歌手,被誉为歌神。"
    # NOTE(review): this is an English translation, not a pinyin/Jyutping
    # romanization of cantonese_text -- confirm this is the intended input.
    cantonese_pinyin = "Zhang Xueyou is a famous Hong Kong singer, known as the God of Songs."
    results["cantonese_pinyin"] = generate_cantonese_pinyin_demo(
        cantonese_text, cantonese_pinyin
    )

    failed = [name for name, path in results.items() if path is None]

    print(f"\n{'='*70}")
    print("ACCENT DEMOS GENERATION COMPLETE")
    print(f"{'='*70}")
    print(f"Output directory: {OUTPUT_DIR}")
    if failed:
        # Do not claim success when some demos were not produced.
        print(f"\n❌ {len(failed)} of {len(results)} demos failed: {', '.join(failed)}")
    else:
        print("\nAll demos generated with the SAME parameters that worked for Ben's voice!")