#!/usr/bin/env python3
"""
Optimized accent demo generator using VoxCPM
Improved version with better parameters and shorter text
"""

import os

import numpy as np
import soundfile as sf
from voxcpm import VoxCPM

# Generation settings shared by every synthesis call in this script.
CFG_VALUE = 3.0  # Higher CFG for better quality / adherence to the prompt
INFERENCE_TIMESTEPS = 30  # More steps for better quality

# Used only when the model object does not expose its own sample rate.
FALLBACK_SAMPLE_RATE = 24000

# Reference audio paths (relative to the current working directory).
REF_AUDIO_MAP = {
    "indian": "reference_indian_opt.wav",
    "russian": "reference_russian_opt.wav",
    "singaporean": "reference_singaporean_opt.wav",
    "hongkong": "reference_hongkong_opt.wav",
}

# Reference texts (shorter, more natural), keyed like REF_AUDIO_MAP.
REF_TEXT_MAP = {
    "indian": "Hello there! How are you today? I'm from India. The weather here is quite warm.",
    "russian": "Hello! How are you doing? I'm from Russia. The winters here are very cold.",
    "singaporean": "Hi! How's it going? I'm from Singapore. We have delicious hawker food here.",
    "hongkong": "Hey! How are you? I'm from Hong Kong. It's a bustling city with amazing food.",
}


def _output_sample_rate(model):
    """Return the sample rate to write audio produced by *model* with.

    The rate is read from ``model.tts_model.sample_rate`` when available,
    because a hard-coded rate that differs from the model's real output rate
    yields files that play at the wrong speed and pitch. Falls back to
    ``FALLBACK_SAMPLE_RATE`` when the attribute is missing or falsy.
    """
    tts_model = getattr(model, "tts_model", None)
    rate = getattr(tts_model, "sample_rate", None)
    return int(rate) if rate else FALLBACK_SAMPLE_RATE


def _synthesize_to_file(model, path, text, prompt_wav_path=None, prompt_text=None):
    """Run ``model.generate`` for *text* and write the result to *path*.

    The prompt arguments are forwarded only when both are given, so an
    unprompted call passes exactly ``text``, ``cfg_value`` and
    ``inference_timesteps`` to the model.
    """
    kwargs = {
        "text": text,
        "cfg_value": CFG_VALUE,
        "inference_timesteps": INFERENCE_TIMESTEPS,
    }
    if prompt_wav_path is not None and prompt_text is not None:
        kwargs["prompt_wav_path"] = prompt_wav_path
        kwargs["prompt_text"] = prompt_text

    audio = model.generate(**kwargs)
    sf.write(path, audio, _output_sample_rate(model))


def generate_accent_demo(model, text, accent_name, output_dir="accent_demos_optimized"):
    """Generate optimized accent demo audio.

    Args:
        model: Object exposing ``generate(...)`` as used below (the script
            passes a ``VoxCPM`` instance).
        text: Text to synthesize for the demo.
        accent_name: One of the keys of ``REF_AUDIO_MAP``.
        output_dir: Directory for the demo file; created if missing.

    Returns:
        Path of the written demo file, or ``None`` (after printing a message)
        when *accent_name* is not a known accent.
    """
    os.makedirs(output_dir, exist_ok=True)

    ref_audio = REF_AUDIO_MAP.get(accent_name)
    ref_text = REF_TEXT_MAP.get(accent_name)

    if not ref_audio or not ref_text:
        print(f"Invalid accent name: {accent_name}")
        return None

    # The reference clip is generated once and reused on later runs.
    if not os.path.exists(ref_audio):
        print(f"Generating optimized reference audio for {accent_name}...")
        _synthesize_to_file(model, ref_audio, ref_text)
        print(f"Generated optimized reference audio: {ref_audio}")

    # Generate the demo, conditioned on the reference clip and its transcript.
    output_file = os.path.join(output_dir, f"{accent_name}_demo.wav")
    print(f"Generating optimized {accent_name} accent demo...")
    _synthesize_to_file(
        model,
        output_file,
        text,
        prompt_wav_path=ref_audio,
        prompt_text=ref_text,
    )
    print(f"Generated optimized {accent_name} accent demo: {output_file}")

    return output_file


def generate_cantonese_pinyin_demo(model, text, pinyin, output_dir="accent_demos_optimized"):
    """Generate optimized Cantonese pinyin demo.

    Args:
        model: Object exposing ``generate(...)`` as used below.
        text: Currently unused; kept so existing callers keep working.
        pinyin: The string that is actually synthesized.
        output_dir: Directory for the demo file; created if missing.

    Returns:
        Path of the written demo file.
    """
    os.makedirs(output_dir, exist_ok=True)

    ref_audio = "reference_cantonese_opt.wav"
    ref_text = "你好,我是张学友。很高兴认识你。我喜欢唱歌。"

    # The reference clip is generated once and reused on later runs.
    if not os.path.exists(ref_audio):
        print("Generating optimized Cantonese reference audio...")
        _synthesize_to_file(model, ref_audio, ref_text)
        print(f"Generated optimized Cantonese reference audio: {ref_audio}")

    # NOTE(review): only `pinyin` is synthesized; `text` is ignored here.
    output_file = os.path.join(output_dir, "cantonese_pinyin_demo.wav")
    print("Generating optimized Cantonese pinyin demo...")
    _synthesize_to_file(
        model,
        output_file,
        pinyin,
        prompt_wav_path=ref_audio,
        prompt_text=ref_text,
    )
    print(f"Generated optimized Cantonese pinyin demo: {output_file}")

    return output_file


def main():
    """Load the model and generate every accent demo plus the Cantonese demo."""
    print("Initializing VoxCPM...")
    model = VoxCPM.from_pretrained("openbmb/VoxCPM1.5")

    # Shorter test text for better results
    test_text = "Hello everyone! Welcome to our podcast. I hope you enjoy this episode!"

    # Generate optimized accent demos
    accents = ["indian", "russian", "singaporean", "hongkong"]
    for accent in accents:
        generate_accent_demo(model, test_text, accent)

    # Generate optimized Cantonese pinyin demo
    cantonese_text = "张学友是香港著名歌手,被誉为歌神。"
    # NOTE(review): this is an English sentence, not a pinyin romanization --
    # confirm whether that is intended.
    cantonese_pinyin = "Zhang Xueyou is a famous Hong Kong singer, known as the God of Songs."
    generate_cantonese_pinyin_demo(model, cantonese_text, cantonese_pinyin)

    print("All optimized demos generated successfully!")


if __name__ == "__main__":
    main()