Initial commit for TTS project

This commit is contained in:
Ben
2026-01-19 10:27:41 +08:00
commit a9abd3913d
160 changed files with 11031 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Generate accent demos using VoxCPM.
Supports Indian, Russian, Singaporean, and Hong Kong English accents,
plus a Cantonese pinyin demo.
"""
import os
import numpy as np
import soundfile as sf
from voxcpm import VoxCPM
def generate_accent_demo(model, text, accent_name, output_dir="accent_demos"):
    """Generate an accent demo clip of ``text`` and return the path written.

    The accent is selected by ``accent_name``; each accent has a reference
    audio file (looked up relative to the current working directory) and a
    reference transcript.  If the reference audio file does not exist yet it
    is synthesised first by calling ``model.generate`` without a prompt, and
    then reused as the prompt for the actual demo.

    Args:
        model: Object exposing ``generate(text=..., prompt_wav_path=...,
            prompt_text=..., cfg_value=..., inference_timesteps=...)``.
        text: Sentence to synthesise for the demo.
        accent_name: One of ``"indian"``, ``"russian"``, ``"singaporean"``,
            ``"hongkong"``.
        output_dir: Directory that receives ``<accent_name>_demo.wav``; it is
            created if missing.

    Returns:
        Path of the generated demo file, or ``None`` when ``accent_name`` is
        not one of the supported accents (a message is printed and nothing is
        written or created).
    """
    # Reference clip path and the transcript spoken in it, keyed by accent.
    # Keeping both in one table guarantees every accent has both entries.
    references = {
        "indian": (
            "reference_indian.wav",
            "Hello, how are you doing today? I'm from Mumbai, India. The weather here is quite warm and humid during the summer months. Would you like to try some delicious Indian cuisine with me?",
        ),
        "russian": (
            "reference_russian.wav",
            "Hello, how are you doing today? I'm from Moscow, Russia. The winters here are very cold, with lots of snow and ice. But the summers are beautiful and sunny. Would you like to visit the Red Square with me?",
        ),
        "singaporean": (
            "reference_singaporean.wav",
            "Hello, how are you doing today? I'm from Singapore. It's a small but vibrant city-state in Southeast Asia. We have delicious hawker food and beautiful gardens. Would you like to try some chicken rice with me?",
        ),
        "hongkong": (
            "reference_hongkong.wav",
            "Hello, how are you doing today? I'm from Hong Kong. It's a bustling metropolitan city with amazing skyline and delicious food. We have dim sum, roast goose, and many other Cantonese delicacies. Would you like to go shopping in Causeway Bay with me?",
        ),
    }

    # Validate before touching the filesystem so a bad accent name has no
    # side effects (previously the output directory was created regardless).
    reference = references.get(accent_name)
    if reference is None:
        print(f"Invalid accent name: {accent_name}")
        return None
    ref_audio, ref_text = reference

    os.makedirs(output_dir, exist_ok=True)

    # Write files at the rate the model reports instead of a hard-coded
    # value; a mismatched rate makes the audio play at the wrong speed/pitch.
    # Falls back to the previously hard-coded 24000 Hz if the model object
    # does not expose ``tts_model.sample_rate``.
    sample_rate = getattr(getattr(model, "tts_model", None), "sample_rate", 24000)

    # Create the reference clip on first use (no prompt: default voice).
    if not os.path.exists(ref_audio):
        print(f"Reference audio not found for {accent_name}, generating with default voice...")
        audio = model.generate(
            text=ref_text,
            cfg_value=2.0,
            inference_timesteps=20,
        )
        sf.write(ref_audio, audio, sample_rate)
        print(f"Generated reference audio: {ref_audio}")

    # Generate the demo itself, prompted with the reference clip + transcript.
    output_file = os.path.join(output_dir, f"{accent_name}_demo.wav")
    print(f"Generating {accent_name} accent demo...")
    audio = model.generate(
        text=text,
        prompt_wav_path=ref_audio,
        prompt_text=ref_text,
        cfg_value=2.0,
        inference_timesteps=20,
    )
    sf.write(output_file, audio, sample_rate)
    print(f"Generated {accent_name} accent demo: {output_file}")
    return output_file
def generate_cantonese_pinyin_demo(model, text, pinyin, output_dir="accent_demos"):
    """Generate the Cantonese pinyin demo clip and return the path written.

    A reference clip ``reference_cantonese.wav`` (relative to the current
    working directory) is synthesised from a fixed Chinese transcript if it
    does not exist yet, then used together with that transcript as the prompt
    when synthesising ``pinyin``.

    Args:
        model: Object exposing ``generate(text=..., prompt_wav_path=...,
            prompt_text=..., cfg_value=..., inference_timesteps=...)``.
        text: Original (non-romanised) sentence.  NOTE(review): this argument
            is not used by the function; only ``pinyin`` is synthesised.  It
            is kept so existing callers keep working.
        pinyin: The string that is actually passed to the model as the text
            to synthesise.
        output_dir: Directory that receives ``cantonese_pinyin_demo.wav``; it
            is created if missing.

    Returns:
        Path of the generated demo file.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Write files at the rate the model reports instead of a hard-coded
    # value; a mismatched rate makes the audio play at the wrong speed/pitch.
    # Falls back to the previously hard-coded 24000 Hz if the model object
    # does not expose ``tts_model.sample_rate``.
    sample_rate = getattr(getattr(model, "tts_model", None), "sample_rate", 24000)

    # Reference clip and the transcript spoken in it.
    ref_audio = "reference_cantonese.wav"
    ref_text = "你好,我是张学友。很高兴认识你。我喜欢唱歌和表演。希望你喜欢我的音乐。"

    # Create the reference clip on first use (no prompt: default voice).
    if not os.path.exists(ref_audio):
        print("Generating Cantonese reference audio...")
        audio = model.generate(
            text=ref_text,
            cfg_value=2.0,
            inference_timesteps=20,
        )
        sf.write(ref_audio, audio, sample_rate)
        print(f"Generated Cantonese reference audio: {ref_audio}")

    # Generate the demo from the romanised input, prompted with the reference.
    output_file = os.path.join(output_dir, "cantonese_pinyin_demo.wav")
    print("Generating Cantonese pinyin demo...")
    audio = model.generate(
        text=pinyin,
        prompt_wav_path=ref_audio,
        prompt_text=ref_text,
        cfg_value=2.0,
        inference_timesteps=20,
    )
    sf.write(output_file, audio, sample_rate)
    print(f"Generated Cantonese pinyin demo: {output_file}")
    return output_file
def main():
    """Load VoxCPM and render every demo clip produced by this script."""
    # Load the pretrained model once and share it across all demos.
    print("Initializing VoxCPM...")
    tts = VoxCPM.from_pretrained("openbmb/VoxCPM1.5")

    # The same English sentence is rendered once per accent.
    sentence = "Hello everyone, welcome to our podcast. Today we're going to discuss various accents from around the world. I hope you enjoy this episode!"
    for accent_name in ("indian", "russian", "singaporean", "hongkong"):
        generate_accent_demo(tts, sentence, accent_name)

    # Cantonese pinyin demo (Jacky Cheung): the sentence and its romanisation.
    hanzi = "张学友是香港著名歌手,被誉为歌神。他的歌声深情动人,深受歌迷喜爱。"
    romanised = "{zoeng1}{hau2}{juk6} {si6} {hoeng1}{gong2} {zyu4}{ming4} {go1}{sau2}{bei6}{jyu6} {go1}{san4}{taa1} {dik1} {go1}{sing1} {sam1}{cing4} {dung6}{jan4}{sam1}{sau6} {go1}{mai4} {hei2}{oi3}"
    generate_cantonese_pinyin_demo(tts, hanzi, romanised)

    print("All demos generated successfully!")


if __name__ == "__main__":
    main()