242 lines
9.2 KiB
Python
242 lines
9.2 KiB
Python
#!/usr/bin/env python3
"""
Author Interview Podcast Generator - Chapter 8
- Author uses VoxCPM for voice
- Other guests use Edge TTS
- All content in English
"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime
|
|
|
|
# Filesystem layout: every location is derived from one workspace root.
WORKSPACE = "/root/tts"
OUTPUT_DIR = os.path.join(
    WORKSPACE, "podcast_audios", "chapter8_author_interview"
)
VOXCPM_DIR = os.path.join(WORKSPACE, "VoxCPM")

# Make sure the output directory is there (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"✅ Output directory created: {OUTPUT_DIR}")

# Put the VoxCPM "src" directory at the front of the module search path.
sys.path.insert(0, os.path.join(VOXCPM_DIR, "src"))
print("✅ Added VoxCPM path")
|
|
|
|
# Load the VoxCPM class used for the author's voice. Any failure here
# aborts the script with exit status 1.
try:
    from voxcpm.core import VoxCPM
    print("✅ VoxCPM imported successfully")
except Exception as e:
    print(f"❌ Failed to import VoxCPM: {e}")
    sys.exit(1)

# Locate the model directory: try each known directory name in order and
# keep the first one that exists; abort if none of them does.
for _model_dir_name in ("openbmb__VoxCPM1.5", "VoxCPM1.5"):
    LOCAL_MODEL_PATH = os.path.join(VOXCPM_DIR, "models", _model_dir_name)
    if os.path.exists(LOCAL_MODEL_PATH):
        break
else:
    print("❌ Model path not found")
    sys.exit(1)
print(f"✅ Model path: {LOCAL_MODEL_PATH}")

# Build the model instance that generate_author_voice() uses. The denoiser
# and the optimize option are both switched off.
print("\n🚀 Initializing VoxCPM for author voice...")
try:
    author_voice = VoxCPM(
        voxcpm_model_path=LOCAL_MODEL_PATH,
        enable_denoiser=False,
        optimize=False,
    )
    print("✅ VoxCPM initialized successfully")
except Exception as e:
    print(f"❌ VoxCPM initialization failed: {e}")
    sys.exit(1)
|
|
|
|
# Edge TTS voice name for each guest, keyed by guest id.
# NOTE(review): "dmitri" and "mohammed" map to ru-RU / ar-SA voices while
# the interview text is English - presumably chosen for accent; confirm.
EDGE_TTS_VOICES = dict(
    graham="en-US-GuyNeural",       # American male for tech bro
    dmitri="ru-RU-DmitryNeural",    # Russian male for Dmitri
    amita="en-US-AriaNeural",       # American female as fallback for Amita
    mohammed="ar-SA-HamedNeural",   # Arabic male for Mohammed
)
|
|
|
|
# Interview script.
#   "author": segment key -> {"text", "filename"}
#   "guests": guest id -> segment key -> {"text", "filename"}
# "text" is the line to be spoken; "filename" is the audio file written
# for it inside OUTPUT_DIR.
# NOTE(review): the author intro still contains the untranslated fragment
# "胯下" although the content is meant to be English - confirm whether the
# speech model should receive it as-is.
INTERVIEW_CONTENT = {
    "author": {
        "intro": {
            "text": "Welcome to the chapter 8 interview. Today we're discussing how China used patience to get its entry ticket to the world factory between 2001 and 2009. The core metaphor is Han Xin's胯下 humiliation - enduring temporary shame for long-term success.",
            "filename": "author_intro.wav",
        },
        "response_1": {
            "text": "Great question, Graham. The technical gap was indeed significant. But China understood that modern warfare is about endurance, not just firepower. While America was fighting the War on Terror, China was building its industrial base. This strategic patience is what allowed them to become the world's factory.",
            "filename": "author_response_1.wav",
        },
        "response_2": {
            "text": "Dmitri makes an excellent point about energy. Russia's natural gas was crucial for China's 24-hour production lines. This was a mutually beneficial strategic cooperation - Russia provided the energy, China provided the market. It's a perfect example of how geopolitical interests can create unexpected alliances.",
            "filename": "author_response_2.wav",
        },
    },
    "guests": {
        "graham": {
            "question": {
                "text": "Wait, host. I think you're missing a key variable - the technological gap. In the 2003 Iraq War, the US overthrew Saddam in just 42 days. In 2001 Afghanistan, precision-guided bombs destroyed all Taliban strongholds. This shows war has changed. Why are you still using Cold War thinking to analyze geopolitics?",
                "filename": "graham_question.wav",
            },
        },
        "dmitri": {
            "question": {
                "text": "Host, I agree technology is important, but let me add - energy is the ultimate ace. In 2006, when natural gas prices rose, how did Europeans tremble? China became the world's factory precisely because of Russia's energy support. Siberian gas pipelines are the real entry ticket. Without Russian energy, how could China operate 24/7?",
                "filename": "dmitri_question.wav",
            },
        },
        "amita": {
            "question": {
                "text": "Wait, both of you. The world factory you're talking about seems to assume the 'China model' is the only one. But let me remind you - after 2008, Bangalore is rising. India's software outsourcing, Mexico's nearshoring, Vietnam's assembly lines... There's more than one world factory. Why do you only talk about China?",
                "filename": "amita_question.wav",
            },
        },
        "mohammed": {
            "question": {
                "text": "You all make good points, but I want to ask a more fundamental question - is the concept of 'world factory' itself a trap? What did China get for its 70% foreign trade dependence? It got US aircraft carriers that can cut off the Malacca Strait at any time. It got the risk of putting all eggs in one basket. Host, you call this an 'entry ticket'? I think it's more like an invitation to a trap.",
                "filename": "mohammed_question.wav",
            },
        },
    },
}
|
|
|
|
# Function to generate author voice with VoxCPM
def generate_author_voice(text, filename, *, prompt_wav_path=None, prompt_text=None):
    """Synthesize one author segment with VoxCPM and save it in OUTPUT_DIR.

    Args:
        text: The line to speak. Empty, whitespace-only or None text is
            rejected before the model is invoked.
        filename: Name of the audio file to write inside ``OUTPUT_DIR``.
        prompt_wav_path: Optional reference recording forwarded unchanged to
            ``author_voice.generate``. Defaults to None, which is what the
            function always passed before this parameter existed.
        prompt_text: Optional transcript forwarded unchanged to
            ``author_voice.generate``. Defaults to None.
            NOTE(review): presumably both prompt arguments have to be given
            together - confirm against the VoxCPM API.

    Returns:
        True if the audio file was written, False otherwise. Failures are
        reported on stdout (with a traceback) and never raised to the caller.
    """
    # Guard before anything else: there is nothing to synthesize, and
    # slicing a None text for the preview below would raise.
    if not text or not text.strip():
        print(f"❌ No text supplied for author voice: {filename}")
        return False

    output_file = os.path.join(OUTPUT_DIR, filename)
    print(f"\n🎙️ Generating author voice for: {filename}")
    print(f"Text: {text[:50]}...")

    try:
        # Imported before synthesis so that a missing soundfile package is
        # reported up front instead of after the audio has been generated.
        import soundfile as sf

        sample_rate = author_voice.tts_model.sample_rate
        audio = author_voice.generate(
            text=text,
            prompt_wav_path=prompt_wav_path,
            prompt_text=prompt_text,
            cfg_value=2.0,
            inference_timesteps=20,
            normalize=True,
            denoise=False,
            retry_badcase=True,
        )
        sf.write(output_file, audio, sample_rate)

        if not os.path.exists(output_file):
            print("❌ Failed to save author voice")
            return False

        file_size = os.path.getsize(output_file)
        # Duration = number of samples / samples per second.
        duration = len(audio) / sample_rate
        print("✅ Author voice generated successfully!")
        print(f"   File: {output_file}")
        print(f"   Size: {file_size} bytes")
        print(f"   Duration: {duration:.2f} seconds")
        return True

    except Exception as e:
        # Script-level boundary: report and signal failure via the return
        # value so the remaining segments can still be attempted.
        print(f"❌ Error generating author voice: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
|
|
# Function to generate guest voice with Edge TTS
def _transcode_to_wav(source_path, wav_path, timeout):
    """Convert the audio at source_path into a WAV file at wav_path via ffmpeg.

    Returns True only when ffmpeg is found on PATH, exits with status 0 and
    the target file exists afterwards; returns False otherwise.
    """
    import shutil

    ffmpeg = shutil.which("ffmpeg")
    if ffmpeg is None:
        return False

    completed = subprocess.run(
        [ffmpeg, "-y", "-loglevel", "error", "-i", source_path, wav_path],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if completed.returncode != 0:
        print(f"   ffmpeg error: {completed.stderr}")
        return False
    return os.path.exists(wav_path)


def generate_guest_voice(guest_id, text, filename, *, timeout=120):
    """Synthesize one guest segment with the edge-tts CLI and save it.

    Args:
        guest_id: Key into ``EDGE_TTS_VOICES`` selecting the voice.
        text: The line to speak. Empty, whitespace-only or None text is
            rejected before edge-tts is started.
        filename: Name of the audio file to write inside ``OUTPUT_DIR``.
        timeout: Seconds allowed for each external command (edge-tts, and
            ffmpeg when a conversion is needed). Defaults to 120.

    Returns:
        True if the audio file was produced, False otherwise. Failures are
        reported on stdout and never raised to the caller.
    """
    if not text or not text.strip():
        print(f"❌ No text supplied for guest voice: {filename}")
        return False

    voice = EDGE_TTS_VOICES.get(guest_id)
    if not voice:
        print(f"❌ No voice found for guest: {guest_id}")
        return False

    output_file = os.path.join(OUTPUT_DIR, filename)
    print(f"\n🎙️ Generating {guest_id} voice with Edge TTS: {filename}")
    print(f"Voice: {voice}")
    print(f"Text: {text[:50]}...")

    # edge-tts writes MP3-encoded audio whatever the target file is called.
    # When a .wav name is requested, synthesize into a temporary .mp3 first
    # and transcode it, so the file content matches its extension.
    wants_wav = filename.lower().endswith(".wav")
    media_file = f"{output_file}.edge-tts.mp3" if wants_wav else output_file

    try:
        result = subprocess.run(
            [
                "edge-tts",
                "--voice", voice,
                # Single "--text=..." argument so that text starting with
                # "-" is not mistaken for another option.
                f"--text={text}",
                "--write-media", media_file,
            ],
            capture_output=True,
            text=True,
            cwd=WORKSPACE,
            # The synthesis is a network call; do not let it hang forever.
            timeout=timeout,
        )

        if result.returncode != 0 or not os.path.exists(media_file):
            print("❌ Failed to generate guest voice")
            print(f"   Error: {result.stderr}")
            return False

        if wants_wav:
            if _transcode_to_wav(media_file, output_file, timeout):
                os.remove(media_file)
            else:
                # Best effort: keep the previous behaviour (MP3 data under
                # the requested name) rather than losing the segment.
                print("⚠️ ffmpeg unavailable or failed; keeping MP3 data under the requested file name")
                os.replace(media_file, output_file)

        file_size = os.path.getsize(output_file)
        print("✅ Guest voice generated successfully!")
        print(f"   File: {output_file}")
        print(f"   Size: {file_size} bytes")
        return True

    except Exception as e:
        # Covers a missing edge-tts executable, timeouts and file errors.
        print(f"❌ Error generating guest voice: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Never leave the temporary MP3 behind; a failure to remove it is
        # deliberately ignored because it does not affect the result.
        if wants_wav and os.path.exists(media_file):
            try:
                os.remove(media_file)
            except OSError:
                pass
|
|
|
|
# Main generation process
print(f"\n{'='*70}")
print("STARTING AUTHOR INTERVIEW PODCAST GENERATION")
print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"{'='*70}")

# File names of every segment whose generator returned False. Both
# generators report failure through their return value instead of raising,
# so the result has to be checked here or failures go unnoticed.
failed_files = []

# Generate author voice (using VoxCPM)
print(f"\n{'='*50}")
print("GENERATING AUTHOR VOICE (VoxCPM)")
print(f"{'='*50}")

for content in INTERVIEW_CONTENT["author"].values():
    if not generate_author_voice(content["text"], content["filename"]):
        failed_files.append(content["filename"])

# Generate guest voices (using Edge TTS)
print(f"\n{'='*50}")
print("GENERATING GUEST VOICES (Edge TTS)")
print(f"{'='*50}")

for guest_id, guest_content in INTERVIEW_CONTENT["guests"].items():
    for content in guest_content.values():
        if not generate_guest_voice(guest_id, content["text"], content["filename"]):
            failed_files.append(content["filename"])

# Verify all files
print(f"\n{'='*70}")
print("VERIFICATION: GENERATED FILES")
print(f"{'='*70}")

# Lists every .wav currently below OUTPUT_DIR; files left over from an
# earlier run are included, which is why failures are tracked separately.
all_files = []
for root, _dirs, files in os.walk(OUTPUT_DIR):
    for name in files:
        if name.endswith(".wav"):
            all_files.append((name, os.path.getsize(os.path.join(root, name))))

if all_files:
    print(f"✅ Generated {len(all_files)} files:")
    for name, size in all_files:
        print(f"   📄 {name} ({size} bytes)")
else:
    print("❌ No files generated!")

if failed_files:
    print(f"❌ {len(failed_files)} segment(s) failed: {', '.join(failed_files)}")

print(f"\n{'='*70}")
if failed_files:
    print("PODCAST GENERATION FINISHED WITH ERRORS")
else:
    print("PODCAST GENERATION COMPLETE")
print(f"Output directory: {OUTPUT_DIR}")
print(f"{'='*70}")

# Same convention as the setup code earlier in the script: a failed run
# ends with exit status 1.
if failed_files:
    sys.exit(1)