"""Generate a multi-guest call-in podcast episode plus a merged SRT file.

Pipeline (runs top to bottom when the script is executed):

1. Synthesize every dialogue line with the ``edge-tts`` command-line tool,
   writing one MP3 and one SRT file per line into ``output_dir``.
2. Concatenate the MP3s, applying a telephone effect (with a dial tone and
   a connect beep) to every segment whose file name contains ``callin``.
3. Merge the per-line SRT files into one subtitle file whose timestamps
   are shifted onto the timeline of the concatenated audio.
"""
import math
import os
import random
import subprocess

from pydub import AudioSegment
from pydub.generators import Sine, WhiteNoise

# Make sure the output directory exists.
output_dir = "podcast_audios"
os.makedirs(output_dir, exist_ok=True)


def apply_phone_effect(audio_segment, noise_level=0.02, add_dial_tone=False):
    """Make a segment sound like a long-distance phone call.

    The effect chain is: optional dial tone + connect beep prepended,
    resampling to 8 kHz, a 300-3400 Hz band limit (low-pass followed by
    high-pass), random short crackles, constant background line noise and
    dynamic range compression.

    Args:
        audio_segment: The pydub ``AudioSegment`` to process.
        noise_level: Linear amplitude factor applied to the background
            white noise, relative to the generator's default volume
            (``0.02`` is roughly -34 dB). Values ``<= 0`` disable the
            background noise entirely.
        add_dial_tone: When true, a 2000 ms dial tone and a connect beep
            are prepended before the effect chain runs, so the returned
            segment is longer than the input.

    Returns:
        The processed ``AudioSegment``.
    """
    # 0. Optionally prepend the dial tone and the connect beep.
    if add_dial_tone:
        dial_tone = generate_dial_tone(duration=2000)
        connect_beep = generate_connect_beep()
        audio_segment = dial_tone + connect_beep + audio_segment

    # 1. Lower the sample rate to telephone quality.
    audio_segment = audio_segment.set_frame_rate(8000)

    # 2. Band-limit to the telephone range. pydub has no band-pass filter,
    #    so a low-pass and a high-pass are combined.
    audio_segment = audio_segment.low_pass_filter(3400)
    audio_segment = audio_segment.high_pass_filter(300)

    total_ms = len(audio_segment)

    # 3. Build the background line noise (hiss).
    #    The previous formula, ``noise - (60 / noise_level)``, attenuated
    #    by 3000 dB at the default level (i.e. total silence) and divided
    #    by zero for ``noise_level=0``. The level is now converted from a
    #    linear amplitude factor to decibels.
    noise = None
    if noise_level > 0:
        noise = WhiteNoise().to_audio_segment(duration=total_ms)
        noise = noise.low_pass_filter(2000)  # tame the high end of the hiss
        noise = noise.apply_gain(20 * math.log10(noise_level))

    # 4. Intermittent crackles: every 3 s there is a 30% chance of a
    #    200 ms burst, provided it fits before the end of the segment.
    crackle_interval = 3000
    crackle_duration = 200
    for position in range(0, total_ms, crackle_interval):
        if random.random() < 0.3:
            crackle = WhiteNoise().to_audio_segment(duration=crackle_duration)
            crackle = crackle.low_pass_filter(1000)
            crackle = crackle - 30  # louder than the background hiss
            if position + crackle_duration < total_ms:
                audio_segment = audio_segment.overlay(crackle, position=position)

    # 5. Mix in the background noise.
    if noise is not None:
        audio_segment = audio_segment.overlay(noise)

    # 6. Compress the dynamic range a little, as a phone line would.
    audio_segment = audio_segment.compress_dynamic_range(threshold=-20.0, ratio=4.0)

    return audio_segment


def generate_dial_tone(duration=2000):
    """Return a dual-frequency (440 Hz + 350 Hz) dial tone.

    Args:
        duration: Length of the tone in milliseconds.
    """
    tone1 = Sine(440).to_audio_segment(duration=duration)
    tone2 = Sine(350).to_audio_segment(duration=duration)
    dial_tone = tone1.overlay(tone2)
    dial_tone = dial_tone - 25  # lower the volume
    return dial_tone


def generate_connect_beep(duration=500):
    """Return a 1000 Hz "call connected" beep.

    Args:
        duration: Length of the beep in milliseconds.
    """
    beep = Sine(1000).to_audio_segment(duration=duration)
    beep = beep - 20
    return beep


def parse_srt_time(time_str):
    """Convert an SRT timestamp (``HH:MM:SS,mmm``) to milliseconds.

    A ``.`` is also accepted as the millisecond separator.
    """
    h, m, s_ms = time_str.strip().split(':')
    s, ms = s_ms.replace('.', ',').split(',')
    return int(h) * 3600000 + int(m) * 60000 + int(s) * 1000 + int(ms)


def format_srt_time(ms):
    """Format a millisecond count as an SRT timestamp (``HH:MM:SS,mmm``)."""
    h, remainder = divmod(ms, 3600000)
    m, remainder = divmod(remainder, 60000)
    s, ms = divmod(remainder, 1000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"


def _read_srt_cues(srt_path):
    """Parse one SRT file into a list of ``(start_ms, end_ms, text)`` tuples.

    A cue is recognised as a numeric index line followed by a
    ``start --> end`` line; the text runs until the next blank line.
    Anything that does not match this shape is skipped rather than
    raising, so a truncated file cannot crash the merge.
    """
    # utf-8-sig strips a leading BOM, which would otherwise make the first
    # index line fail ``isdigit()`` and silently drop the first cue.
    with open(srt_path, 'r', encoding='utf-8-sig') as f:
        lines = [raw.strip() for raw in f]

    cues = []
    i = 0
    while i < len(lines):
        is_cue_header = (
            lines[i].isdigit()
            and i + 1 < len(lines)
            and ' --> ' in lines[i + 1]
        )
        if is_cue_header:
            start_time_str, end_time_str = lines[i + 1].split(' --> ')
            i += 2
            text_lines = []
            while i < len(lines) and lines[i]:
                text_lines.append(lines[i])
                i += 1
            cues.append((
                parse_srt_time(start_time_str),
                parse_srt_time(end_time_str),
                '\n'.join(text_lines),
            ))
        i += 1
    return cues


# Dialogue script (English, based on the paper, several speakers).
# NOTE(review): a few lines embed Chinese terms in text that is sent to
# English voices - confirm the voices render them acceptably.
dialogue = [
    # Host 1 (Male, American) - Alex
    {
        "text": "Welcome to Geopolitics Unpacked. I'm Alex.",
        "voice": "en-US-BrianNeural",
        "file": "host1_alex_opening.mp3"
    },
    # Host 2 (Female, American) - Sarah
    {
        "text": "And I'm Sarah. Today we're discussing Ben Xu's paper 'A Tale of 2 Treaties' and exploring the geopolitical dynamics of the Cold War era.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_opening.mp3"
    },
    # Host 1 - Alex
    {
        "text": "Sarah, the paper introduces this fascinating concept of '轮庄博弈' (turn-based power game) to explain historical cycles. How does this apply to the rise and fall of the Warsaw Pact and NATO?",
        "voice": "en-US-BrianNeural",
        "file": "host1_alex_question.mp3"
    },
    # Host 2 - Sarah
    {
        "text": "It's brilliant. The paper argues that just like in a mahjong game, the '庄家' (庄家) tries to maintain power by exploiting the '闲家' (闲家), but eventually gets overthrown by a coalition of the exploited. Applied to the Cold War, this explains how the Soviet Union's attempts to maintain control over its satellite states led to the collapse of the Warsaw Pact.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_response.mp3"
    },
    # Guest 1 (Male, Russian accent) - Dmitri
    {
        "text": "Hello, this is Dmitri calling from Moscow. I found the paper's analysis of the Soviet Union's collapse particularly insightful. The author mentions how the Soviet Union's focus on military power at the expense of technological innovation led to its decline. Do you think this is still relevant today?",
        "voice": "ru-RU-DmitryNeural",
        "file": "guest1_dmitri_callin.mp3"
    },
    # Host 1 - Alex
    {
        "text": "Great question, Dmitri. The paper does highlight how the Soviet Union's decision to abandon the Setun ternary computer in favor of copying IBM's binary systems was a critical mistake. This technological stagnation, combined with the arms race,耗尽了 the Soviet economy. What do you think, Sarah?",
        "voice": "en-US-BrianNeural",
        "file": "host1_alex_response_to_dmitri.mp3"
    },
    # Host 2 - Sarah
    {
        "text": "Absolutely, Dmitri. The paper's analysis of the '赛博共产主义' (cyber communism) vision that never materialized is fascinating. The Soviet Union had the technical expertise to develop advanced computing systems, but bureaucratic interests and a focus on military might derailed those efforts. This is a cautionary tale for any nation that prioritizes military power over technological innovation.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_response_to_dmitri.mp3"
    },
    # Guest 1 - Dmitri
    {
        "text": "Thank you. It's interesting to see how the paper connects these historical lessons to contemporary geopolitics. The rise of China as a technological power while maintaining a strong military presence shows that a balance is possible.",
        "voice": "ru-RU-DmitryNeural",
        "file": "guest1_dmitri_conclusion.mp3"
    },
    # Host 2 - Sarah
    {
        "text": "That's a great point, Dmitri. Thank you for calling in.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_thanks_dmitri.mp3"
    },
    # Guest 2 (Female, Indian accent) - Priya
    {
        "text": "Hi, this is Priya from New Delhi. I was intrigued by the paper's section on '革命输出的会计困局' (the accounting dilemma of revolution export). The author argues that China's foreign aid policies during the Cold War suffered from conflicting objectives. Could you elaborate on this?",
        "voice": "en-IN-NeerjaExpressiveNeural",
        "file": "guest2_priya_callin.mp3"
    },
    # Host 1 - Alex
    {
        "text": "Thanks for calling, Priya. The paper uses an accounting metaphor to explain the problem. Traditional tributary systems had clear objectives (maintaining political order), but revolutionary export tried to achieve both political returns and selfless aid simultaneously, leading to confusion and inefficiency. Sarah, could you expand on this?",
        "voice": "en-US-BrianNeural",
        "file": "host1_alex_response_to_priya.mp3"
    },
    # Host 2 - Sarah
    {
        "text": "Definitely, Priya. The paper argues that this accounting dilemma led to situations where China provided significant aid to countries like Albania and Vietnam without clear strategic returns. When these relationships soured, it created diplomatic challenges. The author suggests that this experience influenced China's more pragmatic foreign aid policies today, which are more focused on mutual benefit through economic cooperation.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_response_to_priya.mp3"
    },
    # Guest 2 - Priya
    {
        "text": "Fascinating. This perspective helps explain the evolution of China's foreign policy from the Cold War era to today's Belt and Road Initiative. Thank you for the insight.",
        "voice": "en-IN-NeerjaExpressiveNeural",
        "file": "guest2_priya_conclusion.mp3"
    },
    # Host 1 - Alex
    {
        "text": "Thank you, Priya. It's been great having both of you on the show today.",
        "voice": "en-US-BrianNeural",
        "file": "host1_alex_final_thanks.mp3"
    },
    # Host 2 - Sarah
    {
        "text": "Join us next time as we continue exploring the insights from Ben Xu's 'A Tale of 2 Treaties' and their relevance to contemporary geopolitics. Until then, this is Geopolitics Unpacked signing off.",
        "voice": "en-US-AriaNeural",
        "file": "host2_sarah_final.mp3"
    }
]

# Synthesize the audio and the matching SRT subtitles for every line.
print("Generating audio segments and subtitles...")
for item in dialogue:
    file_path = os.path.join(output_dir, item["file"])
    srt_path = os.path.join(output_dir, os.path.splitext(item["file"])[0] + ".srt")
    cmd = [
        "edge-tts",
        "--voice", item["voice"],
        "--text", item["text"],
        "--write-media", file_path,
        "--write-subtitles", srt_path
    ]
    # check=True aborts the run if edge-tts fails for any line.
    subprocess.run(cmd, check=True)
    print(f"Generated: {item['file']} and {os.path.basename(srt_path)}")

# Concatenate the audio segments.
print("\nConcatenating audio segments...")
combined = AudioSegment.empty()
# For each dialogue entry: the position (ms) in the combined audio at which
# its speech starts. Recorded here, from the in-memory audio, so that the
# subtitles line up with what is actually exported; re-decoding the MP3
# files afterwards can yield slightly different lengths.
speech_start_offsets = []
for item in dialogue:
    file_path = os.path.join(output_dir, item["file"])
    audio = AudioSegment.from_mp3(file_path)
    lead_in_ms = 0

    # Call-in guests are detected by 'callin' in the file name.
    if 'callin' in item["file"].lower():
        print(f" Applying phone effect to: {item['file']}")
        processed = apply_phone_effect(audio, add_dial_tone=True)
        # The dial tone and beep are prepended, so the speech (and thus
        # its subtitles) starts later by however much the segment grew.
        lead_in_ms = max(0, len(processed) - len(audio))
        audio = processed
        # Keep a copy of the processed version.
        phone_file_path = os.path.join(output_dir, item["file"].replace('.mp3', '_phone.mp3'))
        audio.export(phone_file_path, format="mp3")

    speech_start_offsets.append(len(combined) + lead_in_ms)
    combined += audio

# Write the complete podcast file.
output_file = os.path.join(output_dir, "multi_guest_callin_podcast.mp3")
combined.export(output_file, format="mp3")
print(f"\nComplete podcast saved to: {output_file}")

# Merge the per-line SRT files onto the combined timeline.
print("\nMerging subtitle files...")
merged_subtitles = []
subtitle_index = 1
for item, offset_ms in zip(dialogue, speech_start_offsets):
    srt_path = os.path.join(output_dir, os.path.splitext(item["file"])[0] + ".srt")
    for start_time, end_time, text in _read_srt_cues(srt_path):
        merged_subtitles.append({
            'index': subtitle_index,
            'start': offset_ms + start_time,
            'end': offset_ms + end_time,
            'text': text
        })
        subtitle_index += 1

# Write the merged SRT file.
output_srt = os.path.join(output_dir, "multi_guest_callin_podcast.srt")
with open(output_srt, 'w', encoding='utf-8') as f:
    for sub in merged_subtitles:
        f.write(f"{sub['index']}\n")
        f.write(f"{format_srt_time(sub['start'])} --> {format_srt_time(sub['end'])}\n")
        f.write(f"{sub['text']}\n\n")

print(f"\nComplete subtitle file saved to: {output_srt}")
print("\nPodcast generation completed successfully!")