Initial commit for TTS project
This commit is contained in:
198
scripts/analysis/analyze_accent_verification.py
Normal file
198
scripts/analysis/analyze_accent_verification.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze accent verification files to check for distinct accent characteristics
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import scipy.signal
|
||||
from scipy.stats import skew, kurtosis
|
||||
|
||||
# Filesystem locations used by this script.
# WORKSPACE is the project root; ACCENT_DIR is the folder that is scanned
# for ``.wav`` files by analyze_accent_verification().
WORKSPACE = "/root/tts"
ACCENT_DIR = os.path.join(WORKSPACE, "accent_verification")
|
||||
|
||||
def calculate_rms(audio_data):
    """Return the root-mean-square level of the samples.

    The samples are squared element-wise, averaged over every element of
    the array, and the square root of that average is returned.
    """
    mean_square = np.mean(audio_data * audio_data)
    return np.sqrt(mean_square)
|
||||
|
||||
def calculate_peak_amplitude(audio_data):
    """Return the largest absolute sample value found in ``audio_data``."""
    magnitudes = np.abs(audio_data)
    return magnitudes.max()
|
||||
|
||||
def calculate_zero_crossing_rate(audio_data):
    """Return the mean absolute change in sign between consecutive samples.

    Each sample is mapped to -1, 0 or +1 with ``np.sign``; the absolute
    differences between neighbours are then averaged. A direct flip between
    positive and negative therefore contributes 2 (not 1), and a transition
    to or from an exact zero contributes 1.
    """
    signs = np.sign(audio_data)
    sign_changes = np.abs(np.diff(signs))
    return np.mean(sign_changes)
|
||||
|
||||
def calculate_spectral_centroid(audio_data, sample_rate):
    """Return the power-weighted mean frequency of the signal.

    A spectrogram is computed with SciPy's default settings and the
    frequency of every bin is weighted by that bin's power, summed over all
    frequencies and time frames, then divided by the total power.

    Returns the integer ``0`` when the spectrogram contains no power at all
    (e.g. pure silence), which avoids a division by zero.
    """
    frequencies, _times, power = scipy.signal.spectrogram(audio_data, sample_rate)

    total_power = np.sum(power)
    if total_power == 0:
        return 0

    # Broadcast the frequency axis across every time frame before weighting.
    weighted_power = np.sum(frequencies[:, np.newaxis] * power)
    return weighted_power / total_power
|
||||
|
||||
def calculate_skewness(audio_data):
    """Return the skewness of the sample distribution via ``scipy.stats.skew``."""
    sample_skewness = skew(audio_data)
    return sample_skewness
|
||||
|
||||
def calculate_kurtosis(audio_data):
    """Return the kurtosis of the samples using ``scipy.stats.kurtosis`` defaults."""
    sample_kurtosis = kurtosis(audio_data)
    return sample_kurtosis
|
||||
|
||||
def analyze_audio_quality(audio_data, sample_rate, filename):
    """Compute basic waveform metrics and a 0-100 heuristic quality score.

    Args:
        audio_data: Sample array. Arrays with more than one dimension are
            averaged along axis 1 (assumed to be the channel axis, as
            returned by ``soundfile.read``) before any metric is computed.
        sample_rate: Sampling rate passed on to the spectrogram.
        filename: Accepted for interface compatibility; not used here.

    Returns:
        dict with keys ``rms``, ``peak``, ``zcr``, ``spectral_centroid``,
        ``skewness``, ``kurtosis`` and ``score``.
    """
    # Downmix multi-channel audio. Without this, skew()/kurtosis() return one
    # value per channel and the range checks below raise
    # "truth value of an array is ambiguous".
    audio_data = np.asarray(audio_data)
    if audio_data.ndim > 1:
        audio_data = np.mean(audio_data, axis=1)

    rms = calculate_rms(audio_data)
    peak = calculate_peak_amplitude(audio_data)
    zcr = calculate_zero_crossing_rate(audio_data)
    spectral_centroid = calculate_spectral_centroid(audio_data, sample_rate)
    skewness = calculate_skewness(audio_data)
    kurt = calculate_kurtosis(audio_data)

    # Quality scoring: each check that passes adds its weight (total 100).
    score = 0
    if 0.05 <= rms <= 0.3:
        score += 20
    if peak <= 1.0:
        score += 20
    if 0.05 <= zcr <= 0.3:
        score += 20
    if 400 <= spectral_centroid <= 3000:
        score += 20
    if -1 <= skewness <= 1:
        score += 10
    if kurt <= 10:
        score += 10

    return {
        'rms': rms,
        'peak': peak,
        'zcr': zcr,
        'spectral_centroid': spectral_centroid,
        'skewness': skewness,
        'kurtosis': kurt,
        'score': min(score, 100),
    }
|
||||
|
||||
def _analyze_file_group(heading, files):
    """Analyze each ``(filename, path)`` pair, print one entry per file, return the stats."""
    print(heading)
    print("-" * 70)

    group_stats = []
    for filename, file_path in files:
        try:
            audio_data, sample_rate = sf.read(file_path)
            duration = len(audio_data) / sample_rate

            stats = analyze_audio_quality(audio_data, sample_rate, filename)

            group_stats.append({
                'filename': filename,
                'duration': duration,
                'rms': stats['rms'],
                'zcr': stats['zcr'],
                'spectral_centroid': stats['spectral_centroid'],
                'score': stats['score'],
            })

            print(f"✓ {filename}")
            print(f" Duration: {duration:.2f}s, RMS: {stats['rms']:.4f}, ZCR: {stats['zcr']:.4f}, Centroid: {stats['spectral_centroid']:.1f}Hz, Score: {stats['score']}/100")
            print()
        except Exception as e:
            # Deliberately broad: the report is best-effort, so one unreadable
            # or malformed file is reported and the remaining files continue.
            print(f"✗ {filename}: Error - {e}")
            print()

    return group_stats


def _print_comparison_table(heading, rows):
    """Print the per-file metrics of ``rows`` as a table sorted by filename."""
    print(heading)
    print("-" * 70)
    print("Filename | Duration | RMS | ZCR | Centroid | Score")
    print("-" * 70)

    for stats in sorted(rows, key=lambda x: x['filename']):
        print(f"{stats['filename']:24} | {stats['duration']:8.2f} | {stats['rms']:6.4f} | {stats['zcr']:6.4f} | {stats['spectral_centroid']:8.1f} | {stats['score']:5}")


def analyze_accent_verification():
    """Analyze the WAV files in ``ACCENT_DIR`` and print a report.

    Files whose name contains ``accent`` are treated as accent samples;
    otherwise files whose name contains ``emotion`` are treated as emotion
    samples. Each group gets a per-file listing and a comparison table,
    followed by a summary. When at least two accent files were analyzed,
    the standard deviation of their spectral centroid and zero crossing
    rate is reported as a rough distinctiveness indicator.

    Prints a message and returns early when ``ACCENT_DIR`` is not a directory.
    """
    print("=" * 70)
    print("ANALYZING ACCENT VERIFICATION FILES")
    print("=" * 70)

    if not os.path.isdir(ACCENT_DIR):
        print(f"Directory not found: {ACCENT_DIR}")
        return

    accent_files = []
    emotion_files = []

    # Sorted so the report order does not depend on the filesystem.
    for filename in sorted(os.listdir(ACCENT_DIR)):
        if not filename.endswith('.wav'):
            continue
        file_path = os.path.join(ACCENT_DIR, filename)
        if 'accent' in filename:
            accent_files.append((filename, file_path))
        elif 'emotion' in filename:
            emotion_files.append((filename, file_path))

    accent_stats = _analyze_file_group("\n🔊 ACCENT FILES ANALYSIS:", accent_files)
    emotion_stats = _analyze_file_group("\n😊 EMOTION FILES ANALYSIS:", emotion_files)

    _print_comparison_table("\n📊 ACCENT COMPARISON:", accent_stats)
    _print_comparison_table("\n📊 EMOTION COMPARISON:", emotion_stats)

    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total accent files: {len(accent_files)}")
    print(f"Total emotion files: {len(emotion_files)}")

    # A spread across files needs at least two successfully analyzed accents.
    if len(accent_stats) >= 2:
        centroid_std = np.std([s['spectral_centroid'] for s in accent_stats])
        zcr_std = np.std([s['zcr'] for s in accent_stats])

        print("\nAccent distinctiveness metrics:")
        print(f"Spectral centroid std: {centroid_std:.2f}Hz (higher = more distinct)")
        print(f"Zero crossing rate std: {zcr_std:.4f} (higher = more distinct)")

        if centroid_std > 50 or zcr_std > 0.02:
            print("✅ Accents appear to be distinct based on acoustic features")
        else:
            print("⚠️ Accents may sound similar based on acoustic features")

    print("\n" + "=" * 70)
|
||||
|
||||
# Script entry point: run the accent verification report when executed directly.
if __name__ == "__main__":
    analyze_accent_verification()
|
||||
208
scripts/analysis/analyze_audio_quality.py
Normal file
208
scripts/analysis/analyze_audio_quality.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Audio quality analysis tool for VoxCPM generated files
|
||||
Analyzes waveform characteristics to determine if audio sounds human
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy import signal
|
||||
from scipy.stats import skew, kurtosis
|
||||
|
||||
def analyze_audio_file(file_path):
    """Load one audio file, print its statistics, and return quality metrics.

    Multi-channel audio is averaged to mono before the statistics are
    computed. Returns a dict with the file path, sample rate, duration,
    RMS energy, zero crossing rate, spectral centroid, the numeric quality
    score and a ``'good'``/``'poor'`` label (good means score above 60).

    Returns ``None`` when the path does not exist or when any step of the
    analysis raises; in both cases a message is printed instead.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    try:
        # Read audio file
        audio_data, sample_rate = sf.read(file_path)
        print(f"✓ Successfully loaded: {os.path.basename(file_path)}")
        print(f" Sample rate: {sample_rate} Hz")
        duration = len(audio_data) / sample_rate
        print(f" Duration: {duration:.2f} seconds")
        channel_count = 1 if audio_data.ndim == 1 else audio_data.shape[1]
        print(f" Channels: {channel_count}")

        # Collapse multi-channel audio to a single channel
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # Basic audio statistics
        rms_energy = np.sqrt(np.mean(audio_data**2))
        peak_amplitude = np.max(np.abs(audio_data))
        sign_steps = np.diff(np.sign(audio_data))
        zero_crossing_rate = np.mean(np.abs(sign_steps))
        spectral_centroid = calculate_spectral_centroid(audio_data, sample_rate)
        skewness = skew(audio_data)
        kurt = kurtosis(audio_data)

        print(f"\n📊 Audio Statistics:")
        print(f" RMS Energy: {rms_energy:.4f}")
        print(f" Peak Amplitude: {peak_amplitude:.4f}")
        print(f" Zero Crossing Rate: {zero_crossing_rate:.4f}")
        print(f" Spectral Centroid: {spectral_centroid:.2f} Hz")
        print(f" Skewness: {skewness:.4f}")
        print(f" Kurtosis: {kurt:.4f}")

        # Quality assessment
        scoring_inputs = {
            'rms_energy': rms_energy,
            'zero_crossing_rate': zero_crossing_rate,
            'spectral_centroid': spectral_centroid,
            'skewness': skewness,
            'kurtosis': kurt,
            'duration': duration,
        }
        quality_score = assess_audio_quality(scoring_inputs)

        if quality_score > 60:
            quality_label = 'good'
        else:
            quality_label = 'poor'

        return {
            'file': file_path,
            'sample_rate': sample_rate,
            'duration': duration,
            'rms_energy': rms_energy,
            'zero_crossing_rate': zero_crossing_rate,
            'spectral_centroid': spectral_centroid,
            'quality_score': quality_score,
            'quality': quality_label,
        }

    except Exception as e:
        print(f"Error analyzing {file_path}: {e}")
        return None
|
||||
|
||||
def calculate_spectral_centroid(audio_data, sample_rate):
    """Return the spectral centroid (power-weighted mean frequency).

    The centroid is taken over the whole default-parameter spectrogram: each
    frequency bin is weighted by its power in every time frame. Returns the
    integer ``0`` when the spectrogram holds no power.
    """
    freqs, _frame_times, power = signal.spectrogram(audio_data, sample_rate)

    power_sum = np.sum(power)
    if power_sum == 0:
        # Nothing to weight; also avoids dividing by zero.
        return 0

    freq_column = freqs[:, np.newaxis]
    return np.sum(freq_column * power) / power_sum
|
||||
|
||||
def assess_audio_quality(metrics):
    """Score audio metrics on a 0-100 scale in five 20-point categories.

    ``metrics`` must provide ``rms_energy``, ``zero_crossing_rate``,
    ``spectral_centroid``, ``duration``, ``skewness`` and ``kurtosis``.
    The first four earn 20 points inside their ideal range, 10 points in the
    surrounding fringe range and nothing otherwise. Skewness and kurtosis
    are judged together on their absolute values.
    """

    def band_points(value, ideal, fringe):
        """Return 20 inside ``ideal``, 10 inside ``fringe`` only, else 0."""
        ideal_lo, ideal_hi = ideal
        fringe_lo, fringe_hi = fringe
        if ideal_lo <= value <= ideal_hi:
            return 20
        if fringe_lo <= value < ideal_lo or ideal_hi < value <= fringe_hi:
            return 10
        return 0

    # (metric key, ideal range, fringe range)
    bands = (
        ('rms_energy', (0.05, 0.3), (0.02, 0.5)),
        ('zero_crossing_rate', (0.05, 0.15), (0.02, 0.2)),
        ('spectral_centroid', (800, 2500), (500, 3500)),
        ('duration', (1.0, 10.0), (0.5, 15.0)),
    )

    total = 0
    for key, ideal, fringe in bands:
        total += band_points(metrics[key], ideal, fringe)

    # Distribution shape: both magnitudes must be moderate to earn points.
    skew_magnitude = abs(metrics['skewness'])
    kurt_magnitude = abs(metrics['kurtosis'])
    if skew_magnitude < 2 and kurt_magnitude < 10:
        total += 20
    elif skew_magnitude < 5 and kurt_magnitude < 20:
        total += 10

    return total
|
||||
|
||||
def analyze_directory(directory):
    """Analyze every ``.wav`` file in ``directory`` and print a summary.

    Files are processed in sorted name order so the output, including the
    "examples" lists (first three of each quality class), is reproducible.
    Prints a message and returns ``None`` when ``directory`` is not an
    existing directory or contains no ``.wav`` files.
    """
    # isdir rather than exists: an existing regular file would otherwise pass
    # the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory not found: {directory}")
        return

    separator = "=" * 60
    print(f"\n{separator}")
    print(f"ANALYZING AUDIO FILES IN: {directory}")
    print(separator)

    audio_files = sorted(f for f in os.listdir(directory) if f.endswith('.wav'))

    if not audio_files:
        print("No WAV files found")
        return

    results = []
    for audio_file in audio_files:
        file_path = os.path.join(directory, audio_file)
        result = analyze_audio_file(file_path)
        if result:
            results.append(result)
            print(f" Quality Score: {result['quality_score']}/100 ({result['quality']})")
        print(separator)

    # Summary
    if results:
        good_files = [r['file'] for r in results if r['quality'] == 'good']
        poor_files = [r['file'] for r in results if r['quality'] == 'poor']

        print("\n📋 Summary:")
        print(f"Total files analyzed: {len(results)}")
        print(f"Good quality files: {len(good_files)}")
        print(f"Poor quality files: {len(poor_files)}")

        if good_files:
            print("\nGood quality examples:")
            for f in good_files[:3]:
                print(f" - {os.path.basename(f)}")

        if poor_files:
            print("\nPoor quality examples:")
            for f in poor_files[:3]:
                print(f" - {os.path.basename(f)}")
|
||||
|
||||
if __name__ == "__main__":
    banner = "=" * 60

    # Analyze both accent demo directories
    for demo_directory in ("accent_demos", "accent_demos_optimized"):
        analyze_directory(demo_directory)

    # Also analyze the reference audio files
    print(f"\n{banner}")
    print("ANALYZING REFERENCE AUDIO FILES")
    print(banner)

    reference_files = [
        "reference_indian.wav",
        "reference_russian.wav",
        "reference_singaporean.wav",
        "reference_hongkong.wav",
        "reference_cantonese.wav",
        "reference_indian_opt.wav",
        "reference_russian_opt.wav",
        "reference_singaporean_opt.wav",
        "reference_hongkong_opt.wav",
        "reference_cantonese_opt.wav",
    ]

    # Reference files are optional; ones that are missing are skipped silently.
    for ref_file in reference_files:
        if not os.path.exists(ref_file):
            continue
        analyze_audio_file(ref_file)
        print(banner)
|
||||
186
scripts/analysis/analyze_local_accent_demos.py
Normal file
186
scripts/analysis/analyze_local_accent_demos.py
Normal file
@@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze only the local accent demos
|
||||
"""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from scipy import signal
|
||||
from scipy.stats import skew, kurtosis
|
||||
|
||||
def analyze_audio_file(file_path):
    """Load one audio file, print its statistics and score, return the metrics.

    Multi-channel audio is averaged to mono before the statistics are
    computed. The quality label is ``'good'`` when the score is above 60 and
    ``'poor'`` otherwise. Returns a dict with the file path, sample rate,
    duration, RMS energy, zero crossing rate, spectral centroid, score and
    label.

    Returns ``None`` when the path does not exist or when any step of the
    analysis raises; in both cases a message is printed instead.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    try:
        # Read audio file
        audio_data, sample_rate = sf.read(file_path)
        print(f"✓ Successfully loaded: {os.path.basename(file_path)}")
        print(f" Sample rate: {sample_rate} Hz")
        duration = len(audio_data) / sample_rate
        print(f" Duration: {duration:.2f} seconds")
        is_mono = audio_data.ndim == 1
        print(f" Channels: {1 if is_mono else audio_data.shape[1]}")

        # Collapse multi-channel audio to a single channel
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)

        # Basic audio statistics
        rms_energy = np.sqrt(np.mean(audio_data**2))
        peak_amplitude = np.max(np.abs(audio_data))
        sign_steps = np.diff(np.sign(audio_data))
        zero_crossing_rate = np.mean(np.abs(sign_steps))
        spectral_centroid = calculate_spectral_centroid(audio_data, sample_rate)
        skewness = skew(audio_data)
        kurt = kurtosis(audio_data)

        print(f"\n📊 Audio Statistics:")
        print(f" RMS Energy: {rms_energy:.4f}")
        print(f" Peak Amplitude: {peak_amplitude:.4f}")
        print(f" Zero Crossing Rate: {zero_crossing_rate:.4f}")
        print(f" Spectral Centroid: {spectral_centroid:.2f} Hz")
        print(f" Skewness: {skewness:.4f}")
        print(f" Kurtosis: {kurt:.4f}")

        # Quality assessment
        scoring_inputs = {
            'rms_energy': rms_energy,
            'zero_crossing_rate': zero_crossing_rate,
            'spectral_centroid': spectral_centroid,
            'skewness': skewness,
            'kurtosis': kurt,
            'duration': duration,
        }
        quality_score = assess_audio_quality(scoring_inputs)

        if quality_score > 60:
            quality = 'good'
        else:
            quality = 'poor'
        print(f" Quality Score: {quality_score}/100 ({quality})")

        return {
            'file': file_path,
            'sample_rate': sample_rate,
            'duration': duration,
            'rms_energy': rms_energy,
            'zero_crossing_rate': zero_crossing_rate,
            'spectral_centroid': spectral_centroid,
            'quality_score': quality_score,
            'quality': quality,
        }

    except Exception as e:
        print(f"Error analyzing {file_path}: {e}")
        return None
|
||||
|
||||
def calculate_spectral_centroid(audio_data, sample_rate):
    """Return the spectral centroid (power-weighted mean frequency).

    Uses the default-parameter spectrogram of the signal and weights each
    frequency bin by its power across all time frames. Returns the integer
    ``0`` when the spectrogram holds no power.
    """
    bin_freqs, _segment_times, spec_power = signal.spectrogram(audio_data, sample_rate)

    overall_power = np.sum(spec_power)
    if overall_power == 0:
        # Nothing to weight; also avoids dividing by zero.
        return 0

    weighted_sum = np.sum(bin_freqs[:, np.newaxis] * spec_power)
    return weighted_sum / overall_power
|
||||
|
||||
def assess_audio_quality(metrics):
    """Score audio metrics on a 0-100 scale in five 20-point categories.

    ``metrics`` must provide ``rms_energy``, ``zero_crossing_rate``,
    ``spectral_centroid``, ``duration``, ``skewness`` and ``kurtosis``.
    The first four earn 20 points inside their ideal range and 10 points in
    the surrounding fringe range; the spectral centroid additionally earns
    5 points in a low band from 200 Hz up to (not including) 500 Hz.
    Skewness and kurtosis are judged together on their absolute values.
    """

    def graded(value, best, near, low=None):
        """Return 20 in ``best``, 10 in ``near`` only, 5 in optional ``low``, else 0."""
        if best[0] <= value <= best[1]:
            return 20
        if near[0] <= value < best[0] or best[1] < value <= near[1]:
            return 10
        if low is not None and low[0] <= value < low[1]:
            return 5
        return 0

    total = graded(metrics['rms_energy'], (0.05, 0.3), (0.02, 0.5))
    total += graded(metrics['zero_crossing_rate'], (0.05, 0.15), (0.02, 0.2))
    total += graded(metrics['spectral_centroid'], (800, 2500), (500, 3500), low=(200, 500))
    total += graded(metrics['duration'], (1.0, 10.0), (0.5, 15.0))

    # Distribution shape: both magnitudes must be moderate to earn points.
    skew_magnitude = abs(metrics['skewness'])
    kurt_magnitude = abs(metrics['kurtosis'])
    if skew_magnitude < 2 and kurt_magnitude < 10:
        total += 20
    elif skew_magnitude < 5 and kurt_magnitude < 20:
        total += 10

    return total
|
||||
|
||||
def analyze_directory(directory):
    """Analyze every ``.wav`` file in ``directory`` and print a summary.

    Files are processed in sorted name order so the output, including the
    "examples" lists (first three of each quality class), is reproducible.
    Prints a message and returns ``None`` when ``directory`` is not an
    existing directory or contains no ``.wav`` files.
    """
    # isdir rather than exists: an existing regular file would otherwise pass
    # the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        print(f"Directory not found: {directory}")
        return

    separator = "=" * 60
    print(f"\n{separator}")
    print(f"ANALYZING LOCAL ACCENT DEMOS: {directory}")
    print(separator)

    audio_files = sorted(f for f in os.listdir(directory) if f.endswith('.wav'))

    if not audio_files:
        print("No WAV files found")
        return

    results = []
    for audio_file in audio_files:
        file_path = os.path.join(directory, audio_file)
        result = analyze_audio_file(file_path)
        if result:
            results.append(result)
        print(separator)

    # Summary
    if results:
        good_files = [r['file'] for r in results if r['quality'] == 'good']
        poor_files = [r['file'] for r in results if r['quality'] == 'poor']

        print("\n📋 Summary:")
        print(f"Total files analyzed: {len(results)}")
        print(f"Good quality files: {len(good_files)}")
        print(f"Poor quality files: {len(poor_files)}")

        if good_files:
            print("\nGood quality examples:")
            for f in good_files[:3]:
                print(f" - {os.path.basename(f)}")

        if poor_files:
            print("\nPoor quality examples:")
            for f in poor_files[:3]:
                print(f" - {os.path.basename(f)}")
|
||||
|
||||
# Script entry point: only the local accent demo directory is analyzed.
if __name__ == "__main__":
    analyze_directory("accent_demos_local")
|
||||
Reference in New Issue
Block a user