199 lines
6.6 KiB
Python
199 lines
6.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Analyze accent verification files to check for distinct accent characteristics
|
|
"""
|
|
|
|
import os
|
|
import numpy as np
|
|
import soundfile as sf
|
|
import scipy.signal
|
|
from scipy.stats import skew, kurtosis
|
|
|
|
# Filesystem locations used by the analysis.
# WORKSPACE is the project root; ACCENT_DIR is the folder scanned for .wav files.
WORKSPACE = "/root/tts"
ACCENT_DIR = os.path.join(WORKSPACE, "accent_verification")
|
|
|
|
def calculate_rms(audio_data):
    """Return the root-mean-square level of the samples.

    Every sample is squared, the squares are averaged over the whole
    array, and the square root of that average is returned.
    """
    mean_power = np.mean(audio_data * audio_data)
    return np.sqrt(mean_power)
|
|
|
|
def calculate_peak_amplitude(audio_data):
    """Return the largest absolute sample value found in the signal."""
    magnitudes = np.abs(audio_data)
    return np.max(magnitudes)
|
|
|
|
def calculate_zero_crossing_rate(audio_data):
    """Return the mean absolute sign change between consecutive samples.

    The value is ``mean(|diff(sign(x))|)`` taken along the time axis. A full
    flip between a positive and a negative sample contributes 2, so the
    result ranges from 0 to 2 rather than 0 to 1. That scale is kept as-is
    because the thresholds used elsewhere in this file are compared against
    it.

    Args:
        audio_data: Array-like of samples. For multi-dimensional input the
            first axis is treated as time (assumes a (frames, channels)
            layout -- confirm against the loader in use).

    Returns:
        The sign-change measure as a float; 0.0 when there are fewer than
        two samples, since no consecutive pair exists.
    """
    samples = np.asarray(audio_data)
    if samples.ndim == 0 or samples.shape[0] < 2:
        return 0.0
    # Difference along axis 0 (time). The default axis (-1) would compare
    # channels against each other for 2-D input instead of successive samples.
    sign_steps = np.diff(np.sign(samples), axis=0)
    return np.mean(np.abs(sign_steps))
|
|
|
|
def calculate_spectral_centroid(audio_data, sample_rate):
    """Return the power-weighted mean frequency of the signal.

    A spectrogram is computed with ``scipy.signal.spectrogram`` using its
    default settings, and the centroid is ``sum(f * S) / sum(S)`` over every
    frequency bin and time segment (and every channel, if several).

    Args:
        audio_data: Array-like of samples. 1-D input is used directly;
            multi-dimensional input is assumed to be (frames, channels)
            -- confirm against the loader in use.
        sample_rate: Sampling frequency passed to the spectrogram.

    Returns:
        The centroid in the frequency units of ``sample_rate``, or 0 when
        the spectrogram contains no power at all (e.g. digital silence).
    """
    samples = np.asarray(audio_data)
    if samples.ndim > 1:
        # spectrogram() analyses the LAST axis; move time there so each
        # channel is transformed over time rather than across channels.
        samples = samples.T

    frequencies, _times, power = scipy.signal.spectrogram(samples, sample_rate)

    total_power = np.sum(power)
    if total_power == 0:
        return 0

    # frequencies[:, np.newaxis] broadcasts over the trailing (freq, time)
    # axes of `power`, including any leading channel axis.
    return np.sum(frequencies[:, np.newaxis] * power) / total_power
|
|
|
|
def calculate_skewness(audio_data):
    """Return the skewness of the samples.

    Delegates to ``scipy.stats.skew`` with its default arguments.
    """
    asymmetry = skew(audio_data)
    return asymmetry
|
|
|
|
def calculate_kurtosis(audio_data):
    """Return the kurtosis of the samples.

    Delegates to ``scipy.stats.kurtosis`` with its default arguments.
    """
    tailedness = kurtosis(audio_data)
    return tailedness
|
|
|
|
def analyze_audio_quality(audio_data, sample_rate, filename):
    """Compute acoustic features for one clip and a heuristic 0-100 score.

    Points are awarded when a feature falls inside its target range:

    * RMS in [0.05, 0.3]                 -> 20
    * peak amplitude <= 1.0              -> 20
    * zero-crossing measure in [0.05, 0.3] -> 20
    * spectral centroid in [400, 3000]   -> 20
    * skewness in [-1, 1]                -> 10
    * kurtosis <= 10                     -> 10

    Args:
        audio_data: Array-like of samples. Multi-dimensional input is
            assumed to be (frames, channels) and is averaged down to one
            channel first -- confirm the layout against the loader in use.
        sample_rate: Sampling frequency, forwarded to the spectral centroid.
        filename: Not used by the computation; kept so existing callers
            keep working.

    Returns:
        dict with keys ``rms``, ``peak``, ``zcr``, ``spectral_centroid``,
        ``skewness``, ``kurtosis`` and ``score``.
    """
    samples = np.asarray(audio_data)
    if samples.ndim > 1:
        # Per-channel feature arrays cannot be used in the chained range
        # checks below (NumPy raises on ambiguous truth values), so reduce
        # to a single channel up front.
        samples = samples.mean(axis=1)

    rms = calculate_rms(samples)
    peak = calculate_peak_amplitude(samples)
    zcr = calculate_zero_crossing_rate(samples)
    spectral_centroid = calculate_spectral_centroid(samples, sample_rate)
    skewness = calculate_skewness(samples)
    kurt = calculate_kurtosis(samples)

    # Quality scoring
    score = 0
    if 0.05 <= rms <= 0.3:
        score += 20
    if peak <= 1.0:
        score += 20
    if 0.05 <= zcr <= 0.3:
        score += 20
    if 400 <= spectral_centroid <= 3000:
        score += 20
    if -1 <= skewness <= 1:
        score += 10
    if kurt <= 10:
        score += 10

    return {
        'rms': rms,
        'peak': peak,
        'zcr': zcr,
        'spectral_centroid': spectral_centroid,
        'skewness': skewness,
        'kurtosis': kurt,
        'score': min(score, 100),
    }
|
|
|
|
def _analyze_file_group(files):
    """Analyze each (filename, path) pair, print a per-file report line and
    return the list of stats rows for the files that succeeded."""
    rows = []
    for filename, file_path in files:
        # Best-effort per file: one unreadable or odd clip is reported and
        # skipped instead of aborting the whole run.
        try:
            audio_data, sample_rate = sf.read(file_path)
            duration = len(audio_data) / sample_rate

            stats = analyze_audio_quality(audio_data, sample_rate, filename)

            # Format before storing the row, so a row that cannot be
            # rendered never reaches the comparison table.
            detail = (
                f" Duration: {duration:.2f}s, RMS: {stats['rms']:.4f}, "
                f"ZCR: {stats['zcr']:.4f}, "
                f"Centroid: {stats['spectral_centroid']:.1f}Hz, "
                f"Score: {stats['score']}/100"
            )

            rows.append({
                'filename': filename,
                'duration': duration,
                'rms': stats['rms'],
                'zcr': stats['zcr'],
                'spectral_centroid': stats['spectral_centroid'],
                'score': stats['score'],
            })

            print(f"✓ {filename}")
            print(detail)
            print()
        except Exception as e:
            print(f"✗ {filename}: Error - {e}")
            print()
    return rows


def _print_comparison_table(title, rows):
    """Print a titled table of stats rows, sorted by filename."""
    print(title)
    print("-" * 70)
    # Header cells use the same widths as the data cells so columns line up.
    print(f"{'Filename':24} | {'Duration':8} | {'RMS':6} | {'ZCR':6} | {'Centroid':8} | {'Score':5}")
    print("-" * 70)

    for row in sorted(rows, key=lambda r: r['filename']):
        print(f"{row['filename']:24} | {row['duration']:8.2f} | {row['rms']:6.4f} | {row['zcr']:6.4f} | {row['spectral_centroid']:8.1f} | {row['score']:5}")


def analyze_accent_verification():
    """Analyze the .wav files in ACCENT_DIR and print a report.

    Files whose name contains ``accent`` are treated as accent samples;
    otherwise files whose name contains ``emotion`` are treated as emotion
    samples. Each group gets a per-file listing and a comparison table,
    followed by a summary. When at least two accent files were analyzed,
    the standard deviations of their spectral centroid and zero-crossing
    measure are printed and compared against fixed thresholds (50 and 0.02)
    to say whether the accents look acoustically distinct.

    Returns:
        None. All results are written to stdout.

    Raises:
        OSError: if ACCENT_DIR cannot be listed.
    """
    print("=" * 70)
    print("ANALYZING ACCENT VERIFICATION FILES")
    print("=" * 70)

    accent_files = []
    emotion_files = []

    # Sorted listing keeps the per-file output order stable between runs;
    # os.listdir() itself returns entries in arbitrary order.
    for filename in sorted(os.listdir(ACCENT_DIR)):
        if not filename.endswith('.wav'):
            continue
        entry = (filename, os.path.join(ACCENT_DIR, filename))
        if 'accent' in filename:
            accent_files.append(entry)
        elif 'emotion' in filename:
            emotion_files.append(entry)

    # Analyze accent files
    print("\n🔊 ACCENT FILES ANALYSIS:")
    print("-" * 70)
    accent_stats = _analyze_file_group(accent_files)

    # Analyze emotion files
    print("\n😊 EMOTION FILES ANALYSIS:")
    print("-" * 70)
    emotion_stats = _analyze_file_group(emotion_files)

    # Side-by-side comparisons
    _print_comparison_table("\n📊 ACCENT COMPARISON:", accent_stats)
    _print_comparison_table("\n📊 EMOTION COMPARISON:", emotion_stats)

    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total accent files: {len(accent_files)}")
    print(f"Total emotion files: {len(emotion_files)}")

    # Check if accents are distinct (needs at least two analyzed files for a
    # spread to be meaningful)
    if len(accent_stats) >= 2:
        centroid_std = np.std([s['spectral_centroid'] for s in accent_stats])
        zcr_std = np.std([s['zcr'] for s in accent_stats])

        print("\nAccent distinctiveness metrics:")
        print(f"Spectral centroid std: {centroid_std:.2f}Hz (higher = more distinct)")
        print(f"Zero crossing rate std: {zcr_std:.4f} (higher = more distinct)")

        if centroid_std > 50 or zcr_std > 0.02:
            print("✅ Accents appear to be distinct based on acoustic features")
        else:
            print("⚠️ Accents may sound similar based on acoustic features")

    print("\n" + "=" * 70)
|
|
|
|
# Script entry point: run the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    analyze_accent_verification()
|