voice-emotion-api / app /model.py
PsalmsJava's picture
Update app/model.py
9bd70eb verified
# app/model.py
import joblib
import numpy as np
import librosa
import os
# Module-level singletons: both remain None until load_models() assigns them,
# and predict_tone() refuses to run while either is still None.
model = scaler = None
# ==========================
# Load Model and Scaler
# ==========================
def load_models(model_path="emotion_model.pkl", scaler_path="scaler.pkl"):
    """Load the emotion model and feature scaler into the module globals.

    Args:
        model_path: Path to the serialized model file.
        scaler_path: Path to the serialized scaler file.

    Raises:
        FileNotFoundError: If either path does not exist. The message lists
            every missing path so the caller knows what to upload.
    """
    global model, scaler

    # Report exactly which file(s) are absent instead of a generic message.
    missing = [
        str(path)
        for path in (model_path, scaler_path)
        if not os.path.exists(path)
    ]
    if missing:
        raise FileNotFoundError(
            "Model or scaler .pkl files not found. "
            "Upload them to the app directory. "
            f"Missing: {', '.join(missing)}"
        )

    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code embedded in it; only load artifacts from a trusted source.
    loaded_model = joblib.load(model_path)
    loaded_scaler = joblib.load(scaler_path)

    # Publish both objects only after both loads succeeded, so a failure on
    # the scaler never leaves a model paired with a missing/stale scaler.
    model, scaler = loaded_model, loaded_scaler
    print("✅ Tone-based emotion model loaded successfully.")
# ==========================
# Feature extraction
# ==========================
def extract_features(audio_path):
    """Build the tone feature vector for one audio file.

    The audio is loaded at 16 kHz and summarised as, in order:
      - 13 MFCC coefficients, each averaged along axis 1 of the MFCC matrix
      - mean of all strictly positive values returned by librosa.piptrack
        (0 when there are none)
      - mean RMS energy

    Returns a flat NumPy array: the MFCC means followed by pitch and energy.
    """
    signal, sample_rate = librosa.load(audio_path, sr=16000)

    # Spectral envelope summary.
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    mfcc_mean = mfcc_matrix.mean(axis=1)

    # Pitch: only bins with a positive frequency estimate contribute.
    pitch_bins, _ = librosa.piptrack(y=signal, sr=sample_rate)
    voiced = pitch_bins > 0
    if np.any(voiced):
        pitch = np.mean(pitch_bins[voiced])
    else:
        pitch = 0

    # Loudness summary.
    rms_frames = librosa.feature.rms(y=signal)
    energy = np.mean(rms_frames)

    return np.hstack([mfcc_mean, pitch, energy])
# ==========================
# Predict Emotion
# ==========================
def predict_tone(audio_path):
    """Predict the emotion of an audio file from its tone features.

    Args:
        audio_path: Path to the audio file; passed to extract_features().

    Returns:
        A dict with:
          - "emotion_label": the label returned by model.predict, converted
            to a native Python value when it is a NumPy scalar.
          - "confidence": the predict_proba value for that label, as a float.
          - "note": a fixed descriptive string.

    Raises:
        RuntimeError: If the module-level model or scaler is still None.
    """
    if model is None or scaler is None:
        raise RuntimeError("Model and scaler must be loaded first.")

    # The scaler and model expect a 2-D (1, n_features) input.
    features = extract_features(audio_path).reshape(1, -1)
    features_scaled = scaler.transform(features)

    pred_label = model.predict(features_scaled)[0]
    pred_proba = model.predict_proba(features_scaled)[0]

    # Confidence of the predicted class. predict() and argmax(predict_proba)
    # can disagree for some estimators, so prefer the probability column that
    # matches the predicted label; fall back to argmax when the model exposes
    # no classes_ or the label is not found there.
    pred_index = int(np.argmax(pred_proba))
    classes = getattr(model, "classes_", None)
    if classes is not None:
        matches = np.flatnonzero(np.asarray(classes) == pred_label)
        if matches.size:
            pred_index = int(matches[0])
    confidence = float(pred_proba[pred_index])

    # NumPy scalars (e.g. np.int64 labels) are not JSON-serialisable; hand
    # callers a plain Python value instead.
    if isinstance(pred_label, np.generic):
        pred_label = pred_label.item()

    return {
        "emotion_label": pred_label,
        "confidence": confidence,
        "note": "Tone-based prediction (less text bias)"
    }