"""Speech-emotion recognition that fuses the outputs of two wav2vec2 classifiers."""

import numpy as np
from transformers import pipeline

print("Loading models...")

model_a = pipeline(
    "audio-classification",
    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
)

model_b = pipeline(
    "audio-classification",
    model="superb/wav2vec2-base-superb-er",
)

print("Models ready")

# The two checkpoints do not share a label vocabulary: the SUPERB ER model
# reports abbreviated names ("neu", "hap", "ang", "sad") while the XLSR model
# reports full words ("neutral", "happy", "angry", "sad", ...). Without this
# mapping only "sad" would ever be merged and every other shared emotion would
# show up twice under different names.
_LABEL_ALIASES = {
    "neu": "neutral",
    "hap": "happy",
    "ang": "angry",
}


def _canonical_label(label):
    """Map a model-specific label to the shared name; unknown labels pass through."""
    return _LABEL_ALIASES.get(label, label)


def fuse_results(results_a, results_b):
    """Average two classifiers' scores per emotion and rank the result.

    Args:
        results_a: Iterable of ``{"label": ..., "score": ...}`` dicts from the
            first model.
        results_b: Iterable of dicts with the same keys from the second model.

    Returns:
        A list of ``{"emotion": str, "confidence": float}`` dicts sorted by
        descending confidence. Labels are canonicalised via ``_LABEL_ALIASES``
        before merging, so e.g. ``"ang"`` and ``"angry"`` are fused into a
        single ``"angry"`` entry.

    Note:
        The summed score is always divided by two, so an emotion reported by
        only one of the inputs is treated as if the other had scored it 0.
    """
    # Seed with the first model's scores (a repeated label keeps the last score).
    combined = {
        _canonical_label(item["label"]): item["score"] for item in results_a
    }

    # Add the second model's scores onto matching canonical labels.
    for item in results_b:
        label = _canonical_label(item["label"])
        combined[label] = combined.get(label, 0) + item["score"]

    fused = [
        {"emotion": emotion, "confidence": float(total / 2)}
        for emotion, total in combined.items()
    ]
    return sorted(fused, key=lambda entry: entry["confidence"], reverse=True)


def predict_emotion(audio):
    """Classify ``audio`` with both models and return the fused ranking.

    Args:
        audio: Passed unchanged to both pipelines.
            NOTE(review): accepted input forms (file path, raw array, ...) are
            whatever the audio-classification pipeline supports; confirm
            against the caller.

    Returns:
        The ranked list produced by ``fuse_results``.
    """
    res_a = model_a(audio)
    res_b = model_b(audio)
    return fuse_results(res_a, res_b)