Spaces:
No application file
No application file
| import numpy as np | |
| from transformers import pipeline | |
# Build both audio-classification pipelines once, at import time, so that
# predict_emotion() can reuse them on every call instead of reloading.
print("Loading models...")

model_a = pipeline(
    task="audio-classification",
    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
)
model_b = pipeline(
    task="audio-classification",
    model="superb/wav2vec2-base-superb-er",
)

print("Models ready")
def fuse_results(results_a, results_b):
    """Merge two classifier outputs into one ranked list of emotions.

    Args:
        results_a: Iterable of mappings, each with a "label" and a "score"
            key. If a label repeats here, the last score wins.
        results_b: Iterable of mappings with the same keys. Each score is
            added to whatever is already stored for that label.

    Returns:
        A list of ``{"emotion": label, "confidence": total / 2}`` dicts,
        ordered by descending confidence. Ties keep first-seen order.
        The total is always divided by 2, even for a label that appears in
        only one of the two inputs.
    """
    # NOTE(review): labels are matched by exact string equality, so scores
    # are only fused when both models emit the same label text - confirm
    # the two models share a label vocabulary.
    totals = {entry["label"]: entry["score"] for entry in results_a}

    for entry in results_b:
        label = entry["label"]
        try:
            totals[label] += entry["score"]
        except KeyError:
            # Label not produced by the first model: start a new total.
            totals[label] = entry["score"]

    ranked = [
        {"emotion": label, "confidence": float(total / 2)}
        for label, total in totals.items()
    ]
    ranked.sort(key=lambda row: row["confidence"], reverse=True)
    return ranked
def predict_emotion(audio):
    """Classify *audio* with both loaded models and return the fused ranking.

    Args:
        audio: Passed unchanged to ``model_a`` and then ``model_b``; the
            accepted input formats are whatever those callables support.

    Returns:
        The list produced by ``fuse_results`` from the two models' outputs.
    """
    # Arguments evaluate left to right, so model_a still runs before model_b.
    return fuse_results(model_a(audio), model_b(audio))