# Scrape residue from the Hugging Face Spaces file-listing page (not code):
# Spaces:
# No application file
# No application file
# File size: 1,025 Bytes
import numpy as np
from transformers import pipeline

# Load both emotion-recognition pipelines once at import time so each
# call to predict_emotion() reuses the already-initialized models.
print("Loading models...")
model_a = pipeline(
    "audio-classification",
    model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
)
model_b = pipeline(
    "audio-classification",
    model="superb/wav2vec2-base-superb-er",
)
print("Models ready")
def fuse_results(results_a, results_b):
    """Average the per-label scores of two audio-classifier outputs.

    Each input is a list of ``{"label": str, "score": float}`` dicts as
    produced by a transformers classification pipeline. A label that only
    one model emitted still has its total divided by two, i.e. the absent
    model is treated as having scored it 0.

    Returns a list of ``{"emotion": str, "confidence": float}`` dicts,
    sorted by descending confidence.
    """
    totals = {}
    # Accumulate scores label-by-label; labels from results_a are inserted
    # first, matching the original dict insertion order.
    for entry in results_a + results_b:
        label = entry["label"]
        totals[label] = totals.get(label, 0.0) + entry["score"]
    fused = [
        {"emotion": label, "confidence": float(total / 2)}
        for label, total in totals.items()
    ]
    # Stable in-place sort keeps equal-confidence labels in insertion order,
    # exactly like sorted() did.
    fused.sort(key=lambda item: item["confidence"], reverse=True)
    return fused
def predict_emotion(audio):
    """Classify the emotion in *audio* with both models and fuse the scores.

    *audio* is whatever the transformers audio-classification pipeline
    accepts — presumably a file path or raw waveform; TODO confirm against
    callers. Returns the fused list from ``fuse_results`` (dicts with
    "emotion" and "confidence" keys, sorted by descending confidence).
    """
    # Fix: the original line ended with a stray " |" scrape artifact,
    # which was a syntax error.
    res_a = model_a(audio)
    res_b = model_b(audio)
    return fuse_results(res_a, res_b)