import base64
import io
import os
from typing import Any, Dict, List

import soundfile as sf
import torch
from IPython.display import Audio, display
from kokoro import KPipeline


class EndpointHandler:
    """Endpoint handler that turns text into a base64-encoded WAV payload.

    A single ``KPipeline`` is created at construction time and reused for
    every request handled by ``__call__``.
    """

    # Sample rate (Hz) used both when writing the WAV container and when
    # computing the clip duration; keeping one constant stops the two drifting.
    # NOTE(review): presumably the rate the Kokoro pipeline emits - confirm.
    SAMPLE_RATE = 24000

    def __init__(self, model_dir: str):
        """Create the text-to-speech pipeline.

        Args:
            model_dir: Accepted to keep the constructor signature; it is not
                used by this handler.
        """
        self.pipeline = KPipeline(lang_code='a')

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Synthesize speech for one request.

        Args:
            data: Request payload. ``data["inputs"]`` must be a dict holding
                ``"text"`` (the text to speak) and ``"voice"`` (forwarded to
                the pipeline unchanged).

        Returns:
            A response dict with ``headers``, a base64-encoded WAV ``body``,
            ``statusCode`` 200, ``isBase64Encoded`` and
            ``audio_length_seconds``.

        Raises:
            ValueError: If ``inputs`` is not a dict, if ``text`` is missing or
                empty, or if the pipeline yields no audio for the text.
        """
        inputs = data.get("inputs") or {}
        if not isinstance(inputs, dict):
            raise ValueError(
                '"inputs" must be an object with "text" and "voice" fields'
            )

        text = inputs.get("text")
        if not text:
            raise ValueError('"inputs.text" is required and must not be empty')
        voice = inputs.get("voice")

        # Each pipeline item unpacks to a 3-tuple; only the audio is needed.
        # as_tensor avoids re-copying segments that are already tensors, and
        # detach().cpu() keeps the later .numpy() call valid for tensors that
        # live on an accelerator or carry autograd state.
        segments = [
            torch.as_tensor(audio).detach().cpu()
            for _, _, audio in self.pipeline(text, voice)
            if audio is not None
        ]
        if not segments:
            # torch.cat rejects an empty list with an opaque error message.
            raise ValueError("the pipeline produced no audio for the given text")

        full_audio = torch.cat(segments)
        audio_length_seconds = full_audio.shape[0] / self.SAMPLE_RATE

        # Encode to WAV in memory; nothing is written to disk.
        with io.BytesIO() as buffer:
            sf.write(buffer, full_audio.numpy(), self.SAMPLE_RATE, format='WAV')
            audio_bytes = buffer.getvalue()
        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")

        return {
            "headers": {
                "Content-Disposition": "attachment; filename=output.wav",
                "Content-Type": "audio/wav",
            },
            "body": audio_b64,
            "statusCode": 200,
            "isBase64Encoded": True,
            "audio_length_seconds": audio_length_seconds,
        }