from collections.abc import Mapping
from typing import Any, Dict, List, Union

from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler:
    """Inference handler that answers questions with a quantized ONNX model.

    The handler is constructed once with the model directory and is then
    called once per request with the decoded request payload.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer and build the QA pipeline.

        Args:
            path: Location handed to both ``from_pretrained`` calls. The ONNX
                weights are read from the file named
                ``model_optimized_quantized.onnx``.
        """
        self.model = ORTModelForQuestionAnswering.from_pretrained(
            path, file_name="model_optimized_quantized.onnx"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.pipeline = pipeline(
            "question-answering", model=self.model, tokenizer=self.tokenizer
        )

    def __call__(
        self, data: Dict[str, Any]
    ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """Run question answering on one request payload.

        Args:
            data: Request payload. If it has an ``"inputs"`` key, that value
                is used as the pipeline arguments; otherwise the payload
                itself is used. The arguments are expanded as keyword
                arguments to the pipeline (typically ``question`` and
                ``context``). A separate ``"parameters"`` key is not read.

        Returns:
            Whatever the question-answering pipeline returns, unmodified.
            Per the transformers documentation this is a dict with
            ``score``, ``start``, ``end`` and ``answer``, or a list of such
            dicts when several answers are requested.

        Raises:
            TypeError: If the resolved inputs are not a mapping and therefore
                cannot be expanded into keyword arguments.
        """
        inputs = data.get("inputs", data)

        # ``**inputs`` would raise TypeError on a non-mapping anyway; fail
        # early with a message that tells the client what shape is expected.
        if not isinstance(inputs, Mapping):
            raise TypeError(
                "inputs must be a mapping of question-answering pipeline "
                "arguments (e.g. 'question' and 'context'), "
                f"got {type(inputs).__name__}"
            )

        return self.pipeline(**inputs)