| from application.llm.base import BaseLLM |
| from application.core.settings import settings |
|
|
class LlamaCpp(BaseLLM):
    """BaseLLM backend that runs a local GGUF/GGML model via llama.cpp.

    Loads the model from ``settings.MODEL_PATH`` (or an explicit path) with
    ``llama_cpp.Llama`` and answers instruction-style prompts built from the
    first (context) and last (user question) chat messages.
    """

    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
        """Load the llama.cpp model.

        Args:
            api_key: Unused; accepted for interface parity with the other
                BaseLLM backends.
            llm_name: Filesystem path to the model weights.

        Raises:
            ImportError: If llama-cpp-python is not installed.
        """
        try:
            from llama_cpp import Llama
        except ImportError as exc:
            # Chain the original exception so the real import failure
            # (e.g. a missing shared library) is not hidden.
            raise ImportError(
                "Please install llama_cpp using pip install llama-cpp-python"
            ) from exc

        # Store the model on the instance rather than in a module-level
        # global: the original `global llama` meant a second LlamaCpp
        # instance silently replaced the model used by the first.
        self.llama = Llama(model_path=llm_name, n_ctx=2048)

    def _build_prompt(self, messages):
        """Build the instruction-style prompt from the chat messages.

        Assumes messages[0] carries the retrieved context and messages[-1]
        carries the user's question (the convention used by the callers).
        """
        context = messages[0]['content']
        user_question = messages[-1]['content']
        return f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

    def gen(self, model, engine, messages, stream=False, **kwargs):
        """Return a single completion string for the conversation.

        ``model``, ``engine`` and ``stream`` are unused here; they exist for
        signature parity with the other backends.
        """
        prompt = self._build_prompt(messages)

        result = self.llama(prompt, max_tokens=150, echo=False)

        # echo=False means the prompt is not returned, but split defensively
        # in case the model repeats the answer marker in its output.
        return result['choices'][0]['text'].split('### Answer \n')[-1]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        """Yield completion text chunks as the model produces them."""
        prompt = self._build_prompt(messages)

        result = self.llama(prompt, max_tokens=150, echo=False, stream=stream)

        for item in result:
            for choice in item['choices']:
                yield choice['text']