# HY-MT1.5-1.8B / model_gguf.py
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "tencent/HY-MT1.5-1.8B-GGUF"
gguf_file = "HY-MT1.5-1.8B-Q8_0.gguf"
local_dir = "./models"

# Download the checkpoint repo, then load the tokenizer and model straight
# from the Q8_0 GGUF file. transformers dequantizes GGUF weights on load,
# so the model runs as a regular (unquantized) PyTorch model.
model_path = snapshot_download(model_id, local_dir=local_dir)
tokenizer = AutoTokenizer.from_pretrained(model_path, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_path, gguf_file=gguf_file)
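
# The "-GGUF" repo hosts several quantization files, and snapshot_download
# above pulls the whole snapshot. A minimal sketch of a narrower download,
# not part of the original script (allow_patterns is a real
# snapshot_download parameter), that fetches only the Q8_0 file used here:
#
#   model_path = snapshot_download(model_id, local_dir=local_dir,
#                                  allow_patterns=[gguf_file])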

def run(
    text: str = "It’s on the house.",
    target_language: str = "Portuguese",
):
    messages = [
        {
            "role": "user",
            "content": f"Translate the following segment into {target_language}, without additional explanation.\n\n{text}",
        },
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=False,
        return_tensors="pt",
    )
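
    # Note: add_generation_prompt=False is kept as in the original script.
    # With most chat templates, passing True appends the assistant-turn
    # prefix so generation starts a fresh reply; check the model card for
    # the recommended setting for this model.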
    input_ids = tokenized_chat.to(model.device)
    input_length = input_ids.shape[1]
    outputs = model.generate(input_ids, max_new_tokens=2048)

    # Slice from input_length to the end: generate() echoes the prompt, so
    # this keeps only the newly generated tokens.
    generated_tokens = outputs[0][input_length:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return output_text
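
# A small usage sketch built on run() above. run_many is illustrative, not
# part of the original script: it translates one segment into several target
# languages in a single call.
def run_many(text: str, languages: list[str]) -> dict[str, str]:
    """Return a {language: translation} mapping for `text`."""
    return {lang: run(text, target_language=lang) for lang in languages}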

if __name__ == "__main__":
    translated_text = run(
        "Now let's make my mum's favourite. So three mars bars into the pan. "
        "Then we add the tuna and just stir for a bit, just let the chocolate "
        "and fish infuse. A sprinkle of olive oil and some tomato ketchup. "
        "Now smell that. Oh boy this is going to be incredible."
    )
    print(translated_text)