from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "tencent/HY-MT1.5-1.8B-GGUF"
gguf_file = "HY-MT1.5-1.8B-Q8_0.gguf"
local_dir = "./models"

# Download the model repo locally, then load the quantized GGUF weights
# through transformers' gguf_file support.
model_path = snapshot_download(model_id, local_dir=local_dir)
tokenizer = AutoTokenizer.from_pretrained(model_path, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_path, gguf_file=gguf_file)


def run(
    text: str = "It’s on the house.",
    target_language: str = "Portuguese",
):
    messages = [
        {
            "role": "user",
            "content": f"Translate the following segment into {target_language}, without additional explanation.\n\n{text}",
        },
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=False,
        return_tensors="pt",
    )
    input_ids = tokenized_chat.to(model.device)
    input_length = input_ids.shape[1]

    outputs = model.generate(input_ids, max_new_tokens=2048)

    # Slice the output tensor from input_length to the end:
    # this isolates only the newly generated tokens, dropping the prompt.
    generated_tokens = outputs[0][input_length:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return output_text


if __name__ == "__main__":
    translated_text = run(
        "Now let's make my mum's favourite. So three mars bars into the pan. "
        "Then we add the tuna and just stir for a bit, just let the chocolate "
        "and fish infuse. A sprinkle of olive oil and some tomato ketchup. "
        "Now smell that. Oh boy this is going to be incredible."
    )
    print(translated_text)
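
# A minimal usage sketch, assuming this file is saved as e.g. translate_demo.py
# (the filename and the language choices below are illustrative, not from the
# original script; supported languages depend on the model's training pairs):
#
#     from translate_demo import run
#
#     for lang in ["Portuguese", "German", "Japanese"]:
#         print(f"--- {lang} ---")
#         print(run("It’s on the house.", target_language=lang))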