from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "tencent/HY-MT1.5-1.8B-GGUF"
gguf_file = "HY-MT1.5-1.8B-Q8_0.gguf"
local_dir = "./models"

# Download the model repo locally, then load the quantized GGUF weights
# through transformers' gguf_file support.
model_path = snapshot_download(model_id, local_dir=local_dir)
tokenizer = AutoTokenizer.from_pretrained(model_path, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_path, gguf_file=gguf_file)


def run(
    text: str = "It’s on the house.",
    target_language: str = "Portuguese",
):
    messages = [
        {
            "role": "user",
            "content": f"Translate the following segment into {target_language}, without additional explanation.\n\n{text}",
        },
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=False,
        return_tensors="pt",
    )
    input_ids = tokenized_chat.to(model.device)
    input_length = input_ids.shape[1]

    outputs = model.generate(input_ids, max_new_tokens=2048)

    # Slice the output tensor from input_length to the end:
    # this isolates only the newly generated tokens, dropping the prompt.
    generated_tokens = outputs[0][input_length:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return output_text


if __name__ == "__main__":
    translated_text = run(
        "Now let's make my mum's favourite. So three mars bars into the pan. "
        "Then we add the tuna and just stir for a bit, just let the chocolate "
        "and fish infuse. A sprinkle of olive oil and some tomato ketchup. "
        "Now smell that. Oh boy this is going to be incredible."
    )
    print(translated_text)
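
# A minimal usage sketch, assuming this file is saved as e.g. translate_demo.py
# (the filename and the language choices below are illustrative, not from the
# original script; supported languages depend on the model's training pairs):
#
#     from translate_demo import run
#
#     for lang in ["Portuguese", "German", "Japanese"]:
#         print(f"--- {lang} ---")
#         print(run("It’s on the house.", target_language=lang))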