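The example below downloads the Q8_0 GGUF file from the Hub with `huggingface_hub` and runs a translation through `transformers`: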
```python
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "tencent/HY-MT1.5-1.8B-GGUF"
gguf_file = "HY-MT1.5-1.8B-Q8_0.gguf"
local_dir = "./models"

# Download the repository snapshot (including the GGUF file) from the Hub.
model_path = snapshot_download(model_id, local_dir=local_dir)

# transformers dequantizes the GGUF weights into regular torch tensors on load.
tokenizer = AutoTokenizer.from_pretrained(model_path, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_path, gguf_file=gguf_file)


def run(
    text: str = "It’s on the house.",
    target_language: str = "Portuguese",
):
    messages = [
        {
            "role": "user",
            "content": f"Translate the following segment into {target_language}, without additional explanation.\n\n{text}",
        },
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=False,
        return_tensors="pt",
    )
    input_ids = tokenized_chat.to(model.device)
    input_length = input_ids.shape[1]
    outputs = model.generate(input_ids, max_new_tokens=2048)
    # Slice the output from input_length to the end to keep only the newly
    # generated tokens, then decode them to text.
    generated_tokens = outputs[0][input_length:]
    output_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return output_text


if __name__ == "__main__":
    translated_text = run(
        "Now let's make my mum's favourite. So three mars bars into the pan. "
        "Then we add the tuna and just stir for a bit, just let the chocolate "
        "and fish infuse. A sprinkle of olive oil and some tomato ketchup. "
        "Now smell that. Oh boy this is going to be incredible."
    )
    print(translated_text)
```
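The `model.generate` call above decodes greedily. If you prefer sampled decoding, a call along the following lines should work; the parameter values are illustrative assumptions, not settings documented for this checkpoint, so check the upstream Hunyuan-MT recommendations before relying on them.

```python
# Sketch only: sampled decoding inside run(), replacing the greedy generate call.
# All sampling values are assumptions for illustration; tune them against the
# model card's recommended inference parameters.
outputs = model.generate(
    input_ids,
    max_new_tokens=2048,
    do_sample=True,
    top_k=20,
    top_p=0.6,
    temperature=0.7,
    repetition_penalty=1.05,
)
```

Greedy decoding is often fine for translation, where deterministic output is usually what you want; sampling mainly helps if you run into repetitive generations.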