Example usage:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
model_name = "meta-llama/Llama-2-13b-hf"
hf_token = "<your token here>"  # access token for the gated Llama-2 repository
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
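# Load the base model in 4-bit using the quantization config above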
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=bnb_config, token=hf_token)
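# Attach the diacritics LoRA adapter (requires the peft package)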
model.load_adapter("sqreept/ro_diacritics_llama2_13b_lora", adapter_name="ro_diacritics_llama2_13b_lora")
def full_prompt(text):
    # Build the chat-formatted prompt (system instruction + user question) for the adapter
    system_prompt = "You are a professional Romanian diacritics corrector."
    question = "What is the corrected version of the below text?\n<text>" + text + "</text>\n"
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": question,
        },
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
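# Example sentence typed without Romanian diacritics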
prompt = full_prompt("Stiinta este in general clara dar cateodata este si confuza")
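# Stream generated tokens to stdout, hiding the prompt and special tokens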
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
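# Make sure the diacritics adapter is the active one before generating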
model.set_adapter("ro_diacritics_llama2_13b_lora")
outputs = model.generate(inputs=inputs, streamer=streamer, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.6, repetition_penalty=1.2)
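# Decode only the newly generated tokens, skipping the prompt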
response = tokenizer.decode(outputs[0,inputs[0].shape[-1]:], skip_special_tokens=True)
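print(response)  # the corrected text with diacritics restored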