from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hub repository id used for both the tokenizer and the model weights.
MODEL_ID = "sayandafadar/Article-Llama-3.2-1B"

# Request 8-bit loading of the weights through the bitsandbytes integration.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quant_config,
)

# Alpaca-style prompt: first slot is the instruction, second slot is the
# (initially empty) response that the model is asked to continue.
alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

prompt = "Write a post on robotics."

message = alpaca_template.format(prompt, "")
# The model is placed by device_map="auto", so send the inputs to wherever the
# model actually lives instead of assuming a device named "cuda".
inputs = tokenizer([message], return_tensors="pt").to(model.device)

# Forward everything the tokenizer returned (not only input_ids) so that
# generate() also receives the attention mask.
generated_ids = model.generate(**inputs, max_new_tokens=256)

# The decoded sequence contains the prompt followed by the generated text.
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

print(generated_text)
Downloads last month: 1
Safetensors
Model size: 1B params
Tensor type: BF16
Inference Providers NEW
Input a message to start chatting with sayandafadar/Article-Llama-3.2-1B.

Model tree for sayandafadar/Article-Llama-3.2-1B

Quantizations: 3 models