Model Card for SmolLM3-3B-Instruct-Anime

This model is a fine-tuned version of HuggingFaceTB/SmolLM3-3B-Base, trained on the zerofata/Instruct-Anime dataset.

Quick start

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

# Define paths (local copies of the base model and the LoRA adapter)
base_model_path = "./SmolLM3-3B-Base/"
adapter_path = "./SmolLM3-3B-Instruct-Anime/"

# Load the base model and tokenizer in bf16
print("Loading base model and tokenizer...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Load the LoRA adapter and merge it into the base model
print("Loading LoRA adapter and merging...")
model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload() # Merge the weights

# Create the text generation pipeline
print("Creating pipeline...")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Your question
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"

# Format the prompt using the chat template
# Load the same chat template file used during training
with open("chat_template.jinja", "r") as f:
    chat_template = f.read()
tokenizer.chat_template = chat_template

prompt = tokenizer.apply_chat_template([{"role": "user", "content": question}], tokenize=False, add_generation_prompt=True)

# Generate the output
print("Generating response...")
output = generator(prompt, max_new_tokens=2048, return_full_text=False)
print("--- Model Response ---")
print(output[0]["generated_text"])
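
If you plan to reuse the merged model, you can save it once and skip the merge step on later loads. A minimal sketch; the output path below is an assumption:

# Persist the merged weights and tokenizer for standalone loading
merged_path = "./SmolLM3-3B-Instruct-Anime-merged/"  # hypothetical path
model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)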

Training procedure

This model was trained with supervised fine-tuning (SFT) using TRL's SFTTrainer; the full training script follows.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
import trackio

# --- Configuration ---
model_name = "./SmolLM3-3B-Base/"
dataset_path = "./Instruct-Anime/instruct_dataset.jsonl"
output_dir = "./SmolLM3-3B-Instruct-Anime"
project_name = "smollm3-sft-anime"

# --- 1. Initialize Tracking ---
trackio.init(project=project_name)

# --- 2. Load Model and Tokenizer ---
print("Loading model and tokenizer...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Add a pad token if it's missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

# Load and set chat template from the jinja file
with open("chat_template.jinja", "r") as f:
    chat_template = f.read()
tokenizer.chat_template = chat_template
print("Chat template loaded from chat_template.jinja and set on the tokenizer.")


# --- Enable Gradient Checkpointing ---
print("Enabling Gradient Checkpointing...")
model.gradient_checkpointing_enable()
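
# Gradient checkpointing trades compute for memory by recomputing activations
# during the backward pass. If Transformers warns that use_cache is
# incompatible with checkpointing, it can be disabled for training:
# model.config.use_cache = False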


# --- 3. Load and Process Dataset ---
print("Loading and processing dataset...")
dataset = load_dataset("json", data_files=dataset_path, split="train")

def formatting_prompts_func(example):
    # This function formats the chat messages into a single string
    # by applying the model's chat template.
    text = tokenizer.apply_chat_template(example['messages'], tokenize=False)
    example['text'] = text
    return example

dataset = dataset.map(formatting_prompts_func, remove_columns=["messages", "source"])
print(f"Dataset loaded and formatted with {len(dataset)} examples.")


# --- 4. Configure LoRA ---
print("Configuring LoRA...")
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=['q_proj', 'v_proj'],  # attention query/value projections only
    bias="none",
    task_type="CAUSAL_LM",
)

# --- 5. Configure Training ---
# Balanced learning rate and batch size for a GPU with ~24GB VRAM
print("Configuring training arguments...")
training_args = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=5,  # train for a total of 5 epochs
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch size: 2 * 8 = 16 sequences/step
    optim="paged_adamw_8bit",
    learning_rate=1e-4,
    lr_scheduler_type="rex",  # custom scheduler; swap in e.g. "cosine" if "rex" is not registered
    warmup_steps=50,
    logging_steps=8,
    save_total_limit=5,  # keep the best checkpoint plus the last few
    load_best_model_at_end=True,  # requires an eval dataset and a matching eval strategy
    save_strategy="steps",
    report_to="trackio",
    packing=True,
    max_length=5120,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# --- 6. Create and Run Trainer ---
print("Creating SFTTrainer...")
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    # The trainer will automatically use the 'text' column
)
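
# Optional sanity check: SFTTrainer wraps the model with the LoRA adapter, so
# the trainable parameters should be a small fraction of the full 3B model:
# trainer.model.print_trainable_parameters()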

print("Starting training...")
trainer.train()  # pass resume_from_checkpoint=True to resume from the latest checkpoint

# --- 7. Save the final adapter ---
print("Training finished. Saving adapter.")
trainer.save_model(output_dir)

print(f"LoRA adapter saved to {output_dir}")
trackio.finish()
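
To make the adapter downloadable, it can optionally be pushed to the Hub after training. A minimal sketch using the PEFT and tokenizer push_to_hub helpers:

# Optionally publish the adapter and tokenizer (repo id matches this card)
trainer.model.push_to_hub("Pentium95/SmolLM3-3B-Instruct-Anime")
tokenizer.push_to_hub("Pentium95/SmolLM3-3B-Instruct-Anime")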

Framework versions

  • PEFT: 0.17.1
  • TRL: 0.23.0
  • Transformers: 4.56.2
  • Pytorch: 2.8.0+cu126
  • Datasets: 4.1.1
  • Tokenizers: 0.22.1

Citations

Cite TRL as:

@misc{vonwerra2022trl,
    title        = {{TRL: Transformer Reinforcement Learning}},
    author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
    year         = 2020,
    journal      = {GitHub repository},
    publisher    = {GitHub},
    howpublished = {\url{https://github.com/huggingface/trl}}
}