Model Card for SmolLM3-3B-Instruct-Anime
This model is a fine-tuned version of HuggingFaceTB/SmolLM3-3B-Base, trained on the zerofata/Instruct-Anime dataset.
Quick start
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
# Define paths
base_model_path = "./SmolLM3-3B-Base/"
adapter_path = "./SmolLM3-3B-Instruct-Anime/"
# Load the base model and tokenizer in bf16
print("Loading base model and tokenizer...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
# Load the LoRA adapter and merge it into the base model
print("Loading LoRA adapter and merging...")
model = PeftModel.from_pretrained(base_model, adapter_path)
model = model.merge_and_unload() # Merge the weights
# Create the text generation pipeline
print("Creating pipeline...")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Your question
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why? "
# Format the prompt using the chat template
# We need to load the template file just like in the training script
with open("chat_template.jinja", "r") as f:
    chat_template = f.read()
tokenizer.chat_template = chat_template
prompt = tokenizer.apply_chat_template([{"role": "user", "content": question}], tokenize=False, add_generation_prompt=True)
# Generate the output
print("Generating response...")
output = generator(prompt, max_new_tokens=2048, return_full_text=False)
print("--- Model Response ---")
print(output[0]["generated_text"])
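If you want to skip the merge step on later runs, you can persist the merged weights once and load them directly afterwards. A minimal sketch, assuming an example output path of your own choosing (not part of the original card):
# Optionally save the merged model for standalone reuse (example path, pick your own)
merged_path = "./SmolLM3-3B-Instruct-Anime-merged/"
model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)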
Training procedure
This model was trained with SFT (supervised fine-tuning) using TRL's SFTTrainer; the full training script is shown below.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
import trackio
# --- Configuration ---
model_name = "./SmolLM3-3B-Base/"
dataset_path = "./Instruct-Anime/instruct_dataset.jsonl"
output_dir = "./SmolLM3-3B-Instruct-Anime"
project_name = "smollm3-sft-anime"
# --- 1. Initialize Tracking ---
trackio.init(project=project_name)
# --- 2. Load Model and Tokenizer ---
print("Loading model and tokenizer...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Add a pad token if it's missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id
# Load and set chat template from the jinja file
with open("chat_template.jinja", "r") as f:
    chat_template = f.read()
tokenizer.chat_template = chat_template
print("Chat template loaded from chat_template.jinja and set on the tokenizer.")
# --- Enable Gradient Checkpointing ---
print("Enabling Gradient Checkpointing...")
model.gradient_checkpointing_enable()
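# Added note (assumption, not in the original script): gradient checkpointing is
# incompatible with the KV cache, so it is common practice to disable use_cache during training.
model.config.use_cache = False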
# --- 3. Load and Process Dataset ---
print("Loading and processing dataset...")
dataset = load_dataset("json", data_files=dataset_path, split="train")
def formatting_prompts_func(example):
    # This function formats the chat messages into a single string
    # by applying the model's chat template.
    text = tokenizer.apply_chat_template(example['messages'], tokenize=False)
    example['text'] = text
    return example
dataset = dataset.map(formatting_prompts_func, remove_columns=["messages", "source"])
print(f"Dataset loaded and formatted with {len(dataset)} examples.")
# --- 4. Configure LoRA ---
print("Configuring LoRA...")
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=['q_proj', 'v_proj'],  # adapt only the attention query/value projections
    bias="none",
    task_type="CAUSAL_LM",
)
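# Note (added for clarity, not in the original script): with r=8 and lora_alpha=16,
# the LoRA update is scaled by lora_alpha / r = 2 before being added to the frozen weights.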
# --- 5. Configure Training ---
# Balanced learning rate and batch size for a GPU with ~24GB VRAM
print("Configuring training arguments...")
training_args = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=5,  # Train for a total of 5 epochs
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch size: 2 x 8 = 16 sequences per optimizer step
    optim="paged_adamw_8bit",
    learning_rate=1e-4,
    lr_scheduler_type="cosine",  # the original listing named a "rex" schedule; cosine is shown here as a standard stand-in
    warmup_steps=50,
    logging_steps=8,
    save_total_limit=5,  # Keep best + last few checkpoints
    load_best_model_at_end=True,  # note: requires an eval dataset and a matching eval/save strategy
    save_strategy="steps",
    report_to="trackio",
    packing=True,
    max_length=5120,
    metric_for_best_model="eval_loss",  # only meaningful when evaluation is enabled
    greater_is_better=False,
)
# --- 6. Create and Run Trainer ---
print("Creating SFTTrainer...")
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
    # The trainer will automatically use the 'text' column
)
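# Sanity check (added; assumes SFTTrainer wrapped the model with the LoRA adapter
# because peft_config was passed): report how many parameters are actually trainable.
trainer.model.print_trainable_parameters()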
print("Starting training...")
trainer.train()  # pass resume_from_checkpoint=True to resume from the latest saved checkpoint
# --- 7. Save the final adapter ---
print("Training finished. Saving adapter.")
trainer.save_model(output_dir)
print(f"LoRA adapter saved to {output_dir}")
trackio.finish()
Framework versions
- PEFT: 0.17.1
- TRL: 0.23.0
- Transformers: 4.56.2
- PyTorch: 2.8.0+cu126
- Datasets: 4.1.1
- Tokenizers: 0.22.1
Citations
Cite TRL as:
@misc{vonwerra2022trl,
title = {{TRL: Transformer Reinforcement Learning}},
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
year = 2020,
journal = {GitHub repository},
publisher = {GitHub},
howpublished = {\url{https://github.com/huggingface/trl}}
}