How to use?
This model is used in optillm to route between the various approaches based on the prompt.
To use the model with optillm, just prepend `router-` to the model name.
For example, if you set `router-gpt-4o-mini` as the model, optillm will use
`gpt-4o-mini` as the base model.
Otherwise, refer to the code in router-plugin to see how to use this model for classification.
Usage
To use the model directly, you need our `OptILMClassifier` class, because we
added additional layers on top of the base model. The additional
`effort_encoder` is used to take into account the number of tokens a given
approach consumes. Also note how the returned index maps to the `APPROACHES`
list, as shown below.
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer, AutoConfig
from huggingface_hub import hf_hub_download
from safetensors import safe_open
from safetensors.torch import load_model
from transformers import AutoTokenizer, AutoModel
# Constants

# Length passed to the tokenizer as ``max_length`` (inputs are padded and
# truncated to it).
MAX_LENGTH = 512

# Approach names; the index predicted by the classifier is looked up in this
# list, so the order must not change.
APPROACHES = [
    "none",
    "mcts",
    "bon",
    "moa",
    "rto",
    "z3",
    "self_consistency",
    "pvg",
    "rstar",
    "cot_reflection",
    "plansearch",
    "leap",
    "re2",
]

# Hugging Face Hub repository that holds the classifier weights and tokenizer.
MODEL_NAME = "codelion/optillm-bert-uncased"
class OptILMClassifier(nn.Module):
    """Classifier that combines a base encoder with an encoded effort value.

    The base model's hidden state at the first sequence position is
    concatenated with a 64-dimensional encoding of ``effort`` and passed
    through a single linear layer that produces one logit per label.
    """

    def __init__(self, base_model, num_labels):
        """Build the effort encoder and the classification head.

        Args:
            base_model: Encoder exposing ``config.hidden_size`` and returning
                an object with ``last_hidden_state`` when called.
            num_labels: Number of output logits.
        """
        super().__init__()
        self.base_model = base_model

        # Two-layer MLP that lifts the scalar effort value to 64 features.
        # Layer order is kept so state-dict keys stay effort_encoder.0 / .2.
        effort_layers = [
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
        ]
        self.effort_encoder = nn.Sequential(*effort_layers)

        # The head consumes the encoder features plus the 64 effort features.
        head_in_features = base_model.config.hidden_size + 64
        self.classifier = nn.Linear(head_in_features, num_labels)

    def forward(self, input_ids, attention_mask, effort):
        """Return classification logits for a batch.

        Args:
            input_ids: Token ids forwarded to the base model.
            attention_mask: Attention mask forwarded to the base model.
            effort: Effort values; a trailing feature axis is added via
                ``unsqueeze(1)`` (presumably one scalar per batch item).

        Returns:
            The logits produced by the classification head.
        """
        encoder_out = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        # Hidden state at the first position: (batch_size, hidden_size)
        first_token_state = encoder_out.last_hidden_state[:, 0]
        # Encoded effort: (batch_size, 64)
        effort_features = self.effort_encoder(effort.unsqueeze(1))
        features = torch.cat((first_token_state, effort_features), dim=1)
        return self.classifier(features)
def load_optillm_model():
    """Build the router classifier, load its weights and return it ready to use.

    Returns:
        A ``(model, tokenizer, device)`` tuple: the ``OptILMClassifier`` moved
        to the selected device, the tokenizer loaded from ``MODEL_NAME``, and
        the ``torch.device`` that was selected.
    """
    # Device preference: MPS first, then CUDA, otherwise CPU.
    if torch.backends.mps.is_available():
        backend = "mps"
    elif torch.cuda.is_available():
        backend = "cuda"
    else:
        backend = "cpu"
    device = torch.device(backend)

    # Load the base model and wrap it in the classifier.
    encoder = AutoModel.from_pretrained("google-bert/bert-large-uncased")
    model = OptILMClassifier(encoder, num_labels=len(APPROACHES))
    model.to(device)

    # Download the safetensors file and load its weights into the classifier.
    weights_file = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors")
    load_model(model, weights_file)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer, device
def preprocess_input(tokenizer, system_prompt, initial_query):
    """Tokenize the system prompt and user query as one padded sequence.

    The two texts are joined as ``"{system_prompt}\\n\\nUser: {initial_query}"``
    and tokenized with padding and truncation to ``MAX_LENGTH``.

    Args:
        tokenizer: A callable Hugging Face tokenizer.
        system_prompt: System prompt text.
        initial_query: User query text.

    Returns:
        A ``(input_ids, attention_mask)`` tuple taken from the tokenizer
        output, requested as PyTorch tensors (``return_tensors="pt"``).
    """
    combined_input = f"{system_prompt}\n\nUser: {initial_query}"
    # Call the tokenizer directly: ``encode_plus`` is deprecated in
    # transformers in favour of ``__call__``, which takes the same arguments.
    encoding = tokenizer(
        combined_input,
        add_special_tokens=True,
        max_length=MAX_LENGTH,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    return encoding["input_ids"], encoding["attention_mask"]
def predict_approach(model, input_ids, attention_mask, device, effort=0.7):
    """Predict the approach for one tokenized prompt.

    Puts ``model`` in eval mode and runs it without gradient tracking.

    Args:
        model: Classifier called as
            ``model(input_ids, attention_mask=..., effort=...)``.
        input_ids: Token id tensor; moved to ``device``.
        attention_mask: Attention mask tensor; moved to ``device``.
        device: Device on which inference runs.
        effort: Effort value fed to the model as a one-element float tensor.

    Returns:
        A ``(approach_name, confidence)`` tuple, where ``approach_name`` is the
        ``APPROACHES`` entry at the argmax index and ``confidence`` is the
        softmax probability of that index for the first batch row.

    Note:
        Only a single-example batch is supported: ``.item()`` is called on the
        argmax result, and the effort tensor holds exactly one value.
    """
    model.eval()
    with torch.no_grad():
        ids_on_device = input_ids.to(device)
        mask_on_device = attention_mask.to(device)
        effort_tensor = torch.tensor([effort], dtype=torch.float).to(device)

        logits = model(
            ids_on_device,
            attention_mask=mask_on_device,
            effort=effort_tensor,
        )
        probs = F.softmax(logits, dim=1)
        best_index = torch.argmax(probs, dim=1).item()
        confidence = probs[0][best_index].item()

    return APPROACHES[best_index], confidence
You can now use the `predict_approach` function to get the predicted approach as follows:
# Example inputs -- replace these with your own system prompt and user query.
# (Without these definitions the snippet fails with a NameError.)
system_prompt = "You are a helpful assistant."
initial_query = "How many r's are there in the word strawberry?"

# Load the trained model
router_model, tokenizer, device = load_optillm_model()

# Preprocess the input
input_ids, attention_mask = preprocess_input(tokenizer, system_prompt, initial_query)

# Predict the best approach (the second return value is the confidence)
predicted_approach, _ = predict_approach(router_model, input_ids, attention_mask, device)
print(f"Router predicted approach: {predicted_approach}")
- Downloads last month
- 33
Model tree for codelion/optillm-bert-uncased
Base model
google-bert/bert-large-uncased