---
license: apache-2.0
base_model:
- google-bert/bert-large-uncased
datasets:
- codelion/optillm-router-dataset
---
|
|
|
# How to use? |
|
|
|
This model is used in [optillm](https://github.com/codelion/optillm) to route between the various approaches based on the prompt. |
|
|
|
To use the model with optillm, you can just prepend `router-` to the model name. E.g., if we set `router-gpt-4o-mini` as the model, optillm will use `gpt-4o-mini` as the base model.
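For example, since optillm exposes an OpenAI-compatible endpoint, any OpenAI client can select the router by model name alone. A minimal sketch, assuming optillm is running locally on its default port:

```python
from openai import OpenAI

# Assumes a local optillm proxy; adjust base_url if yours runs elsewhere
client = OpenAI(api_key="your-api-key", base_url="http://localhost:8000/v1")

response = client.chat.completions.create(
    model="router-gpt-4o-mini",  # the router picks the approach, gpt-4o-mini is the base model
    messages=[{"role": "user", "content": "How many r's are in strawberry?"}],
)
print(response.choices[0].message.content)
```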
|
|
|
Otherwise, refer to the code in [router-plugin](https://github.com/codelion/optillm/blob/main/optillm/plugins/router_plugin.py) to see how to use this model for classification. |
|
|
|
# Usage |
|
|
|
To use the model directly, you will need our `OptILMClassifier` class, since we added additional layers on top of the base model. The additional `effort_encoder` takes into account the number of tokens a given approach consumes. Also note the mapping of the returned index to the `APPROACHES` list, as shown below.
|
|
|
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_model

# Constants
MAX_LENGTH = 512
APPROACHES = ["none", "mcts", "bon", "moa", "rto", "z3", "self_consistency", "pvg", "rstar", "cot_reflection", "plansearch", "leap", "re2"]
MODEL_NAME = "codelion/optillm-bert-uncased"

class OptILMClassifier(nn.Module):
    def __init__(self, base_model, num_labels):
        super().__init__()
        self.base_model = base_model
        self.effort_encoder = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU()
        )
        self.classifier = nn.Linear(base_model.config.hidden_size + 64, num_labels)

    def forward(self, input_ids, attention_mask, effort):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # Shape: (batch_size, hidden_size)
        effort_encoded = self.effort_encoder(effort.unsqueeze(1))  # Shape: (batch_size, 64)
        combined_input = torch.cat((pooled_output, effort_encoded), dim=1)
        logits = self.classifier(combined_input)
        return logits

def load_optillm_model():
    device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
    # Load the base model
    base_model = AutoModel.from_pretrained("google-bert/bert-large-uncased")
    # Create the OptILMClassifier
    model = OptILMClassifier(base_model, num_labels=len(APPROACHES))
    model.to(device)
    # Download the safetensors file
    safetensors_path = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors")
    # Load the state dict from the safetensors file
    load_model(model, safetensors_path)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return model, tokenizer, device

def preprocess_input(tokenizer, system_prompt, initial_query):
    combined_input = f"{system_prompt}\n\nUser: {initial_query}"
    encoding = tokenizer.encode_plus(
        combined_input,
        add_special_tokens=True,
        max_length=MAX_LENGTH,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )
    return encoding['input_ids'], encoding['attention_mask']

def predict_approach(model, input_ids, attention_mask, device, effort=0.7):
    model.eval()
    with torch.no_grad():
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        effort_tensor = torch.tensor([effort], dtype=torch.float).to(device)

        logits = model(input_ids, attention_mask=attention_mask, effort=effort_tensor)
        probabilities = F.softmax(logits, dim=1)
        predicted_approach_index = torch.argmax(probabilities, dim=1).item()
        confidence = probabilities[0][predicted_approach_index].item()

    return APPROACHES[predicted_approach_index], confidence
```
|
|
|
You can now call the `predict_approach` function to get the predicted approach as follows (the `system_prompt` and `initial_query` values below are illustrative):
|
|
|
```python
# Load the trained model
router_model, tokenizer, device = load_optillm_model()

# Example inputs
system_prompt = "You are a helpful assistant."
initial_query = "Write a Python function to check if a number is prime."

# Preprocess the input
input_ids, attention_mask = preprocess_input(tokenizer, system_prompt, initial_query)

# Predict the best approach
predicted_approach, _ = predict_approach(router_model, input_ids, attention_mask, device)

print(f"Router predicted approach: {predicted_approach}")
```
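Since `predict_approach` also returns a confidence score and accepts an `effort` parameter, you can sweep `effort` to see how it shifts the routing decision. A quick diagnostic sketch (the effort values shown are arbitrary):

```python
# Sweep the effort parameter and compare the predicted approaches
for effort in (0.2, 0.5, 0.9):
    approach, confidence = predict_approach(
        router_model, input_ids, attention_mask, device, effort=effort
    )
    print(f"effort={effort}: {approach} (confidence {confidence:.2f})")
```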