# context-probing / app.py
from enum import Enum
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, BatchEncoding
root_dir = Path(__file__).resolve().parent
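
# Custom Streamlit component (prebuilt frontend in highlighted_text/build) that renders
# the tokens with per-token highlighting driven by the computed scores.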
highlighted_text_component = components.declare_component(
"highlighted_text", path=root_dir / "highlighted_text" / "build"
)
def get_windows_batched(examples: BatchEncoding, window_len: int, stride: int = 1, pad_id: int = 0) -> BatchEncoding:
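    """Split each example into overlapping windows of length `window_len`.

    A window starts at every position (step `stride`) up to the second-to-last token and is
    right-padded to `window_len` with `pad_id` for input_ids and with 0 for all other features.
    """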
return BatchEncoding({
k: [
t[i][j : j + window_len] + [
pad_id if k == "input_ids" else 0
] * (j + window_len - len(t[i]))
for i in range(len(examples["input_ids"]))
for j in range(0, len(examples["input_ids"][i]) - 1, stride)
]
for k, t in examples.items()
})
BAD_CHAR = chr(0xfffd)  # Unicode replacement character: marks an incomplete byte sequence
def ids_to_readable_tokens(tokenizer, ids, strip_whitespace=False):
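    """Decode token ids one at a time into human-readable strings.

    Ids that do not decode to valid text on their own (e.g. partial multi-byte characters)
    yield an empty string and are merged into the next token that completes them.
    """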
cur_ids = []
result = []
for idx in ids:
cur_ids.append(idx)
decoded = tokenizer.decode(cur_ids)
if BAD_CHAR not in decoded:
if strip_whitespace:
decoded = decoded.strip()
result.append(decoded)
del cur_ids[:]
else:
result.append("")
return result
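
# A "?compact=true" query parameter hides the title and links (e.g. when embedding the app).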
compact_layout = st.experimental_get_query_params().get("compact", ["false"]) == ["true"]
if not compact_layout:
st.title("Context length probing")
st.markdown(
"""[📃 Paper](https://arxiv.org/abs/2212.14815) |
[🌍 Website](https://cifkao.github.io/context-probing) |
[🧑‍💻 Code](https://github.com/cifkao/context-probing)
"""
)
model_name = st.selectbox("Model", ["distilgpt2", "gpt2", "EleutherAI/gpt-neo-125m"])
metric_name = st.selectbox("Metric", ["KL divergence", "Cross entropy"], index=1)
window_len = st.select_slider(
r"Window size ($c_\text{max}$)",
options=[8, 16, 32, 64, 128, 256, 512, 1024],
value=512
)
DEFAULT_TEXT = """
We present context length probing, a novel explanation technique for causal
language models, based on tracking the predictions of a model as a function of the length of
available context, and allowing to assign differential importance scores to different contexts.
The technique is model-agnostic and does not rely on access to model internals beyond computing
token-level probabilities. We apply context length probing to large pre-trained language models
and offer some initial analyses and insights, including the potential for studying long-range
dependencies.
""".replace("\n", " ").strip()
text = st.text_area(
"Input text",
DEFAULT_TEXT,
)
if metric_name == "KL divergence":
st.error("KL divergence is not supported yet. Stay tuned!", icon="😭")
st.stop()
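
# st.cache_resource ensures the tokenizer and model are loaded only once per model name.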
with st.spinner("Loading model…"):
tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)(model_name)
model = st.cache_resource(AutoModelForCausalLM.from_pretrained, show_spinner=False)(model_name)
inputs = tokenizer([text])
[input_ids] = inputs["input_ids"]
window_len = min(window_len, len(input_ids))
if len(input_ids) < 2:
st.error("Please enter at least 2 tokens.", icon="🚨")
st.stop()
@st.cache_data(show_spinner=False)
@torch.inference_mode()
def get_logits(_model, _inputs, cache_key):
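    """Run the model on a batch of windows and return its logits as float16.

    Arguments prefixed with "_" are excluded from Streamlit's cache key, so `cache_key`
    (the model name and raw input bytes) is what st.cache_data actually hashes.
    """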
del cache_key
return _model(**_inputs).logits.to(torch.float16)
@st.cache_data(show_spinner=False)
@torch.inference_mode()
def run_context_length_probing(_model, _tokenizer, _inputs, window_len, cache_key):
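    """Compute context-length importance scores for every token of the input.

    The model is run over sliding windows of the text; for each target token, the change
    in its log-probability as the context grows by one token yields a per-context score,
    normalized to [-1, 1] per target token.
    """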
del cache_key
inputs_sliding = get_windows_batched(
_inputs,
window_len=window_len,
pad_id=_tokenizer.eos_token_id
).convert_to_tensors("pt")
logits = []
with st.spinner("Running model…"):
batch_size = 8
num_items = len(inputs_sliding["input_ids"])
pbar = st.progress(0)
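        # Run the windows through the model in small batches, caching logits per batch.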
for i in range(0, num_items, batch_size):
pbar.progress(i / num_items, f"{i}/{num_items}")
batch = {k: v[i:i + batch_size] for k, v in inputs_sliding.items()}
logits.append(
get_logits(
_model,
batch,
cache_key=(model_name, batch["input_ids"].cpu().numpy().tobytes())
)
)
logits = torch.cat(logits, dim=0)
pbar.empty()
with st.spinner("Computing scores…"):
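        # Skew the (window, offset) logits so that after reshaping, element [c, t] holds
        # the prediction for target token t + 1 given a context of c + 1 tokens; NaN padding
        # fills the (context length, position) combinations that fall outside the text.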
logits = logits.permute(1, 0, 2)
logits = F.pad(logits, (0, 0, 0, window_len, 0, 0), value=torch.nan)
logits = logits.view(-1, logits.shape[-1])[:-window_len]
logits = logits.view(window_len, len(input_ids) + window_len - 2, logits.shape[-1])
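        # Take the log-probability of each actual next token, difference it along the
        # context-length axis, and normalize per token to get relative importance scores.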
scores = logits.to(torch.float32).log_softmax(dim=-1)
scores = scores[:, torch.arange(len(input_ids[1:])), input_ids[1:]]
scores = scores.diff(dim=0).transpose(0, 1)
scores = scores.nan_to_num()
scores /= scores.abs().max(dim=1, keepdim=True).values + 1e-9
scores = scores.to(torch.float16)
return scores
scores = run_context_length_probing(
_model=model,
_tokenizer=tokenizer,
_inputs=inputs,
window_len=window_len,
cache_key=(model_name, text),
)
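
# Decode ids to display strings and render them with the interactive highlighting component.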
tokens = ids_to_readable_tokens(tokenizer, input_ids)
highlighted_text_component(tokens=tokens, scores=scores.tolist())