cifkao committed
Commit 106dd6f
Parent: e4bf282

Add/improve help strings

Files changed (1): app.py (+13 −3)
app.py CHANGED

@@ -36,7 +36,7 @@ generation_mode = st.radio(
 st.caption(
     "In basic mode, we analyze the model's one-step-ahead predictions on the input text. "
     "In generation mode, we generate a continuation of the input text (prompt) "
-    "and visualize the contributions of different contexts to each generated token."
+    "and analyze the model's predictions influencing the generated tokens."
 )
 
 model_name = st.selectbox(
@@ -50,7 +50,15 @@ model_name = st.selectbox(
     ]
 )
 metric_name = st.radio(
-    "Metric", (["KL divergence"] if not generation_mode else []) + ["NLL loss"], index=0, horizontal=True
+    "Metric",
+    (["KL divergence"] if not generation_mode else []) + ["NLL loss"],
+    index=0,
+    horizontal=True,
+    help="**KL divergence** is computed between the predictions with the reduced context "
+    "(corresponding to the highlighted token) and the predictions with the full context "
+    "($c_\\text{max}$ tokens). \n"
+    "**NLL loss** is the negative log-likelihood loss (a.k.a. cross entropy) for the target "
+    "token."
 )
 
 tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)(model_name, use_fast=False)
@@ -68,7 +76,9 @@ window_len_options = [
 window_len = st.select_slider(
     r"Window size ($c_\text{max}$)",
     options=window_len_options,
-    value=min(128, window_len_options[-1])
+    value=min(128, window_len_options[-1]),
+    help="The maximum context length $c_\\text{max}$ for which we compute the scores. Smaller "
+    "windows are less computationally intensive, allowing for longer inputs."
 )
 # Now figure out how many tokens we are allowed to use:
 # window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM
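
For reference, the two metrics described in the new `help` string can be sketched as follows. This is a minimal illustration, not the app's implementation; the function names, the tensor layout, and the direction of the KL divergence (from the full-context distribution to the reduced-context one) are assumptions:

```python
import torch
import torch.nn.functional as F

def kl_to_full_context(logits_reduced: torch.Tensor,
                       logits_full: torch.Tensor) -> torch.Tensor:
    """Per-position KL(p_full || p_reduced) over the vocabulary axis.

    Both inputs have shape (..., vocab_size). The direction of the
    divergence is an assumption, not stated in the commit.
    """
    logp_reduced = F.log_softmax(logits_reduced, dim=-1)
    logp_full = F.log_softmax(logits_full, dim=-1)
    # sum_v p_full(v) * (log p_full(v) - log p_reduced(v))
    return (logp_full.exp() * (logp_full - logp_reduced)).sum(dim=-1)

def nll_loss(logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Per-position negative log-likelihood (cross entropy) of the targets.

    `logits`: (num_tokens, vocab_size), `targets`: (num_tokens,).
    """
    return F.cross_entropy(logits, targets, reduction="none")
```

Note the `(["KL divergence"] if not generation_mode else [])` guard in the diff: KL divergence is only offered in basic mode, while NLL loss is available in both modes.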
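The trailing comment in the last hunk encodes a memory budget: presumably one vocabulary-sized prediction is kept per (context size, position) pair. Solving the inequality for `num_tokens` gives the allowed input length; below is a sketch with illustrative values, where `MAX_MEM` and the GPT-2 vocabulary size are assumptions, not taken from the commit:

```python
# window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM
# =>  num_tokens <= MAX_MEM / (window_len * vocab_size) - window_len
MAX_MEM = 2 ** 30   # hypothetical budget, counted in stored scalars
vocab_size = 50257  # e.g. GPT-2; the selected model's vocabulary may differ
window_len = 128

max_num_tokens = MAX_MEM // (window_len * vocab_size) - window_len
print(max_num_tokens)  # -> 38 for these example values
```

This also motivates the slider's new help text: for a fixed budget, halving `window_len` roughly doubles the number of input tokens the app can afford.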