Spaces:

Glaciohound
/

LM-Steer

Running

App Files Community

hanchier committed on Sep 29, 2024

Commit

acd6966

1 Parent(s): 344e18f

visualization

Browse files

Files changed (2) hide show

app.py +44 -21
lm_steer/models/model_base.py +1 -1

app.py CHANGED Viewed

@@ -63,10 +63,15 @@ def word_embedding_space_analysis(
     return pd.DataFrame(
         data,
         columns=["One Direction", "Another Direction"],
-        index=[f"Dim {_i}" for _i in range(10)],
     )
 def main():
     # set up the page
     random.seed(0)
@@ -103,17 +108,17 @@ def main():
     # set up the model
     st.divider()
     st.divider()
-    st.subheader("Select a model:")
     '''
     Due to resource limits, we are only able to provide a few models for
     steering. You can also refer to the Github repository:
-    https://github.com/Glaciohound/LM-Steer to host larger models.
     Some generated texts may contain toxic or offensive content. Please be
     cautious when using the generated texts.
     Note that for these smaller models, the generation quality may not be as
     good as the larger models (GPT-4, Llama, etc.).
     '''
-    col1, col2 = st.columns(2)
     model_name = col1.selectbox(
         "Select a model to steer",
         [
@@ -143,23 +148,23 @@ def main():
     total_param = sum(p.numel() for _, p in model.named_parameters()) / \
         1024 ** 2
     ratio = num_param / total_param
-    st.write(f"Steered {num_param:.1f}M out of {total_param:.1f}M "
-             "parameters, ratio: {:.2%}".format(ratio))
     # steering
     steer_range = 3.
     steer_interval = 0.2
-    st.subheader("Enter a sentence and steer the model")
     st.session_state.prompt = st.text_input(
         "Enter a prompt",
         st.session_state.get("prompt", "My life")
     )
     col1, col2, col3 = st.columns([2, 2, 1], gap="medium")
     sentiment = col1.slider(
-        "Sentiment (the larger the more positive)",
         -steer_range, steer_range, 0.0, steer_interval)
     detoxification = col2.slider(
-        "Detoxification Strength (the larger the less toxic)",
         -steer_range, steer_range, 0.0,
         steer_interval)
     max_length = col3.number_input("Max length", 20, 200, 20, 20)
@@ -191,7 +196,7 @@ def main():
     # Analysing the sentence
     st.divider()
     st.divider()
-    st.subheader("Analyzing Styled Texts")
     '''
     LM-Steer also serves as a probe for analyzing the text. It can be used to
     analyze the sentiment and detoxification of the text. Now, we proceed and
@@ -200,23 +205,25 @@ def main():
     entangled, as a negative sentiment may also detoxify the text.
     '''
     if st.session_state.get("analyzed_text", "") != "" and \
-            st.button("Analyze the styled text", type="primary"):
         col1, col2 = st.columns(2)
-        for name, col, dim, color in zip(
             ["Sentiment", "Detoxification"],
             [col1, col2],
             [2, 0],
             ["#ff7f0e", "#1f77b4"],
         ):
             with st.spinner(f"Analyzing {name}..."):
                 col.subheader(name)
                 # classification
                 col.markdown(
                     "##### Sentence Classification Distribution")
                 _, dist_list, _ = model.steer_analysis(
                     st.session_state.analyzed_text,
                     dim, -steer_range, steer_range,
-                    bins=2*int(steer_range)+1,
                 )
                 dist_list = np.array(dist_list)
                 col.bar_chart(
@@ -241,9 +248,7 @@ def main():
                 tokens = [f"{i:3d}: {tokenizer.decode([t])}"
                           for i, t in enumerate(tokens)]
                 col.markdown("##### Token's Evidence Score in the Dimension")
-                col.write("The polarity of the token's evidence score "
-                          "which aligns with sliding bar directions."
-                          )
                 col.bar_chart(
                     pd.DataFrame(
                         {
@@ -256,23 +261,41 @@ def main():
     st.divider()
     st.divider()
-    st.subheader("The Word Embeddings Space Analysis")
     '''
     LM-Steer provides a lens on how word embeddings correlate with LM word
     embeddings: what word dimensions contribute to or contrast to a specific
     style. This analysis can be used to understand the word embedding space
     and how it steers the model's generation.
     Note that due to the bidirectional nature of the embedding spaces, in each
-    dimension, sometimes only one side of the word embeddings is most relevant
-    to the style (can be either left or right).
     '''
     for dimension in ["Sentiment", "Detoxification"]:
-        f'##### {dimension} Dimension'
         dim = 2 if dimension == "Sentiment" else 0
         analysis_result = word_embedding_space_analysis(
             model_name, dim)
         with st.expander("Show the analysis results"):
-            st.table(analysis_result)
 if __name__ == "__main__":

     return pd.DataFrame(
         data,
         columns=["One Direction", "Another Direction"],
+        index=[f"Dim#{_i}" for _i in range(n_dim)],
     )
+# rgb tuple to hex color
+def rgb_to_hex(rgb):
+    return '#%02x%02x%02x' % rgb
 def main():
     # set up the page
     random.seed(0)
     # set up the model
     st.divider()
     st.divider()
+    st.subheader("Select A Model and Steer It")
     '''
     Due to resource limits, we are only able to provide a few models for
     steering. You can also refer to the Github repository:
+    https://github.com/Glaciohound/LM-Steer to host larger models locally.
     Some generated texts may contain toxic or offensive content. Please be
     cautious when using the generated texts.
     Note that for these smaller models, the generation quality may not be as
     good as the larger models (GPT-4, Llama, etc.).
     '''
+    col1, col2, col3, col4 = st.columns([3, 1, 1, 1])
     model_name = col1.selectbox(
         "Select a model to steer",
         [
     total_param = sum(p.numel() for _, p in model.named_parameters()) / \
         1024 ** 2
     ratio = num_param / total_param
+    col2.metric("Parameters Steered", f"{num_param:.1f}M")
+    col3.metric("LM Total Size", f"{total_param:.1f}M")
+    col4.metric("Steered Ratio", f"{ratio:.2%}")
     # steering
     steer_range = 3.
     steer_interval = 0.2
     st.session_state.prompt = st.text_input(
         "Enter a prompt",
         st.session_state.get("prompt", "My life")
     )
     col1, col2, col3 = st.columns([2, 2, 1], gap="medium")
     sentiment = col1.slider(
+        "Sentiment (Negative ↔︎ Positive)",
         -steer_range, steer_range, 0.0, steer_interval)
     detoxification = col2.slider(
+        "Detoxification Strength (Toxic ↔︎ Clean)",
         -steer_range, steer_range, 0.0,
         steer_interval)
     max_length = col3.number_input("Max length", 20, 200, 20, 20)
     # Analysing the sentence
     st.divider()
     st.divider()
+    st.subheader("LM-Steer Converts LMs into Text Analyzers")
     '''
     LM-Steer also serves as a probe for analyzing the text. It can be used to
     analyze the sentiment and detoxification of the text. Now, we proceed and
     entangled, as a negative sentiment may also detoxify the text.
     '''
     if st.session_state.get("analyzed_text", "") != "" and \
+            st.button("Analyze the text above", type="primary"):
         col1, col2 = st.columns(2)
+        for name, col, dim, color, axis_annotation in zip(
             ["Sentiment", "Detoxification"],
             [col1, col2],
             [2, 0],
             ["#ff7f0e", "#1f77b4"],
+            ["Negative ↔︎ Positive", "Toxic ↔︎ Clean"]
         ):
             with st.spinner(f"Analyzing {name}..."):
                 col.subheader(name)
                 # classification
                 col.markdown(
                     "##### Sentence Classification Distribution")
+                col.write(axis_annotation)
                 _, dist_list, _ = model.steer_analysis(
                     st.session_state.analyzed_text,
                     dim, -steer_range, steer_range,
+                    bins=4*int(steer_range)+1,
                 )
                 dist_list = np.array(dist_list)
                 col.bar_chart(
                 tokens = [f"{i:3d}: {tokenizer.decode([t])}"
                           for i, t in enumerate(tokens)]
                 col.markdown("##### Token's Evidence Score in the Dimension")
+                col.write(axis_annotation)
                 col.bar_chart(
                     pd.DataFrame(
                         {
     st.divider()
     st.divider()
+    st.subheader("LM-Steer Unveils Word Embeddings Space")
     '''
     LM-Steer provides a lens on how word embeddings correlate with LM word
     embeddings: what word dimensions contribute to or contrast to a specific
     style. This analysis can be used to understand the word embedding space
     and how it steers the model's generation.
     Note that due to the bidirectional nature of the embedding spaces, in each
+    dimension, sometimes only one side of the word embeddings contributes
+    (has an impact on the style), while the other side, (resulting in negative
+    logits) has a negligible impact on the style. The table below shows both
+    sides of the word embeddings in each dimension.
     '''
     for dimension in ["Sentiment", "Detoxification"]:
+        f'##### {dimension} Word Dimensions'
         dim = 2 if dimension == "Sentiment" else 0
         analysis_result = word_embedding_space_analysis(
             model_name, dim)
         with st.expander("Show the analysis results"):
+            color_scale = 7
+            color_init = 230
+            st.table(analysis_result.style.apply(
+                lambda x: [
+                    "background: " + rgb_to_hex(
+                        (255,
+                         color_init-(9-i)*color_scale,
+                         color_init-(9-i)*color_scale)
+                        if dimension == "Sentiment" else
+                        (color_init-(9-i)*color_scale,
+                         color_init-(9-i)*color_scale,
+                         255)
+                    )
+                    for i in range(len(x))
+                ]
+            ))
 if __name__ == "__main__":

lm_steer/models/model_base.py CHANGED Viewed

@@ -109,7 +109,7 @@ class LMSteerBase(nn.Module):
         )
         loss_token = loss_token.reshape(bins + 1, length - 1)
         loss = loss_token.mean(-1)[:-1]
-        dist = ((- loss + loss.mean()) * 100).softmax(0)
         dist_list = list(zip(
             [
                 min_value + (max_value - min_value) / (bins - 1) * bin_i

         )
         loss_token = loss_token.reshape(bins + 1, length - 1)
         loss = loss_token.mean(-1)[:-1]
+        dist = ((- loss + loss.mean()) * 10).softmax(0)
         dist_list = list(zip(
             [
                 min_value + (max_value - min_value) / (bins - 1) * bin_i