Spaces:

lavague-ai
/

interpretable_movie_classifier_sae_gemma

Runtime error

App Files Files Community

JoFrost committed on Sep 9, 2024

Commit

8330b1d

1 Parent(s): 155a7fa

feat: add text

Browse files

Files changed (1) hide show

app.py +93 -52

app.py CHANGED Viewed

@@ -24,6 +24,44 @@ params = {
     "filename" : "layer_31/width_16k/average_l0_76/params.npz"
 }
 C = 0.01
 model_name = params["model_name"]
@@ -168,58 +206,61 @@ def get_feature_iframe(feature):
     html = gr.HTML(html_content)
     return html
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column(scale=4):
-            input_text = gr.Textbox(label="Input", show_label=False, value=DEFAULT_EXAMPLE)
-            gr.Examples(
-                examples=examples,
-                inputs=input_text,
-            )
-        with gr.Column(scale=1):
-            run_button = gr.Button("Run")
-    with gr.Row():
-        label = gr.Label(label="Scores")
-    with gr.Row():
-        with gr.Column(scale=1):
-            plot = gr.Plot(label="Plot")
-            dropdown = gr.Dropdown(choices=["Option 1"], label="Features")
-        with gr.Column(scale=1):
-            highlighted_text = gr.HighlightedText(
-                                label="Activating Tokens",
-                                combine_adjacent=True,
-                                show_legend=True,
-                                color_map={"+": "red", "-": "green"})
-    with gr.Row():
-        html = gr.HTML()
-    # Connect the components
-    run_button.click(
-        fn=get_features,
-        inputs=[input_text],
-        outputs=[label, plot, dropdown],
-    ).then(
-       fn=get_highlighted_text,
-         inputs=[input_text, dropdown],
-         outputs=[highlighted_text]
-    ).then(
-       fn=get_feature_iframe,
-        inputs=[dropdown],
-        outputs=[html]
-    )
-    dropdown.change(
         fn=get_highlighted_text,
-        inputs=[input_text, dropdown],
-        outputs=[highlighted_text]
-    ).then(
-       fn=get_feature_iframe,
-        inputs=[dropdown],
-        outputs=[html]
-    )
 demo.launch(share=True)

     "filename" : "layer_31/width_16k/average_l0_76/params.npz"
 }
+title = """
+<div class='parent' align="center">
+<div class='child' style="display: inline-block !important; margin-bottom: 20px;">
+<h1 style="margin-bottom: 30px;">🔍Interpretable Classifier for movie ratings using Gemma 2 with SAEs</h1>
+</div>
+</div>
+<div class='parent' align="center">
+<p>This space demonstrates how a linear classifier trained on top of features learned by Sparse Auto Encoders (SAEs) can be used to create interpretable natural language classifiers.</p>
+<p>We leverage the interpretability API of <b>Neuronpedia</b> to provide more information about the features used by the LLM (like what tokens activate it the most and their distribution).</p>
+<p><b>More resources on interpretability for LLMs using SAEs:</b></p>
+</div>
+<ul>
+    <li><a href="https://transformer-circuits.pub/2024/scaling-monosemanticity/">Anthropic: Scaling Monosemanticity: Extracting Interpretable Features from Claude 3 Sonnet</a></li>
+    <li><a href="https://blog.eleuther.ai/autointerp/">EleutherAI: Open Source Automated Interpretability for Sparse Autoencoder Features</a></li>
+    <li><a href="https://www.gemma.ai/gemma-scope">Gemma Scope: Open Sparse Autoencoders Everywhere All At Once on Gemma 2</a></li>
+</ul>
+<div class='parent' align="center">
+<p>About us: <b> 🌊 LaVague</b> is an open-source framework to build AI Web Agents. Check out our <a href="https://github.com/lavague-ai/LaVague">GitHub</a> or join our <a href="https://discord.com/invite/SDxn9KpqX9">Discord</a>.</p>
+</div>
+"""
+css = """
+    .my-button {
+        height: 100px; /* Increase the height of the buttons */
+        width: 100%; /* Make sure the button takes the full width */
+        max-width: 300px; /* Optional: set a max width */
+        max-height: 80px;
+        font-size: 1.1rem; /* Increase font size */
+    }
+    .button-container {
+        display: flex;
+        justify-content: center; /* Center buttons horizontally */
+        align-items: center; /* Center buttons vertically */
+        height: 100%; /* Ensure it takes up the full height */
+        width: 100%; /* Ensure it takes up the full width */
+    }
+"""
 C = 0.01
 model_name = params["model_name"]
     html = gr.HTML(html_content)
     return html
+with gr.Blocks(gr.themes.Default(primary_hue="blue", secondary_hue="neutral"), css=css) as demo:
+    with gr.Tab(""):
+        with gr.Row():
+            gr.HTML(title)
+        with gr.Row():
+            with gr.Column(scale=4):
+                input_text = gr.Textbox(label="Input", show_label=False, value=DEFAULT_EXAMPLE)
+                gr.Examples(
+                    examples=examples,
+                    inputs=input_text,
+                )
+            with gr.Column(scale=1):
+                run_button = gr.Button("Run")
+        with gr.Row():
+            label = gr.Label(label="Scores")
+        with gr.Row():
+            with gr.Column(scale=1):
+                plot = gr.Plot(label="Plot")
+                dropdown = gr.Dropdown(choices=["Option 1"], label="Features")
+            with gr.Column(scale=1):
+                highlighted_text = gr.HighlightedText(
+                                    label="Activating Tokens",
+                                    combine_adjacent=True,
+                                    show_legend=True,
+                                    color_map={"+": "red", "-": "green"})
+        with gr.Row():
+            html = gr.HTML()
+        # Connect the components
+        run_button.click(
+            fn=get_features,
+            inputs=[input_text],
+            outputs=[label, plot, dropdown],
+        ).then(
         fn=get_highlighted_text,
+            inputs=[input_text, dropdown],
+            outputs=[highlighted_text]
+        ).then(
+        fn=get_feature_iframe,
+            inputs=[dropdown],
+            outputs=[html]
+        )
+        dropdown.change(
+            fn=get_highlighted_text,
+            inputs=[input_text, dropdown],
+            outputs=[highlighted_text]
+        ).then(
+        fn=get_feature_iframe,
+            inputs=[dropdown],
+            outputs=[html]
+        )
 demo.launch(share=True)