"""Gradio viewer for comparing two text representations of a repository.

Run as a script: loads the representations table with ``load_repo_df`` and
serves a page with one repository dropdown and two representation dropdowns;
the selected texts are rendered in two Markdown panes placed in one row.
"""
import logging
import re

import gradio as gr
import pandas as pd

logging.basicConfig(level=logging.INFO)


def load_repo_df(data_path="data/repo_representations.jsonl") -> pd.DataFrame:
    """Load the representations table from a JSON Lines file.

    Args:
        data_path: Location of a JSON Lines file (one record per line). The
            records must provide a ``text`` field; the rest of this script
            also reads ``repo_name`` and ``representation``.

    Returns:
        The loaded frame with its ``text`` column rewritten so that every
        "│" and "⋮" character becomes a newline.
    """
    data = pd.read_json(data_path, lines=True, orient="records")
    cleaned_text = (
        data["text"]
        # NOTE(review): an empty pattern replaced by an empty string changes
        # nothing; presumably the intended pattern was lost -- confirm and
        # restore it, or drop this call.
        .str.replace(r"", "", regex=True)
        # These are literal replacements. `regex` is passed explicitly
        # because its default differs between pandas versions.
        .str.replace("│", "\n", regex=False)
        .str.replace("⋮", "\n", regex=False)
    )
    return data.assign(text=cleaned_text)


def _representation_text(repo_data, representation):
    """Return the first ``text`` of *representation* in *repo_data*, or a placeholder."""
    matches = repo_data.loc[repo_data["representation"] == representation, "text"]
    if matches.empty:
        return "No data available"
    return matches.iloc[0]


def display_representations(repo, representation1, representation2):
    """Look up the texts of two representations of one repository.

    Reads the module-level ``repos_df``, which this file assigns only in its
    ``__main__`` section; calling this function before that assignment raises
    ``NameError``.

    Args:
        repo: Value matched against the ``repo_name`` column.
        representation1: Value matched against the ``representation`` column
            for the first result.
        representation2: Same, for the second result.

    Returns:
        A ``(text1, text2)`` tuple. Each element is the ``text`` of the first
        matching row, or the string ``"No data available"`` when no row
        matches.
    """
    repo_data = repos_df[repos_df["repo_name"] == repo]
    logging.info("repo_data: %s", repo_data)
    return (
        _representation_text(repo_data, representation1),
        _representation_text(repo_data, representation2),
    )


if __name__ == "__main__":
    # `repos_df` is deliberately a module-level name: display_representations
    # reads it as a global.
    repos_df = load_repo_df()
    repos = list(repos_df["repo_name"].unique())
    representation_types = list(repos_df["representation"].unique())
    logging.info("found %d repositories", len(repos))
    logging.info("representation types: %s", representation_types)

    if not repos:
        # Without this guard the default selection below fails with a bare
        # IndexError that does not say what is wrong.
        raise SystemExit("no repositories found in the loaded data; nothing to display")

    # Single source of truth for the initial selection; used both for the
    # dropdown defaults and for the initial render of the two panes.
    default_repo = repos[0]
    default_representation1 = "readme"
    default_representation2 = "generated_readme"

    def update_representations(*args):
        """Format both representation texts as Markdown sections.

        Accepts the same positional arguments as ``display_representations``
        (repository, first representation, second representation).
        """
        text1_content, text2_content = display_representations(*args)
        return (
            f"### Representation 1\n\n{text1_content}",
            f"### Representation 2\n\n{text2_content}",
        )

    with gr.Blocks() as demo:
        gr.Markdown("# Repository Representations Viewer")
        gr.Markdown("Select a repository and two representation types to compare them.")

        with gr.Row():
            repo = gr.Dropdown(choices=repos, label="Repository", value=default_repo)
            representation1 = gr.Dropdown(
                choices=representation_types,
                label="Representation 1",
                value=default_representation1,
            )
            representation2 = gr.Dropdown(
                choices=representation_types,
                label="Representation 2",
                value=default_representation2,
            )

        # NOTE(review): gr.Box is not available in newer Gradio releases
        # (gr.Group is the usual replacement) -- confirm against the pinned
        # Gradio version before upgrading.
        with gr.Row():
            with gr.Box(elem_id="box1"):
                text1 = gr.Markdown()
            with gr.Box(elem_id="box2"):
                text2 = gr.Markdown()

        # Add custom CSS
        # NOTE(review): the string below is blank, so no styling is applied;
        # presumably the CSS content was lost -- confirm.
        gr.Markdown(
            """ """
        )

        # Initial call to populate textboxes with default values
        text1.value, text2.value = update_representations(
            default_repo, default_representation1, default_representation2
        )

        # Changing any of the three dropdowns re-renders both panes.
        for component in [repo, representation1, representation2]:
            component.change(
                fn=update_representations,
                inputs=[repo, representation1, representation2],
                outputs=[text1, text2],
            )

    demo.launch()