""" Usage: python3 qa_browser.py --share """ import argparse from collections import defaultdict from glob import glob import re import gradio as gr import pandas as pd questions = [] question_selector_map = {} category_selector_map = defaultdict(list) def display_question(category_selector, request: gr.Request): # print(category_selector) choices = category_selector_map[category_selector] return gr.Dropdown.update( value=choices[0], choices=choices, ) def display_pairwise_answer( question_selector, model_selector1, model_selector2, request: gr.Request ): q = question_selector_map[question_selector] qid = q["question_id"] ans1 = q["conversation_a"] ans2 = q["conversation_b"] chat_mds = pairwise_to_gradio_chat_mds(q, ans1, ans2) return chat_mds newline_pattern1 = re.compile("\n\n(\d+\. )") newline_pattern2 = re.compile("\n\n(- )") def post_process_answer(x): """Fix Markdown rendering problems.""" x = x.replace("\u2022", "- ") x = re.sub(newline_pattern1, "\n\g<1>", x) x = re.sub(newline_pattern2, "\n\g<1>", x) return x def pairwise_to_gradio_chat_mds(question, ans_a, ans_b, turn=None): end = question["turn"] * 3 mds = [""] * end base = 0 for i in range(0, end, 3): mds[i] = "##### User\n" + question["conversation_a"][base]["content"].strip() mds[i + 1] = f"##### {question['model_a']}\n" + post_process_answer( ans_a[base + 1]["content"].strip() ) mds[i + 2] = f"##### {question['model_b']}\n" + post_process_answer( ans_b[base + 1]["content"].strip() ) base += 2 winner = question["winner"] if "tie" in question["winner"] else question[question["winner"]] mds += [f"##### Vote: {winner}"] mds += [""] * (16 - len(mds)) return mds def build_question_selector_map(): global question_selector_map, category_selector_map # Build question selector map for q in questions: preview = q["conversation_a"][0]["content"][:128] + "..." question_selector_map[preview] = q category_selector_map[q["category"]].append(preview) def build_pairwise_browser_tab(): global question_selector_map, category_selector_map num_sides = 2 num_turns = 5 side_names = ["A", "B"] question_selector_choices = list(question_selector_map.keys()) category_selector_choices = list(category_selector_map.keys()) print(category_selector_choices) # Selectors with gr.Row(): with gr.Column(scale=1, min_width=200): category_selector = gr.Dropdown( choices=category_selector_choices, # value="Instruction Following", label="Category", container=False ) with gr.Column(scale=100): question_selector = gr.Dropdown( choices=question_selector_choices, label="Question", container=False ) # Conversation chat_mds = [] for i in range(num_turns): chat_mds.append(gr.Markdown(elem_id=f"user_question_{i+1}")) with gr.Row(): for j in range(num_sides): with gr.Column(scale=100): chat_mds.append(gr.Markdown()) if j == 0: with gr.Column(scale=1, min_width=8): gr.Markdown() chat_mds.append(gr.Markdown()) # Callbacks category_selector.change(display_question, [category_selector], [question_selector]) question_selector.change( display_pairwise_answer, [question_selector], chat_mds, ) return (category_selector,) def load_demo(): dropdown_update = gr.Dropdown.update(value="Math") return dropdown_update def build_demo(): build_question_selector_map() with gr.Blocks( title="Chatbot Arena Examples", theme=gr.themes.Base(text_size=gr.themes.sizes.text_lg), ) as demo: gr.Markdown( """ # Chatbot Arena Examples We randomly sample 20 battles from each category using seed 42. | [Paper](https://arxiv.org/abs/2403.04132) | [Leaderboard](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) | """ ) (category_selector,) = build_pairwise_browser_tab() demo.load(load_demo, [], [category_selector]) return demo def load_questions(directory: str): import json """Load questions from a file.""" questions = [] for file in glob(directory): with open(file, "r") as ques_file: for line in ques_file: if line: questions.append(json.loads(line)) return questions if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="0.0.0.0") parser.add_argument("--port", type=int) parser.add_argument("--share", action="store_true") args = parser.parse_args() print(args) questions = load_questions("data/*.jsonl") demo = build_demo() demo.launch( server_name=args.host, server_port=args.port, share=args.share, max_threads=200 )