""" Quantum Physics Problem Generator Shlomo Kashani Description: ------------ This module is part of the QuantumLLMInstruct system, designed to generate and solve quantum physics problems using advanced Large Language Models (LLMs). It utilizes a multi-stage pipeline for problem generation, solution generation, and database management. Core Functionalities: --------------------- 1. **Problem Generation**: - Generates quantum physics problems in LaTeX format using LLMs. - Supports domain-specific problem generation across multiple quantum fields. 2. **Solution Generation**: - Provides step-by-step LaTeX solutions for the generated problems using a second LLM. 3. **Data Management**: - Stores generated problems and solutions in DuckDB and Parquet files. - Enables exporting data in Parquet format for scalability and compatibility. 4. **Gradio Interface**: - A user-friendly interface to interact with the system, including problem generation, solution generation, and database exploration. 5. **Hugging Face Integration**: - Supports visualization and interaction with the dataset on the Hugging Face platform. Main Components: ---------------- - **initialize_duckdb() / initialize_parquet()**: Initializes the database schema. - **generate_multiple_problems()**: Generates multiple problems for the selected quantum domains. - **generate_solutions()**: Solves unsolved problems in the database. - **export_parquet()**: Exports the database to a Parquet file for external use. Dependencies: ------------- - Python 3.7+ - Transformers: `transformers` - DuckDB: `duckdb` - Gradio: `gradio` - Pandas: `pandas` """ import gradio as gr from Q_llm_prompts import * from Q_quantum_utils import * initialize_duckdb() description = """ This demo showcases **[QuantumLLMInstruct](https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/)** The Pipeline ## 🚀 Pipeline: 1. **📝 Problem Generation:** The Qwen model generates a user instruction. 2. **💬 Solution Generation:** The Qwen model generates a response to this instruction. """ # Gradio app with gr.Blocks() as app: """ Main Gradio application block defining the QuantumLLMInstruct pipeline. Provides tabs for viewing datasets, generating problems, generating solutions, and exporting the database. """ gr.Markdown("# QuantumLLMInstruct: A 500k LLM Instruction-Tuning Dataset with Problem-Solution Pairs for Quantum Computing.") with gr.Tab("View HF DB"): """ Tab for displaying the Hugging Face QuantumLLMInstruct dataset. Embeds a viewer for exploring the dataset hosted on Hugging Face. """ gr.Markdown("### Generated Dataset") gr.HTML("""""") with gr.Tab("LLM stage 1 model: Generate Problems"): """ Tab for generating quantum physics problems using the Qwen model. Allows users to select models, choose domains, and specify the number of problems to generate. """ model_selector = gr.Dropdown( choices=model_options, value=model_options[0], label="Select Qwen Model" ) reload_button = gr.Button("Reload Model") reload_status = gr.Textbox(label="Model Status", interactive=False) generate_button = gr.Button("🚀 Generate Instructions For the Pair") result_output = gr.Textbox(label="Generated Problems", interactive=False) num_pairs = gr.Radio(choices=[1, 5, 50, 200, 2000, 20000, 200000], value=5, label="Number of Problems") domain_selector = gr.CheckboxGroup( choices=list(quantum_problem_domains.keys()), value=list(quantum_problem_domains.keys()), label="Select Domain Types" ) reload_button.click( reload_model, inputs=[model_selector], outputs=[reload_status] ) def generate_and_display(num_pairs, selected_domains): """ Generates multiple quantum problems based on user inputs. Args: num_pairs (int): Number of problems to generate. selected_domains (list): Selected quantum domains for problem generation. Returns: str: Status message confirming successful problem generation. """ generate_multiple_problems(num_pairs, selected_domains) return "Problems generated successfully." generate_button.click( generate_and_display, inputs=[num_pairs, domain_selector], outputs=[result_output] ) with gr.Tab("LLM Stage 2: Generate Solutions"): """ Tab for generating solutions to the quantum problems using the Qwen solution models. """ generate_solutions_button = gr.Button("🚀 Generate Responses for the Pair") solutions_status = gr.Textbox(label="Solution Generation Status", interactive=False) solutions_model_selector = gr.Dropdown( choices=solutions_model_options, value=solutions_model_options[4], label="Select Solution Model" ) generate_solutions_button.click( generate_solutions, inputs=[solutions_model_selector], outputs=[solutions_status] ) with gr.Tab("View instruction-pairs DB"): """ Tab for viewing data stored in the DuckDB database. Provides an HTML table viewer for summarized and detailed data exploration. """ summary_output = gr.HTML() view_button = gr.Button("View Data") db_output_display = gr.HTML() view_button.click(load_summary_from_duckdb, inputs=None, outputs=summary_output) def view_db_data(): """ Loads and formats problems stored in the DuckDB database for display. Returns: str: HTML representation of the database content or a message indicating no data. """ df = load_problems_from_duckdb() if df.empty: return "

No data found in the DuckDB database.

" html_table = df.to_html(index=False, escape=False) return html_table view_button.click( view_db_data, inputs=None, outputs=[db_output_display] ) with gr.Tab("Export Parquet"): """ Tab for exporting the DuckDB database into a Parquet file format. Enables efficient data sharing and downstream analysis. """ gr.Markdown("### Export DuckDB Data to Parquet Format") db_file_input = gr.Textbox(label="Database File Path", value="quantum_problems.duckdb") export_button = gr.Button("Export Data") export_status = gr.Textbox(label="Export Status", interactive=False) export_button.click( export_parquet, inputs=[db_file_input], outputs=[export_status] ) gr.Markdown(description) app.launch()