"""
Quantum Physics Problem Generator
Author: Shlomo Kashani

Description:
------------
This module is part of the QuantumLLMInstruct system, designed to generate and solve quantum physics problems
using advanced Large Language Models (LLMs). It utilizes a multi-stage pipeline for problem generation,
solution generation, and database management.

Core Functionalities:
---------------------
1. **Problem Generation**:
   - Generates quantum physics problems in LaTeX format using LLMs.
   - Supports domain-specific problem generation across multiple quantum fields.

2. **Solution Generation**:
   - Provides step-by-step LaTeX solutions for the generated problems using a second LLM.

3. **Data Management**:
   - Stores generated problems and solutions in DuckDB and Parquet files.
   - Enables exporting data in Parquet format for scalability and compatibility.

4. **Gradio Interface**:
   - A user-friendly interface to interact with the system, including problem generation,
     solution generation, and database exploration.

5. **Hugging Face Integration**:
   - Supports visualization and interaction with the dataset on the Hugging Face platform.

Main Components:
----------------
- **initialize_duckdb() / initialize_parquet()**: Initializes the database schema.
- **generate_multiple_problems()**: Generates multiple problems for the selected quantum domains.
- **generate_solutions()**: Solves unsolved problems in the database.
- **export_parquet()**: Exports the database to a Parquet file for external use.

Dependencies:
-------------
- Python 3.7+
- Transformers: `transformers`
- DuckDB: `duckdb`
- Gradio: `gradio`
- Pandas: `pandas`
"""
|
import gradio as gr

# NOTE(review): the wildcard imports are kept unchanged. The rest of this
# script uses names that are not defined in this file (e.g. initialize_duckdb,
# model_options, reload_model, generate_solutions); which of the two modules
# exports which name cannot be determined from here, so they are not narrowed.
from Q_llm_prompts import *
from Q_quantum_utils import *

# Run the database initialisation once at start-up, before the UI is built
# and before any callback can touch the database.
initialize_duckdb()

# Markdown/HTML blurb rendered inside the app.
# NOTE(review): the "π" / "π¬" glyphs below look like emoji that were decoded
# with the wrong character set somewhere upstream -- confirm against the
# original file before changing them; they are reproduced here untouched.
description = """
This demo showcases **[QuantumLLMInstruct](https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/)**
<img src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/resolve/main/qlmmi-detailed-flowchart.jpg" alt="The Pipeline" width="70%" align="center" />
## π Pipeline:
1. **π Problem Generation:** The Qwen model generates a user instruction.
2. **π¬ Solution Generation:** The Qwen model generates a response to this instruction.
"""
|
|
|
|
|
# Preferred positions of the initially selected entries in the model dropdowns.
DEFAULT_PROBLEM_MODEL_INDEX = 0
DEFAULT_SOLUTION_MODEL_INDEX = 4


def _default_choice(options, preferred_index):
    """Return the entry at ``preferred_index``, clamped to the list length.

    Args:
        options: Indexable sequence of dropdown choices.
        preferred_index: Non-negative position that should be pre-selected.

    Returns:
        ``options[preferred_index]`` when that position exists, the last
        entry when the sequence is shorter, or ``None`` when it is empty
        (which leaves the dropdown without a pre-selected value).
    """
    if not options:
        return None
    return options[min(preferred_index, len(options) - 1)]


# Main Gradio application: defines the QuantumLLMInstruct pipeline UI with
# tabs for viewing datasets, generating problems, generating solutions and
# exporting the database.
with gr.Blocks() as app:
    gr.Markdown("# QuantumLLMInstruct: A 500k LLM Instruction-Tuning Dataset with Problem-Solution Pairs for Quantum Computing.")

    # --- Tab: embedded viewer for the dataset hosted on Hugging Face --------
    with gr.Tab("View HF DB"):
        gr.Markdown("### Generated Dataset")
        gr.HTML("""<iframe
  src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/embed/viewer"
  frameborder="0"
  width="100%"
  height="560px"
></iframe>""")

    # --- Tab: stage 1, problem generation ------------------------------------
    # Lets the user pick a model, choose domains and set how many problems
    # to generate.
    with gr.Tab("LLM stage 1 model: Generate Problems"):
        model_selector = gr.Dropdown(
            choices=model_options,
            value=_default_choice(model_options, DEFAULT_PROBLEM_MODEL_INDEX),
            label="Select Qwen Model",
        )
        reload_button = gr.Button("Reload Model")
        reload_status = gr.Textbox(label="Model Status", interactive=False)

        generate_button = gr.Button("π Generate Instructions For the Pair")
        result_output = gr.Textbox(label="Generated Problems", interactive=False)
        num_pairs = gr.Radio(
            choices=[1, 5, 50, 200, 2000, 20000, 200000],
            value=5,
            label="Number of Problems",
        )

        # All domains are ticked by default.
        domain_selector = gr.CheckboxGroup(
            choices=list(quantum_problem_domains.keys()),
            value=list(quantum_problem_domains.keys()),
            label="Select Domain Types",
        )

        reload_button.click(
            reload_model,
            inputs=[model_selector],
            outputs=[reload_status],
        )

        def generate_and_display(num_pairs, selected_domains):
            """Generate problems for the chosen domains and report the outcome.

            Args:
                num_pairs (int): Number of problems to generate.
                selected_domains (list): Domains ticked in the checkbox group.

            Returns:
                str: Status message shown in the "Generated Problems" textbox.
            """
            # With no domain ticked there is nothing to generate for; say so
            # rather than forwarding an empty list and reporting success.
            if not selected_domains:
                return "No domains selected. Please select at least one domain."
            generate_multiple_problems(num_pairs, selected_domains)
            return "Problems generated successfully."

        generate_button.click(
            generate_and_display,
            inputs=[num_pairs, domain_selector],
            outputs=[result_output],
        )

    # --- Tab: stage 2, solution generation -----------------------------------
    with gr.Tab("LLM Stage 2: Generate Solutions"):
        generate_solutions_button = gr.Button("π Generate Responses for the Pair")
        solutions_status = gr.Textbox(label="Solution Generation Status", interactive=False)

        # The fifth configured model is the preferred default; the helper
        # avoids an IndexError when fewer models are configured.
        solutions_model_selector = gr.Dropdown(
            choices=solutions_model_options,
            value=_default_choice(solutions_model_options, DEFAULT_SOLUTION_MODEL_INDEX),
            label="Select Solution Model",
        )

        generate_solutions_button.click(
            generate_solutions,
            inputs=[solutions_model_selector],
            outputs=[solutions_status],
        )

    # --- Tab: browse the locally stored instruction pairs --------------------
    with gr.Tab("View instruction-pairs DB"):
        summary_output = gr.HTML()
        view_button = gr.Button("View Data")
        db_output_display = gr.HTML()

        # First listener on the button: fills the summary area above it.
        view_button.click(load_summary_from_duckdb, inputs=None, outputs=summary_output)

        def view_db_data():
            """Render the stored problems as an HTML table.

            Returns:
                str: HTML table of the loaded DataFrame, or a short message
                when the DataFrame is empty.
            """
            df = load_problems_from_duckdb()
            if df.empty:
                return "<p>No data found in the DuckDB database.</p>"
            # NOTE(review): escape=False emits cell text as raw HTML. The cells
            # presumably hold model-generated text, so this can inject markup
            # into the page -- confirm it is intentional before keeping it.
            return df.to_html(index=False, escape=False)

        # Second listener on the same button: fills the detailed table below it.
        view_button.click(
            view_db_data,
            inputs=None,
            outputs=[db_output_display],
        )

    # --- Tab: export the database to Parquet ---------------------------------
    with gr.Tab("Export Parquet"):
        gr.Markdown("### Export DuckDB Data to Parquet Format")
        db_file_input = gr.Textbox(label="Database File Path", value="quantum_problems.duckdb")
        export_button = gr.Button("Export Data")
        export_status = gr.Textbox(label="Export Status", interactive=False)

        export_button.click(
            export_parquet,
            inputs=[db_file_input],
            outputs=[export_status],
        )

    # Project description shown below the tabs.
    gr.Markdown(description)

# The app is started as soon as this file is executed.
# NOTE(review): this also runs if the module is ever imported; wrap it in an
# `if __name__ == "__main__":` guard if import-time launch is not wanted.
app.launch()
|
|