QuantumLLMInstruct / Q_app_llm_pairs_gen.py
BoltzmannEntropy
LT
5ebbc77
raw
history blame
7.45 kB
"""
Quantum Physics Problem Generator
Shlomo Kashani
Description:
------------
This module is part of the QuantumLLMInstruct system, designed to generate and solve quantum physics problems
using advanced Large Language Models (LLMs). It utilizes a multi-stage pipeline for problem generation,
solution generation, and database management.
Core Functionalities:
---------------------
1. **Problem Generation**:
- Generates quantum physics problems in LaTeX format using LLMs.
- Supports domain-specific problem generation across multiple quantum fields.
2. **Solution Generation**:
- Provides step-by-step LaTeX solutions for the generated problems using a second LLM.
3. **Data Management**:
- Stores generated problems and solutions in DuckDB and Parquet files.
- Enables exporting data in Parquet format for scalability and compatibility.
4. **Gradio Interface**:
- A user-friendly interface to interact with the system, including problem generation,
solution generation, and database exploration.
5. **Hugging Face Integration**:
- Supports visualization and interaction with the dataset on the Hugging Face platform.
Main Components:
----------------
- **initialize_duckdb() / initialize_parquet()**: Initializes the database schema.
- **generate_multiple_problems()**: Generates multiple problems for the selected quantum domains.
- **generate_solutions()**: Solves unsolved problems in the database.
- **export_parquet()**: Exports the database to a Parquet file for external use.
Dependencies:
-------------
- Python 3.7+
- Transformers: `transformers`
- DuckDB: `duckdb`
- Gradio: `gradio`
- Pandas: `pandas`
"""
import gradio as gr
from Q_llm_prompts import *
from Q_quantum_utils import *
initialize_duckdb()
description = """
This demo showcases **[QuantumLLMInstruct](https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/)**
<img src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/resolve/main/qlmmi-detailed-flowchart.jpg" alt="The Pipeline" width="70%" align="center" />
## πŸš€ Pipeline:
1. **πŸ“ Problem Generation:** The Qwen model generates a user instruction.
2. **πŸ’¬ Solution Generation:** The Qwen model generates a response to this instruction.
"""
# Gradio app
with gr.Blocks() as app:
"""
Main Gradio application block defining the QuantumLLMInstruct pipeline.
Provides tabs for viewing datasets, generating problems, generating solutions,
and exporting the database.
"""
gr.Markdown("# QuantumLLMInstruct: A 500k LLM Instruction-Tuning Dataset with Problem-Solution Pairs for Quantum Computing.")
with gr.Tab("View HF DB"):
"""
Tab for displaying the Hugging Face QuantumLLMInstruct dataset.
Embeds a viewer for exploring the dataset hosted on Hugging Face.
"""
gr.Markdown("### Generated Dataset")
gr.HTML("""<iframe
src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/embed/viewer"
frameborder="0"
width="100%"
height="560px"
></iframe>""")
with gr.Tab("LLM stage 1 model: Generate Problems"):
"""
Tab for generating quantum physics problems using the Qwen model.
Allows users to select models, choose domains, and specify the number of problems to generate.
"""
model_selector = gr.Dropdown(
choices=model_options,
value=model_options[0],
label="Select Qwen Model"
)
reload_button = gr.Button("Reload Model")
reload_status = gr.Textbox(label="Model Status", interactive=False)
generate_button = gr.Button("πŸš€ Generate Instructions For the Pair")
result_output = gr.Textbox(label="Generated Problems", interactive=False)
num_pairs = gr.Radio(choices=[1, 5, 50, 200, 2000, 20000, 200000], value=5, label="Number of Problems")
domain_selector = gr.CheckboxGroup(
choices=list(quantum_problem_domains.keys()),
value=list(quantum_problem_domains.keys()),
label="Select Domain Types"
)
reload_button.click(
reload_model,
inputs=[model_selector],
outputs=[reload_status]
)
def generate_and_display(num_pairs, selected_domains):
"""
Generates multiple quantum problems based on user inputs.
Args:
num_pairs (int): Number of problems to generate.
selected_domains (list): Selected quantum domains for problem generation.
Returns:
str: Status message confirming successful problem generation.
"""
generate_multiple_problems(num_pairs, selected_domains)
return "Problems generated successfully."
generate_button.click(
generate_and_display,
inputs=[num_pairs, domain_selector],
outputs=[result_output]
)
with gr.Tab("LLM Stage 2: Generate Solutions"):
"""
Tab for generating solutions to the quantum problems using the Qwen solution models.
"""
generate_solutions_button = gr.Button("πŸš€ Generate Responses for the Pair")
solutions_status = gr.Textbox(label="Solution Generation Status", interactive=False)
solutions_model_selector = gr.Dropdown(
choices=solutions_model_options,
value=solutions_model_options[4],
label="Select Solution Model"
)
generate_solutions_button.click(
generate_solutions,
inputs=[solutions_model_selector],
outputs=[solutions_status]
)
with gr.Tab("View instruction-pairs DB"):
"""
Tab for viewing data stored in the DuckDB database.
Provides an HTML table viewer for summarized and detailed data exploration.
"""
summary_output = gr.HTML()
view_button = gr.Button("View Data")
db_output_display = gr.HTML()
view_button.click(load_summary_from_duckdb, inputs=None, outputs=summary_output)
def view_db_data():
"""
Loads and formats problems stored in the DuckDB database for display.
Returns:
str: HTML representation of the database content or a message indicating no data.
"""
df = load_problems_from_duckdb()
if df.empty:
return "<p>No data found in the DuckDB database.</p>"
html_table = df.to_html(index=False, escape=False)
return html_table
view_button.click(
view_db_data,
inputs=None,
outputs=[db_output_display]
)
with gr.Tab("Export Parquet"):
"""
Tab for exporting the DuckDB database into a Parquet file format.
Enables efficient data sharing and downstream analysis.
"""
gr.Markdown("### Export DuckDB Data to Parquet Format")
db_file_input = gr.Textbox(label="Database File Path", value="quantum_problems.duckdb")
export_button = gr.Button("Export Data")
export_status = gr.Textbox(label="Export Status", interactive=False)
export_button.click(
export_parquet,
inputs=[db_file_input],
outputs=[export_status]
)
gr.Markdown(description)
app.launch()