File size: 7,446 Bytes
5ebbc77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
"""
Quantum Physics Problem Generator
Shlomo Kashani
Description:
------------
This module is part of the QuantumLLMInstruct system, designed to generate and solve quantum physics problems
using advanced Large Language Models (LLMs). It utilizes a multi-stage pipeline for problem generation,
solution generation, and database management.
Core Functionalities:
---------------------
1. **Problem Generation**:
- Generates quantum physics problems in LaTeX format using LLMs.
- Supports domain-specific problem generation across multiple quantum fields.
2. **Solution Generation**:
- Provides step-by-step LaTeX solutions for the generated problems using a second LLM.
3. **Data Management**:
- Stores generated problems and solutions in DuckDB and Parquet files.
- Enables exporting data in Parquet format for scalability and compatibility.
4. **Gradio Interface**:
- A user-friendly interface to interact with the system, including problem generation,
solution generation, and database exploration.
5. **Hugging Face Integration**:
- Supports visualization and interaction with the dataset on the Hugging Face platform.
Main Components:
----------------
- **initialize_duckdb() / initialize_parquet()**: Initializes the database schema.
- **generate_multiple_problems()**: Generates multiple problems for the selected quantum domains.
- **generate_solutions()**: Solves unsolved problems in the database.
- **export_parquet()**: Exports the database to a Parquet file for external use.
Dependencies:
-------------
- Python 3.7+
- Transformers: `transformers`
- DuckDB: `duckdb`
- Gradio: `gradio`
- Pandas: `pandas`
"""
import gradio as gr
from Q_llm_prompts import *
from Q_quantum_utils import *
initialize_duckdb()
description = """
This demo showcases **[QuantumLLMInstruct](https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/)**
<img src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/resolve/main/qlmmi-detailed-flowchart.jpg" alt="The Pipeline" width="70%" align="center" />
## π Pipeline:
1. **π Problem Generation:** The Qwen model generates a user instruction.
2. **π¬ Solution Generation:** The Qwen model generates a response to this instruction.
"""
# Gradio app
with gr.Blocks() as app:
"""
Main Gradio application block defining the QuantumLLMInstruct pipeline.
Provides tabs for viewing datasets, generating problems, generating solutions,
and exporting the database.
"""
gr.Markdown("# QuantumLLMInstruct: A 500k LLM Instruction-Tuning Dataset with Problem-Solution Pairs for Quantum Computing.")
with gr.Tab("View HF DB"):
"""
Tab for displaying the Hugging Face QuantumLLMInstruct dataset.
Embeds a viewer for exploring the dataset hosted on Hugging Face.
"""
gr.Markdown("### Generated Dataset")
gr.HTML("""<iframe
src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/embed/viewer"
frameborder="0"
width="100%"
height="560px"
></iframe>""")
with gr.Tab("LLM stage 1 model: Generate Problems"):
"""
Tab for generating quantum physics problems using the Qwen model.
Allows users to select models, choose domains, and specify the number of problems to generate.
"""
model_selector = gr.Dropdown(
choices=model_options,
value=model_options[0],
label="Select Qwen Model"
)
reload_button = gr.Button("Reload Model")
reload_status = gr.Textbox(label="Model Status", interactive=False)
generate_button = gr.Button("π Generate Instructions For the Pair")
result_output = gr.Textbox(label="Generated Problems", interactive=False)
num_pairs = gr.Radio(choices=[1, 5, 50, 200, 2000, 20000, 200000], value=5, label="Number of Problems")
domain_selector = gr.CheckboxGroup(
choices=list(quantum_problem_domains.keys()),
value=list(quantum_problem_domains.keys()),
label="Select Domain Types"
)
reload_button.click(
reload_model,
inputs=[model_selector],
outputs=[reload_status]
)
def generate_and_display(num_pairs, selected_domains):
"""
Generates multiple quantum problems based on user inputs.
Args:
num_pairs (int): Number of problems to generate.
selected_domains (list): Selected quantum domains for problem generation.
Returns:
str: Status message confirming successful problem generation.
"""
generate_multiple_problems(num_pairs, selected_domains)
return "Problems generated successfully."
generate_button.click(
generate_and_display,
inputs=[num_pairs, domain_selector],
outputs=[result_output]
)
with gr.Tab("LLM Stage 2: Generate Solutions"):
"""
Tab for generating solutions to the quantum problems using the Qwen solution models.
"""
generate_solutions_button = gr.Button("π Generate Responses for the Pair")
solutions_status = gr.Textbox(label="Solution Generation Status", interactive=False)
solutions_model_selector = gr.Dropdown(
choices=solutions_model_options,
value=solutions_model_options[4],
label="Select Solution Model"
)
generate_solutions_button.click(
generate_solutions,
inputs=[solutions_model_selector],
outputs=[solutions_status]
)
with gr.Tab("View instruction-pairs DB"):
"""
Tab for viewing data stored in the DuckDB database.
Provides an HTML table viewer for summarized and detailed data exploration.
"""
summary_output = gr.HTML()
view_button = gr.Button("View Data")
db_output_display = gr.HTML()
view_button.click(load_summary_from_duckdb, inputs=None, outputs=summary_output)
def view_db_data():
"""
Loads and formats problems stored in the DuckDB database for display.
Returns:
str: HTML representation of the database content or a message indicating no data.
"""
df = load_problems_from_duckdb()
if df.empty:
return "<p>No data found in the DuckDB database.</p>"
html_table = df.to_html(index=False, escape=False)
return html_table
view_button.click(
view_db_data,
inputs=None,
outputs=[db_output_display]
)
with gr.Tab("Export Parquet"):
"""
Tab for exporting the DuckDB database into a Parquet file format.
Enables efficient data sharing and downstream analysis.
"""
gr.Markdown("### Export DuckDB Data to Parquet Format")
db_file_input = gr.Textbox(label="Database File Path", value="quantum_problems.duckdb")
export_button = gr.Button("Export Data")
export_status = gr.Textbox(label="Export Status", interactive=False)
export_button.click(
export_parquet,
inputs=[db_file_input],
outputs=[export_status]
)
gr.Markdown(description)
app.launch()
|