# Provenance: Hugging Face Space "Biotech2", file interface.py
# Last commit: 6660e8c (verified) "Update interface.py" by C2MV; file size 16.6 kB
# interface.py
import ast
import copy
import io

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image
from sympy import symbols, sympify, lambdify
from transformers import AutoTokenizer, AutoModelForCausalLM

from config import DEVICE, MODEL_PATH, MAX_LENGTH, TEMPERATURE
from models import BioprocessModel
# Module-level language-model setup, performed once at import time.
# `device` and `model_path` are aliases of the values imported from config.
device = DEVICE
model_path = MODEL_PATH

# Load the tokenizer and the causal LM from the same checkpoint path.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
# Move the weights to the configured device, then switch to inference mode.
model = model.to(device)
model = model.eval()
def generate_analysis(prompt, max_length=MAX_LENGTH):
    """Generate a text continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text that is tokenized and fed to ``model.generate``.
        max_length: Number of tokens allowed on top of the prompt length
            (the total length limit passed to ``generate`` is the prompt
            token count plus this value).

    Returns:
        The generated continuation with the prompt removed and surrounding
        whitespace stripped. If anything raises, a string of the form
        ``"An error occurred during analysis: <exception>"`` is returned
        instead of propagating the exception.
    """
    try:
        input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
        prompt_token_count = input_ids.shape[-1]
        # NOTE(review): `temperature` is passed without `do_sample=True`;
        # confirm whether sampling was intended for this model/config.
        generated_ids = model.generate(
            input_ids=input_ids,
            max_length=max_length + prompt_token_count,
            temperature=TEMPERATURE,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            early_stopping=True
        )
        # Drop the prompt by token count rather than by character count:
        # decoding does not necessarily reproduce the prompt text exactly
        # (special tokens are skipped, spacing may be normalised), so slicing
        # the decoded string at len(prompt) can cut at the wrong position.
        new_token_ids = generated_ids[0][prompt_token_count:]
        analysis = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
        return analysis
    except Exception as e:
        return f"An error occurred during analysis: {e}"
def _eval_bound_expr(node):
    """Evaluate one AST node of a bounds string using a strict whitelist.

    Accepted syntax: tuples/lists, int/float literals, unary ``+``/``-``,
    and the infinity spellings ``inf``, ``np.inf``, ``numpy.inf``,
    ``math.inf``. Anything else raises ``ValueError`` so that no
    user-supplied code is ever executed.
    """
    if isinstance(node, (ast.Tuple, ast.List)):
        return [_eval_bound_expr(element) for element in node.elts]
    if isinstance(node, ast.Constant):
        value = node.value
        # bool is a subclass of int; it is not a meaningful bound.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
        raise ValueError(f"unsupported literal in bounds: {value!r}")
    if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
        operand = _eval_bound_expr(node.operand)
        return -operand if isinstance(node.op, ast.USub) else +operand
    if isinstance(node, ast.Name) and node.id == "inf":
        return np.inf
    if (
        isinstance(node, ast.Attribute)
        and node.attr == "inf"
        and isinstance(node.value, ast.Name)
        and node.value.id in ("np", "numpy", "math")
    ):
        return np.inf
    raise ValueError(f"unsupported expression in bounds: {type(node).__name__}")


def parse_bounds(bounds_str, num_params):
    """Parse a user-entered bounds string into lower and upper bound lists.

    ``bounds_str`` is expected to look like ``"(lo1, hi1), (lo2, hi2)"`` with
    one pair per parameter.

    SECURITY: this text comes from a UI text box. It used to be passed to
    ``eval``, which allowed arbitrary code execution; it is now parsed with
    ``ast`` and evaluated against a whitelist (numbers, sign, tuples/lists
    and infinity names only). Arithmetic expressions and other names that
    ``eval`` accepted are deliberately rejected.

    Args:
        bounds_str: Comma-separated ``(lower, upper)`` pairs.
        num_params: Number of parameters the bounds must cover.

    Returns:
        ``(lower_bounds, upper_bounds)`` as two lists of length
        ``num_params``. If the string cannot be parsed, or the number of
        pairs differs from ``num_params``, every bound falls back to
        ``-inf`` / ``+inf`` (unbounded).
    """
    try:
        tree = ast.parse(f"[{bounds_str}]", mode="eval")
        bounds = _eval_bound_expr(tree.body)
        if len(bounds) != num_params:
            raise ValueError("number of bound pairs does not match number of parameters")
        lower_bounds = [b[0] for b in bounds]
        upper_bounds = [b[1] for b in bounds]
        return lower_bounds, upper_bounds
    except Exception:
        # Best-effort by design: malformed input means "no bounds". Only
        # ordinary exceptions are caught, so SystemExit/KeyboardInterrupt
        # are no longer swallowed as they were by the bare `except:`.
        lower_bounds = [-np.inf] * num_params
        upper_bounds = [np.inf] * num_params
        return lower_bounds, upper_bounds
def _split_param_names(params_str):
    """Return the stripped names from a comma-separated parameter string."""
    return [name.strip() for name in params_str.split(',')]


def _fit_biomass_models(equations, params_strs, bounds_strs, time, data):
    """Fit every biomass equation to ``data`` and collect the fit results."""
    results = []
    for equation, params_str, bounds_str in zip(equations, params_strs, bounds_strs):
        fitter = BioprocessModel()
        fitter.set_model('biomass', equation, params_str)
        lower_bounds, upper_bounds = parse_bounds(
            bounds_str, len(_split_param_names(params_str))
        )
        y_pred = fitter.fit_model(
            'biomass', time, data,
            bounds=(lower_bounds, upper_bounds)
        )
        results.append({
            'model': copy.deepcopy(fitter),
            'y_pred': y_pred,
            'equation': equation
        })
    return results


def _fit_biomass_dependent_models(kind, equations, params_strs, bounds_strs,
                                  time, data, X_t, biomass_params_values):
    """Fit 'substrate' or 'product' equations that may reference ``X(t)``.

    ``X(t)`` in each equation is substituted with the first fitted biomass
    model evaluated symbolically at its fitted parameter values.
    """
    results = []
    t_symbol = symbols('t')
    for equation, params_str, bounds_str in zip(equations, params_strs, bounds_strs):
        fitter = BioprocessModel()
        param_names = _split_param_names(params_str)
        param_symbols = symbols(param_names)
        expression = sympify(equation)
        model_func = lambdify(
            (t_symbol, *param_symbols),
            expression.subs('X(t)', X_t(t_symbol, *biomass_params_values)),
            'numpy'
        )
        fitter.models[kind] = {
            'function': model_func,
            'params': param_names
        }
        lower_bounds, upper_bounds = parse_bounds(bounds_str, len(param_names))
        y_pred = fitter.fit_model(
            kind, time, data,
            bounds=(lower_bounds, upper_bounds)
        )
        results.append({
            'model': copy.deepcopy(fitter),
            'y_pred': y_pred,
            'equation': equation
        })
    return results


def _plot_fits(ax, time, data, results, quantity, show_legend, legend_position):
    """Draw measured points and every fitted curve for one quantity on ``ax``."""
    ax.plot(time, data, 'o', label=f'{quantity} Data')
    for number, result in enumerate(results, start=1):
        ax.plot(time, result['y_pred'], '-', label=f'{quantity} Model {number}')
    ax.set_xlabel('Time')
    ax.set_ylabel(quantity)
    if show_legend:
        ax.legend(loc=legend_position)


def _summarize_results(results, kind):
    """Build the per-model summary dicts (equation, parameters, R2, RMSE)."""
    return [
        {
            'model_number': number,
            'equation': result['equation'],
            'parameters': result['model'].params[kind],
            'R2': result['model'].r2[kind],
            'RMSE': result['model'].rmse[kind]
        }
        for number, result in enumerate(results, start=1)
    ]


def _format_model_section(model_infos):
    """Render the summary dicts of one quantity as the prompt's text block."""
    return "".join(
        f"\nModel {info['model_number']}:\n"
        f"Equation: {info['equation']}\n"
        f"Parameters: {info['parameters']}\n"
        f"R²: {info['R2']:.4f}\n"
        f"RMSE: {info['RMSE']:.4f}\n"
        for info in model_infos
    )


def process_and_plot(
    file,
    biomass_eq1, biomass_eq2, biomass_eq3,
    biomass_param1, biomass_param2, biomass_param3,
    biomass_bound1, biomass_bound2, biomass_bound3,
    substrate_eq1, substrate_eq2, substrate_eq3,
    substrate_param1, substrate_param2, substrate_param3,
    substrate_bound1, substrate_bound2, substrate_bound3,
    product_eq1, product_eq2, product_eq3,
    product_param1, product_param2, product_param3,
    product_bound1, product_bound2, product_bound3,
    legend_position,
    show_legend,
    show_params,
    biomass_eq_count,
    substrate_eq_count,
    product_eq_count
):
    """Fit the entered models to the uploaded data, plot them and analyse them.

    The Excel file must contain the columns ``Time``, ``Biomass``,
    ``Substrate`` and ``Product``. For each quantity only the first
    ``*_eq_count`` equation/parameter/bounds triples are used. Substrate and
    product equations have ``X(t)`` replaced by the first fitted biomass
    model.

    Args:
        file: Uploaded file; either an object with a ``name`` attribute
            holding the path, or the path itself.
        biomass_eq1 .. product_bound3: Equation, comma-separated parameter
            names and bounds strings for up to three models per quantity.
        legend_position: Matplotlib legend ``loc`` value.
        show_legend: Whether to draw a legend on each subplot.
        show_params: Accepted for interface compatibility; currently unused.
        biomass_eq_count, substrate_eq_count, product_eq_count: Number of
            active equations per quantity (ints or integral floats).

    Returns:
        ``([image], analysis)``: a one-element list holding the PIL image of
        the three stacked plots, and the text returned by
        ``generate_analysis``.

    Raises:
        gr.Error: If no file was provided.
    """
    if file is None:
        raise gr.Error("Please upload an Excel file before running the simulation.")

    # The counts arrive from gr.Number components, which may deliver floats;
    # list slicing requires ints.
    n_biomass = int(biomass_eq_count)
    n_substrate = int(substrate_eq_count)
    n_product = int(product_eq_count)

    biomass_eqs = [biomass_eq1, biomass_eq2, biomass_eq3][:n_biomass]
    biomass_params = [biomass_param1, biomass_param2, biomass_param3][:n_biomass]
    biomass_bounds = [biomass_bound1, biomass_bound2, biomass_bound3][:n_biomass]
    substrate_eqs = [substrate_eq1, substrate_eq2, substrate_eq3][:n_substrate]
    substrate_params = [substrate_param1, substrate_param2, substrate_param3][:n_substrate]
    substrate_bounds = [substrate_bound1, substrate_bound2, substrate_bound3][:n_substrate]
    product_eqs = [product_eq1, product_eq2, product_eq3][:n_product]
    product_params = [product_param1, product_param2, product_param3][:n_product]
    product_bounds = [product_bound1, product_bound2, product_bound3][:n_product]

    # Accept both a file wrapper exposing `.name` and a plain path string.
    df = pd.read_excel(getattr(file, "name", file))
    time = df['Time'].values
    biomass_data = df['Biomass'].values
    substrate_data = df['Substrate'].values
    product_data = df['Product'].values

    biomass_results = _fit_biomass_models(
        biomass_eqs, biomass_params, biomass_bounds, time, biomass_data
    )

    # The first biomass model supplies X(t) for the substrate/product fits.
    biomass_model = biomass_results[0]['model']
    X_t = biomass_model.models['biomass']['function']
    biomass_params_values = list(biomass_model.params['biomass'].values())

    substrate_results = _fit_biomass_dependent_models(
        'substrate', substrate_eqs, substrate_params, substrate_bounds,
        time, substrate_data, X_t, biomass_params_values
    )
    product_results = _fit_biomass_dependent_models(
        'product', product_eqs, product_params, product_bounds,
        time, product_data, X_t, biomass_params_values
    )

    fig, axs = plt.subplots(3, 1, figsize=(10, 15))
    try:
        _plot_fits(axs[0], time, biomass_data, biomass_results,
                   'Biomass', show_legend, legend_position)
        _plot_fits(axs[1], time, substrate_data, substrate_results,
                   'Substrate', show_legend, legend_position)
        _plot_fits(axs[2], time, product_data, product_results,
                   'Product', show_legend, legend_position)
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak one figure.
        plt.close(fig)
    buf.seek(0)
    image = Image.open(buf)

    all_results = {
        'biomass_models': _summarize_results(biomass_results, 'biomass'),
        'substrate_models': _summarize_results(substrate_results, 'substrate'),
        'product_models': _summarize_results(product_results, 'product')
    }

    results_text = "".join([
        "Experimental Results:\n\n",
        "Biomass Models:\n",
        _format_model_section(all_results['biomass_models']),
        "\nSubstrate Models:\n",
        _format_model_section(all_results['substrate_models']),
        "\nProduct Models:\n",
        _format_model_section(all_results['product_models']),
    ])

    prompt = (
        "\nYou are an expert in bioprocess modeling.\n"
        "Analyze the following experimental results and provide a verdict on "
        "the quality of the models, suggesting improvements if necessary.\n"
        f"{results_text}\n"
        "Your analysis should be detailed and professional.\n"
    )
    analysis = generate_analysis(prompt)
    return [image], analysis
def create_interface():
    """Build and return the Gradio Blocks application.

    The UI offers a file upload, up to ``MAX_EQUATIONS`` equation rows for
    each of Biomass, Substrate and Product (only the first row is visible
    initially), plot options, and a Simulate button wired to
    ``process_and_plot``. Add/Remove buttons change how many rows are visible
    and keep a hidden per-quantity counter in sync.

    Returns:
        The ``gr.Blocks`` instance (not yet launched).
    """
    MAX_EQUATIONS = 3

    with gr.Blocks() as demo:
        gr.Markdown("# Bioprocess Modeling Application with Yi-Coder Integration")
        file_input = gr.File(label="Upload Excel File")

        # Each *_equations list holds (row, equation_textbox) pairs; the
        # params/bounds lists hold the matching textboxes in the same order.
        biomass_equations = []
        biomass_params = []
        biomass_bounds = []
        substrate_equations = []
        substrate_params = []
        substrate_bounds = []
        product_equations = []
        product_params = []
        product_bounds = []

        def create_model_inputs(model_name, equations_list, params_list, bounds_list):
            """Create the equation rows and Add/Remove buttons for one quantity."""
            with gr.Column():
                gr.Markdown(f"### {model_name} Models")
                for i in range(MAX_EQUATIONS):
                    with gr.Row(visible=(i == 0)) as row:
                        equation_input = gr.Textbox(
                            label=f"{model_name} Model {i+1} Equation",
                            placeholder="Enter equation in terms of t and parameters",
                            lines=1,
                            value="" if i > 0 else "Default equation"
                        )
                        params_input = gr.Textbox(
                            label=f"{model_name} Model {i+1} Parameters",
                            placeholder="Comma-separated parameters",
                            lines=1,
                            value="" if i > 0 else "Parameters"
                        )
                        bounds_input = gr.Textbox(
                            label=f"{model_name} Model {i+1} Bounds",
                            placeholder="(lower, upper) for each parameter",
                            lines=1
                        )
                    equations_list.append((row, equation_input))
                    params_list.append(params_input)
                    bounds_list.append(bounds_input)
                add_btn = gr.Button(f"Add {model_name} Equation")
                remove_btn = gr.Button(f"Remove {model_name} Equation")
            return add_btn, remove_btn

        with gr.Accordion("Model Definitions", open=True):
            with gr.Row():
                with gr.Column():
                    add_biomass_btn, remove_biomass_btn = create_model_inputs(
                        "Biomass", biomass_equations, biomass_params, biomass_bounds
                    )
                with gr.Column():
                    add_substrate_btn, remove_substrate_btn = create_model_inputs(
                        "Substrate", substrate_equations, substrate_params, substrate_bounds
                    )
                with gr.Column():
                    add_product_btn, remove_product_btn = create_model_inputs(
                        "Product", product_equations, product_params, product_bounds
                    )

        legend_position = gr.Radio(
            choices=["upper left", "upper right", "lower left", "lower right", "best"],
            label="Legend Position",
            value="best"
        )
        show_legend = gr.Checkbox(label="Show Legend", value=True)
        show_params = gr.Checkbox(label="Show Parameters", value=True)
        simulate_btn = gr.Button("Simulate")

        with gr.Row():
            output_gallery = gr.Gallery(label="Results", columns=2, height='auto')
            analysis_output = gr.Textbox(label="Yi-Coder Analysis", lines=15)

        # Hidden per-quantity counters of visible equation rows. precision=0
        # makes the component deliver ints instead of floats.
        biomass_eq_count = gr.Number(value=1, visible=False, precision=0)
        substrate_eq_count = gr.Number(value=1, visible=False, precision=0)
        product_eq_count = gr.Number(value=1, visible=False, precision=0)

        def _row_updates(equations_list, eq_count):
            """Return one visibility update per row plus the new count.

            The result is a flat list so it lines up one-to-one with the
            ``outputs`` of the click handlers (rows..., counter). Visibility
            is expressed through ``gr.update`` only; the shared component
            objects are not mutated, so sessions do not affect each other.
            """
            updates = [
                gr.update(visible=index < eq_count)
                for index in range(len(equations_list))
            ]
            return [*updates, eq_count]

        def add_equation(equations_list, eq_count):
            """Show one more equation row, capped at MAX_EQUATIONS."""
            eq_count = min(int(eq_count) + 1, MAX_EQUATIONS)
            return _row_updates(equations_list, eq_count)

        def remove_equation(equations_list, eq_count):
            """Hide the last visible equation row, keeping at least one."""
            eq_count = max(int(eq_count) - 1, 1)
            return _row_updates(equations_list, eq_count)

        def _bind(handler, equations_list):
            """Return a one-argument callback bound to ``equations_list``."""
            def callback(eq_count):
                return handler(equations_list, eq_count)
            return callback

        # Wire Add/Remove for each quantity; registration order matches the
        # order biomass -> substrate -> product, add before remove.
        button_groups = (
            (add_biomass_btn, remove_biomass_btn, biomass_equations, biomass_eq_count),
            (add_substrate_btn, remove_substrate_btn, substrate_equations, substrate_eq_count),
            (add_product_btn, remove_product_btn, product_equations, product_eq_count),
        )
        for add_btn, remove_btn, equations_list, counter in button_groups:
            row_outputs = [*[row for row, _ in equations_list], counter]
            add_btn.click(
                fn=_bind(add_equation, equations_list),
                inputs=counter,
                outputs=row_outputs
            )
            remove_btn.click(
                fn=_bind(remove_equation, equations_list),
                inputs=counter,
                outputs=row_outputs
            )

        # Order must match the positional parameters of process_and_plot.
        simulate_inputs = [
            file_input,
            *[eq_input for row, eq_input in biomass_equations],
            *biomass_params,
            *biomass_bounds,
            *[eq_input for row, eq_input in substrate_equations],
            *substrate_params,
            *substrate_bounds,
            *[eq_input for row, eq_input in product_equations],
            *product_params,
            *product_bounds,
            legend_position,
            show_legend,
            show_params,
            biomass_eq_count,
            substrate_eq_count,
            product_eq_count
        ]
        simulate_btn.click(
            fn=process_and_plot,
            inputs=simulate_inputs,
            outputs=[output_gallery, analysis_output]
        )
    return demo
# Script entry point: when this file is run directly, build the Blocks app
# with create_interface() and start it with launch().
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()