Takashi Itoh committed on
Commit
f140fcb
1 Parent(s): fcac6c0
Files changed (1)
  1. app.py +451 -131
app.py CHANGED
@@ -21,6 +21,7 @@ from transformers.modeling_outputs import BaseModelOutput
 os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"

 import models.fm4m as fm4m
+
 RDLogger.logger().setLevel(RDLogger.ERROR)


@@ -32,20 +33,41 @@ def smiles_to_image(smiles):

 # Dictionary for SMILES strings and corresponding images (you can replace with your actual image paths)
 smiles_image_mapping = {
-    "Mol 1": {"smiles": "C=C(C)CC(=O)NC[C@H](CO)NC(=O)C=Cc1ccc(C)c(Cl)c1", "image": "img/img1.png"},
+    "Mol 1": {
+        "smiles": "C=C(C)CC(=O)NC[C@H](CO)NC(=O)C=Cc1ccc(C)c(Cl)c1",
+        "image": "img/img1.png",
+    },
     # Example SMILES for ethanol
-    "Mol 2": {"smiles": "C=CC1(CC(=O)NC[C@@H](CCCC)NC(=O)c2cc(Cl)cc(Br)c2)CC1", "image": "img/img2.png"},
+    "Mol 2": {
+        "smiles": "C=CC1(CC(=O)NC[C@@H](CCCC)NC(=O)c2cc(Cl)cc(Br)c2)CC1",
+        "image": "img/img2.png",
+    },
     # Example SMILES for butane
-    "Mol 3": {"smiles": "C=C(C)C[C@H](NC(C)=O)C(=O)N1CC[C@H](NC(=O)[C@H]2C[C@@]2(C)Br)C(C)(C)C1",
-              "image": "img/img3.png"},  # Example SMILES for ethylamine
-    "Mol 4": {"smiles": "C=C1CC(CC(=O)N[C@H]2CCN(C(=O)c3ncccc3SC)C23CC3)C1", "image": "img/img4.png"},
+    "Mol 3": {
+        "smiles": "C=C(C)C[C@H](NC(C)=O)C(=O)N1CC[C@H](NC(=O)[C@H]2C[C@@]2(C)Br)C(C)(C)C1",
+        "image": "img/img3.png",
+    },  # Example SMILES for ethylamine
+    "Mol 4": {
+        "smiles": "C=C1CC(CC(=O)N[C@H]2CCN(C(=O)c3ncccc3SC)C23CC3)C1",
+        "image": "img/img4.png",
+    },
     # Example SMILES for diethyl ether
-    "Mol 5": {"smiles": "C=CCS[C@@H](C)CC(=O)OCC", "image": "img/img5.png"}  # Example SMILES for chloroethane
+    "Mol 5": {
+        "smiles": "C=CCS[C@@H](C)CC(=O)OCC",
+        "image": "img/img5.png",
+    },  # Example SMILES for chloroethane
 }

 datasets = [" ", "BACE", "ESOL", "Load Custom Dataset"]

-models_enabled = ["SELFIES-TED", "MHG-GED", "MolFormer", "SMI-TED", "Mordred", "MorganFingerprint"]
+models_enabled = [
+    "SELFIES-TED",
+    "MHG-GED",
+    "MolFormer",
+    "SMI-TED",
+    "Mordred",
+    "MorganFingerprint",
+]

 fusion_available = ["Concat"]

@@ -56,7 +78,12 @@ def evaluate_and_log(models, dataset, task_type, eval_output, state):
     result = f"{eval_output}"
     result = result.replace(" Score", "")

-    new_entry = {"Selected Models": str(models), "Dataset": dataset, "Task": task_dic[task_type], "Result": result}
+    new_entry = {
+        "Selected Models": str(models),
+        "Dataset": dataset,
+        "Task": task_dic[task_type],
+        "Result": result,
+    }
     new_entry_df = pd.DataFrame([new_entry])

     state["log_df"] = pd.concat([new_entry_df, state["log_df"]])
@@ -106,22 +133,42 @@ gen_model = BartForConditionalGeneration.from_pretrained("ibm/materials.selfies-

 def generate(latent_vector, mask):
     encoder_outputs = BaseModelOutput(latent_vector)
-    decoder_output = gen_model.generate(encoder_outputs=encoder_outputs, attention_mask=mask,
-                                        max_new_tokens=64, do_sample=True, top_k=5, top_p=0.95, num_return_sequences=1)
+    decoder_output = gen_model.generate(
+        encoder_outputs=encoder_outputs,
+        attention_mask=mask,
+        max_new_tokens=64,
+        do_sample=True,
+        top_k=5,
+        top_p=0.95,
+        num_return_sequences=1,
+    )
     selfies = gen_tokenizer.batch_decode(decoder_output, skip_special_tokens=True)
     return [sf.decoder(re.sub(r'\]\s*(.*?)\s*\[', r']\1[', i)) for i in selfies]


 def perturb_latent(latent_vecs, noise_scale=0.5):
-    return torch.tensor(np.random.uniform(0, 1, latent_vecs.shape) * noise_scale,
-                        dtype=torch.float32) + latent_vecs
+    return (
+        torch.tensor(
+            np.random.uniform(0, 1, latent_vecs.shape) * noise_scale,
+            dtype=torch.float32,
+        )
+        + latent_vecs
+    )


 def encode(selfies):
-    encoding = gen_tokenizer(selfies, return_tensors='pt', max_length=128, truncation=True, padding='max_length')
+    encoding = gen_tokenizer(
+        selfies,
+        return_tensors='pt',
+        max_length=128,
+        truncation=True,
+        padding='max_length',
+    )
     input_ids = encoding['input_ids']
     attention_mask = encoding['attention_mask']
-    outputs = gen_model.model.encoder(input_ids=input_ids, attention_mask=attention_mask)
+    outputs = gen_model.model.encoder(
+        input_ids=input_ids, attention_mask=attention_mask
+    )
     model_output = outputs.last_hidden_state
     return model_output, attention_mask

@@ -140,7 +187,8 @@ def generate_canonical(smiles):
         mol = Chem.MolFromSmiles(gen[0])
         if mol:
             gen_mol = Chem.MolToSmiles(mol)
-            if gen_mol != Chem.MolToSmiles(Chem.MolFromSmiles(smiles)): break
+            if gen_mol != Chem.MolToSmiles(Chem.MolFromSmiles(smiles)):
+                break
         else:
             print('Abnormal molecule:', gen[0])

@@ -154,9 +202,20 @@ def generate_canonical(smiles):
     # Prepare the table with ref mol and gen mol
     data = {
         "Property": ["QED", "SA", "LogP", "Mol Wt", "Tanimoto Similarity"],
-        "Reference Mol": [ref_properties[0], ref_properties[1], ref_properties[2], ref_properties[3],
-                          tanimoto_similarity],
-        "Generated Mol": [gen_properties[0], gen_properties[1], gen_properties[2], gen_properties[3], ""]
+        "Reference Mol": [
+            ref_properties[0],
+            ref_properties[1],
+            ref_properties[2],
+            ref_properties[3],
+            tanimoto_similarity,
+        ],
+        "Generated Mol": [
+            gen_properties[0],
+            gen_properties[1],
+            gen_properties[2],
+            gen_properties[3],
+            "",
+        ],
     }
     df = pd.DataFrame(data)

@@ -184,7 +243,6 @@ def display_eval(selected_models, dataset, task_type, downstream, fusion_type, s
     downstream_model = downstream_model.rstrip()
     params = None

-
     try:
         if not selected_models:
             return "Please select at least one enabled model."
@@ -195,22 +253,38 @@ def display_eval(selected_models, dataset, task_type, downstream, fusion_type, s
                     downstream_model = "DefaultClassifier"
                     params = None

-                result, state["roc_auc"], state["fpr"], state["tpr"], state["x_batch"], state["y_batch"] = \
-                    fm4m.multi_modal(model_list=selected_models,
-                                     downstream_model=downstream_model,
-                                     params = params,
-                                     dataset=dataset)
+                (
+                    result,
+                    state["roc_auc"],
+                    state["fpr"],
+                    state["tpr"],
+                    state["x_batch"],
+                    state["y_batch"],
+                ) = fm4m.multi_modal(
+                    model_list=selected_models,
+                    downstream_model=downstream_model,
+                    params=params,
+                    dataset=dataset,
+                )

             elif task_type == "Regression":
                 if downstream_model == "Default Settings":
                     downstream_model = "DefaultRegressor"
                     params = None

-                result, state["RMSE"], state["y_batch_test"], state["y_prob"], state["x_batch"], state["y_batch"] = \
-                    fm4m.multi_modal(model_list=selected_models,
-                                     downstream_model=downstream_model,
-                                     params=params,
-                                     dataset=dataset)
+                (
+                    result,
+                    state["RMSE"],
+                    state["y_batch_test"],
+                    state["y_prob"],
+                    state["x_batch"],
+                    state["y_batch"],
+                ) = fm4m.multi_modal(
+                    model_list=selected_models,
+                    downstream_model=downstream_model,
+                    params=params,
+                    dataset=dataset,
+                )

         else:
             if task_type == "Classification":
@@ -218,22 +292,38 @@ def display_eval(selected_models, dataset, task_type, downstream, fusion_type, s
                     downstream_model = "DefaultClassifier"
                     params = None

-                result, state["roc_auc"], state["fpr"], state["tpr"], state["x_batch"], state["y_batch"] = \
-                    fm4m.single_modal(model=selected_models[0],
-                                      downstream_model=downstream_model,
-                                      params=params,
-                                      dataset=dataset)
+                (
+                    result,
+                    state["roc_auc"],
+                    state["fpr"],
+                    state["tpr"],
+                    state["x_batch"],
+                    state["y_batch"],
+                ) = fm4m.single_modal(
+                    model=selected_models[0],
+                    downstream_model=downstream_model,
+                    params=params,
+                    dataset=dataset,
+                )

             elif task_type == "Regression":
                 if downstream_model == "Default Settings":
                     downstream_model = "DefaultRegressor"
                     params = None

-                result, state["RMSE"], state["y_batch_test"], state["y_prob"], state["x_batch"], state["y_batch"] = \
-                    fm4m.single_modal(model=selected_models[0],
-                                      downstream_model=downstream_model,
-                                      params=params,
-                                      dataset=dataset)
+                (
+                    result,
+                    state["RMSE"],
+                    state["y_batch_test"],
+                    state["y_prob"],
+                    state["x_batch"],
+                    state["y_batch"],
+                ) = fm4m.single_modal(
+                    model=selected_models[0],
+                    downstream_model=downstream_model,
+                    params=params,
+                    dataset=dataset,
+                )

         if result == None:
             result = "Data & Model Setting is incorrect"
@@ -263,7 +353,13 @@ def display_plot(plot_type, state):
         roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
         ax.set_title("ROC-AUC Curve")
         try:
-            ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.4f})')
+            ax.plot(
+                fpr,
+                tpr,
+                color='darkorange',
+                lw=2,
+                label=f'ROC curve (area = {roc_auc:.4f})',
+            )
             ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
             ax.set_xlim([0.0, 1.0])
             ax.set_ylim([0.0, 1.05])
@@ -275,7 +371,11 @@ def display_plot(plot_type, state):
             ax.legend(loc='lower right')

     elif plot_type == "Parity Plot":
-        RMSE, y_batch_test, y_prob = state.get("RMSE"), state.get("y_batch_test"), state.get("y_prob")
+        RMSE, y_batch_test, y_prob = (
+            state.get("RMSE"),
+            state.get("y_batch_test"),
+            state.get("y_prob"),
+        )
         ax.set_title("Parity plot")

         # change format
@@ -284,7 +384,12 @@ def display_plot(plot_type, state):
             print(y_prob)
             y_batch_test = np.array(y_batch_test, dtype=float)
             y_prob = np.array(y_prob, dtype=float)
-            ax.scatter(y_batch_test, y_prob, color="blue", label=f"Predicted vs Actual (RMSE: {RMSE:.4f})")
+            ax.scatter(
+                y_batch_test,
+                y_prob,
+                color="blue",
+                label=f"Predicted vs Actual (RMSE: {RMSE:.4f})",
+            )
             min_val = min(min(y_batch_test), min(y_prob))
             max_val = max(max(y_batch_test), max(y_prob))
             ax.plot([min_val, max_val], [min_val, max_val], 'r-')
@@ -297,7 +402,6 @@ def display_plot(plot_type, state):
             print(y_batch_test)
             print(y_prob)

-
         ax.set_xlabel('Actual Values')
         ax.set_ylabel('Predicted Values')

@@ -316,13 +420,25 @@ predefined_datasets = {
 # Function to load a predefined dataset from the local path
 def load_predefined_dataset(dataset_name):
     val = predefined_datasets.get(dataset_name)
-    try: file_path = val.split(",")[0]
-    except:file_path=False
+    try:
+        file_path = val.split(",")[0]
+    except:
+        file_path = False

     if file_path:
         df = pd.read_csv(file_path)
-        return df.head(), gr.update(choices=list(df.columns)), gr.update(choices=list(df.columns)), f"{dataset_name.lower()}"
-    return pd.DataFrame(), gr.update(choices=[]), gr.update(choices=[]), f"Dataset not found"
+        return (
+            df.head(),
+            gr.update(choices=list(df.columns)),
+            gr.update(choices=list(df.columns)),
+            f"{dataset_name.lower()}",
+        )
+    return (
+        pd.DataFrame(),
+        gr.update(choices=[]),
+        gr.update(choices=[]),
+        f"Dataset not found",
+    )


 # Function to display the head of the uploaded CSV file
@@ -330,7 +446,11 @@ def display_csv_head(file):
     if file is not None:
         # Load the CSV file into a DataFrame
         df = pd.read_csv(file.name)
-        return df.head(), gr.update(choices=list(df.columns)), gr.update(choices=list(df.columns))
+        return (
+            df.head(),
+            gr.update(choices=list(df.columns)),
+            gr.update(choices=list(df.columns)),
+        )
     return pd.DataFrame(), gr.update(choices=[]), gr.update(choices=[])


@@ -338,28 +458,54 @@ def display_csv_head(file):
 def handle_dataset_selection(selected_dataset):
     if selected_dataset == "Custom Dataset":
         # Show file upload fields for train and test datasets if "Custom Dataset" is selected
-        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(
-            visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
+        return (
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=True),
+            gr.update(visible=False),
+            gr.update(visible=True),
+            gr.update(visible=True),
+        )
     else:
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(
-            visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        return (
+            gr.update(visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )


 # Function to select input and output columns and display a message
-def select_columns(input_column, output_column, train_data, test_data,dataset_name):
+def select_columns(input_column, output_column, train_data, test_data, dataset_name):
     if input_column and output_column:
         return f"{train_data.name},{test_data.name},{input_column},{output_column},{dataset_name}"
     return "Please select both input and output columns."

-def set_dataname(dataset_name, dataset_selector ):
+
+def set_dataname(dataset_name, dataset_selector):
     if dataset_selector == "Custom Dataset":
         return f"{dataset_name}"
     return f"{dataset_selector}"

+
 # Function to create model based on user input
-def create_model(model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None):
+def create_model(
+    model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
+):
     if model_name == "XGBClassifier":
-        model = xgb.XGBClassifier(objective='binary:logistic',eval_metric= 'auc', max_depth=max_depth, n_estimators=n_estimators, alpha=alpha)
+        model = xgb.XGBClassifier(
+            objective='binary:logistic',
+            eval_metric='auc',
+            max_depth=max_depth,
+            n_estimators=n_estimators,
+            alpha=alpha,
+        )
     elif model_name == "SVR":
         model = SVR(degree=degree, kernel=kernel)
     elif model_name == "Kernel Ridge":
@@ -374,6 +520,7 @@ def create_model(model_name, max_depth=None, n_estimators=None, alpha=None, degr

     return f"{model_name} * {model.get_params()}"

+
 # Define the Gradio layout
 with gr.Blocks() as demo:
     log_df = pd.DataFrame(
@@ -383,79 +530,174 @@ with gr.Blocks() as demo:
     with gr.Row():
         # Left Column
         with gr.Column():
-            gr.HTML('''
+            gr.HTML(
+                '''
            <div style="background-color: #6A8EAE; color: #FFFFFF; padding: 10px;">
            <h3 style="color: #FFFFFF; margin: 0;font-size: 20px;"> Data & Model Setting</h3>
            </div>
-            ''')
+                '''
+            )
             # Dropdown menu for predefined datasets including "Custom Dataset" option
-            dataset_selector = gr.Dropdown(label="Select Dataset",
-                                           choices=list(predefined_datasets.keys()) + ["Custom Dataset"])
+            dataset_selector = gr.Dropdown(
+                label="Select Dataset",
+                choices=list(predefined_datasets.keys()) + ["Custom Dataset"],
+            )
             # Display the message for selected columns
-            selected_columns_message = gr.Textbox(label="Selected Columns Info", visible=False)
+            selected_columns_message = gr.Textbox(
+                label="Selected Columns Info", visible=False
+            )

             with gr.Accordion("Dataset Settings", open=True):
                 # File upload options for custom dataset (train and test)
                 dataset_name = gr.Textbox(label="Dataset Name", visible=False)
-                train_file = gr.File(label="Upload Custom Train Dataset", file_types=[".csv"], visible=False)
-                train_display = gr.Dataframe(label="Train Dataset Preview (First 5 Rows)", visible=False, interactive=False)
+                train_file = gr.File(
+                    label="Upload Custom Train Dataset",
+                    file_types=[".csv"],
+                    visible=False,
+                )
+                train_display = gr.Dataframe(
+                    label="Train Dataset Preview (First 5 Rows)",
+                    visible=False,
+                    interactive=False,
+                )

-                test_file = gr.File(label="Upload Custom Test Dataset", file_types=[".csv"], visible=False)
-                test_display = gr.Dataframe(label="Test Dataset Preview (First 5 Rows)", visible=False, interactive=False)
+                test_file = gr.File(
+                    label="Upload Custom Test Dataset",
+                    file_types=[".csv"],
+                    visible=False,
+                )
+                test_display = gr.Dataframe(
+                    label="Test Dataset Preview (First 5 Rows)",
+                    visible=False,
+                    interactive=False,
+                )

                 # Predefined dataset displays
-                predefined_display = gr.Dataframe(label="Predefined Dataset Preview (First 5 Rows)", visible=False,
-                                                  interactive=False)
+                predefined_display = gr.Dataframe(
+                    label="Predefined Dataset Preview (First 5 Rows)",
+                    visible=False,
+                    interactive=False,
+                )

                 # Dropdowns for selecting input and output columns for the custom dataset
-                input_column_selector = gr.Dropdown(label="Select Input Column", choices=[], visible=False)
-                output_column_selector = gr.Dropdown(label="Select Output Column", choices=[], visible=False)
+                input_column_selector = gr.Dropdown(
+                    label="Select Input Column", choices=[], visible=False
+                )
+                output_column_selector = gr.Dropdown(
+                    label="Select Output Column", choices=[], visible=False
+                )

                 # When a dataset is selected, show either file upload fields (for custom) or load predefined datasets
-                dataset_selector.change(handle_dataset_selection,
-                                        inputs=dataset_selector,
-                                        outputs=[dataset_name, train_file, train_display, test_file, test_display, predefined_display,
-                                                 input_column_selector, output_column_selector])
+                dataset_selector.change(
+                    handle_dataset_selection,
+                    inputs=dataset_selector,
+                    outputs=[
+                        dataset_name,
+                        train_file,
+                        train_display,
+                        test_file,
+                        test_display,
+                        predefined_display,
+                        input_column_selector,
+                        output_column_selector,
+                    ],
+                )

                 # When a predefined dataset is selected, load its head and update column selectors
-                dataset_selector.change(load_predefined_dataset,
-                                        inputs=dataset_selector,
-                                        outputs=[predefined_display, input_column_selector, output_column_selector, selected_columns_message])
+                dataset_selector.change(
+                    load_predefined_dataset,
+                    inputs=dataset_selector,
+                    outputs=[
+                        predefined_display,
+                        input_column_selector,
+                        output_column_selector,
+                        selected_columns_message,
+                    ],
+                )

                 # When a custom train file is uploaded, display its head and update column selectors
-                train_file.change(display_csv_head, inputs=train_file,
-                                  outputs=[train_display, input_column_selector, output_column_selector])
+                train_file.change(
+                    display_csv_head,
+                    inputs=train_file,
+                    outputs=[
+                        train_display,
+                        input_column_selector,
+                        output_column_selector,
+                    ],
+                )

                 # When a custom test file is uploaded, display its head
-                test_file.change(display_csv_head, inputs=test_file,
-                                 outputs=[test_display, input_column_selector, output_column_selector])
+                test_file.change(
+                    display_csv_head,
+                    inputs=test_file,
+                    outputs=[
+                        test_display,
+                        input_column_selector,
+                        output_column_selector,
+                    ],
+                )

-                dataset_selector.change(set_dataname,
-                                        inputs=[dataset_name, dataset_selector],
-                                        outputs=dataset_name)
+                dataset_selector.change(
+                    set_dataname,
+                    inputs=[dataset_name, dataset_selector],
+                    outputs=dataset_name,
+                )

                 # Update the selected columns information when dropdown values are changed
-                input_column_selector.change(select_columns,
-                                             inputs=[input_column_selector, output_column_selector, train_file, test_file, dataset_name],
-                                             outputs=selected_columns_message)
+                input_column_selector.change(
+                    select_columns,
+                    inputs=[
+                        input_column_selector,
+                        output_column_selector,
+                        train_file,
+                        test_file,
+                        dataset_name,
+                    ],
+                    outputs=selected_columns_message,
+                )

-                output_column_selector.change(select_columns,
-                                              inputs=[input_column_selector, output_column_selector, train_file, test_file, dataset_name],
-                                              outputs=selected_columns_message)
+                output_column_selector.change(
+                    select_columns,
+                    inputs=[
+                        input_column_selector,
+                        output_column_selector,
+                        train_file,
+                        test_file,
+                        dataset_name,
+                    ],
+                    outputs=selected_columns_message,
+                )

-            model_checkbox = gr.CheckboxGroup(choices=models_enabled, label="Select Model")
+            model_checkbox = gr.CheckboxGroup(
+                choices=models_enabled, label="Select Model"
+            )

-            task_radiobutton = gr.Radio(choices=["Classification", "Regression"], label="Task Type")
+            task_radiobutton = gr.Radio(
+                choices=["Classification", "Regression"], label="Task Type"
+            )

             ####### adding hyper parameter tuning ###########
-            model_name = gr.Dropdown(["Default - Auto", "XGBClassifier", "SVR", "Kernel Ridge", "Linear Regression"], label="Select Downstream Model")
+            model_name = gr.Dropdown(
+                [
+                    "Default - Auto",
+                    "XGBClassifier",
+                    "SVR",
+                    "Kernel Ridge",
+                    "Linear Regression",
+                ],
+                label="Select Downstream Model",
+            )
             with gr.Accordion("Downstream Hyperparameter Settings", open=True):
                 # Create placeholders for hyperparameter components
-                max_depth = gr.Slider(1, 20, step=1,visible=False, label="max_depth")
-                n_estimators = gr.Slider(100, 5000, step=100, visible=False, label="n_estimators")
+                max_depth = gr.Slider(1, 20, step=1, visible=False, label="max_depth")
+                n_estimators = gr.Slider(
+                    100, 5000, step=100, visible=False, label="n_estimators"
+                )
                 alpha = gr.Slider(0.1, 10.0, step=0.1, visible=False, label="alpha")
-                degree = gr.Slider(1, 20, step=1,visible=False, label="degree")
-                kernel = gr.Dropdown(choices=["rbf", "poly", "linear"], visible=False, label="kernel")
+                degree = gr.Slider(1, 20, step=1, visible=False, label="degree")
+                kernel = gr.Dropdown(
+                    choices=["rbf", "poly", "linear"], visible=False, label="kernel"
+                )

                 # Output textbox
                 output = gr.Textbox(label="Loaded Parameters")
@@ -463,24 +705,52 @@ with gr.Blocks() as demo:
             # Dynamically show relevant hyperparameters based on selected model
             def update_hyperparameters(model_name):
                 if model_name == "XGBClassifier":
-                    return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(
-                        visible=False), gr.update(visible=False)
+                    return (
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                    )
                 elif model_name == "SVR":
-                    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(
-                        visible=True), gr.update(visible=True)
+                    return (
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                    )
                 elif model_name == "Kernel Ridge":
-                    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(
-                        visible=True), gr.update(visible=True)
+                    return (
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                        gr.update(visible=True),
+                    )
                 elif model_name == "Linear Regression":
-                    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(
-                        visible=False), gr.update(visible=False)
+                    return (
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                    )
                 elif model_name == "Default - Auto":
-                    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(
-                        visible=False), gr.update(visible=False)
+                    return (
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                        gr.update(visible=False),
+                    )

             # When model is selected, update which hyperparameters are visible
-            model_name.change(update_hyperparameters, inputs=[model_name],
-                              outputs=[max_depth, n_estimators, alpha, degree, kernel])
+            model_name.change(
+                update_hyperparameters,
+                inputs=[model_name],
+                outputs=[max_depth, n_estimators, alpha, degree, kernel],
+            )

             # Submit button to create the model with selected hyperparameters
             submit_button = gr.Button("Create Downstream Model")
@@ -488,19 +758,29 @@ with gr.Blocks() as demo:
             # Function to handle model creation based on input parameters
             def on_submit(model_name, max_depth, n_estimators, alpha, degree, kernel):
                 if model_name == "XGBClassifier":
-                    return create_model(model_name, max_depth=max_depth, n_estimators=n_estimators, alpha=alpha)
+                    return create_model(
+                        model_name,
+                        max_depth=max_depth,
+                        n_estimators=n_estimators,
+                        alpha=alpha,
+                    )
                 elif model_name == "SVR":
                     return create_model(model_name, degree=degree, kernel=kernel)
                 elif model_name == "Kernel Ridge":
-                    return create_model(model_name, alpha=alpha, degree=degree, kernel=kernel)
+                    return create_model(
+                        model_name, alpha=alpha, degree=degree, kernel=kernel
+                    )
                 elif model_name == "Linear Regression":
                     return create_model(model_name)
                 elif model_name == "Default - Auto":
                     return create_model(model_name)

             # When the submit button is clicked, run the on_submit function
-            submit_button.click(on_submit, inputs=[model_name, max_depth, n_estimators, alpha, degree, kernel],
-                                outputs=output)
+            submit_button.click(
+                on_submit,
+                inputs=[model_name, max_depth, n_estimators, alpha, degree, kernel],
+                outputs=output,
+            )
             ###### End of hyper param tuning #########

             fusion_radiobutton = gr.Radio(choices=fusion_available, label="Fusion Type")
@@ -509,40 +789,69 @@ with gr.Blocks() as demo:

         # Middle Column
         with gr.Column():
-            gr.HTML('''
+            gr.HTML(
+                '''
            <div style="background-color: #8F9779; color: #FFFFFF; padding: 10px;">
            <h3 style="color: #FFFFFF; margin: 0;font-size: 20px;"> Downstream Task 1: Property Prediction</h3>
            </div>
-            ''')
+                '''
+            )
             eval_output = gr.Textbox(label="Train downstream model")

-            plot_radio = gr.Radio(choices=["ROC-AUC", "Parity Plot", "Latent Space"], label="Select Plot Type")
+            plot_radio = gr.Radio(
+                choices=["ROC-AUC", "Parity Plot", "Latent Space"],
+                label="Select Plot Type",
+            )
             plot_output = gr.Plot(label="Visualization")

             create_log = gr.Button("Store log")

-            log_table = gr.Dataframe(value=log_df, label="Log of Selections and Results", interactive=False)
-
-            eval_button.click(display_eval,
-                              inputs=[model_checkbox, selected_columns_message, task_radiobutton, output, fusion_radiobutton, state],
-                              outputs=eval_output)
-
-            plot_radio.change(display_plot, inputs=[plot_radio, state], outputs=plot_output)
+            log_table = gr.Dataframe(
+                value=log_df, label="Log of Selections and Results", interactive=False
+            )
+
+            eval_button.click(
+                display_eval,
+                inputs=[
+                    model_checkbox,
+                    selected_columns_message,
+                    task_radiobutton,
+                    output,
+                    fusion_radiobutton,
+                    state,
+                ],
+                outputs=eval_output,
+            )
+
+            plot_radio.change(
+                display_plot, inputs=[plot_radio, state], outputs=plot_output
+            )

             # Function to gather selected models
             def gather_selected_models(*models):
                 selected = [model for model in models if model]
                 return selected

-            create_log.click(evaluate_and_log, inputs=[model_checkbox, dataset_name, task_radiobutton, eval_output, state],
-                             outputs=log_table)
+            create_log.click(
+                evaluate_and_log,
+                inputs=[
+                    model_checkbox,
+                    dataset_name,
+                    task_radiobutton,
+                    eval_output,
+                    state,
+                ],
+                outputs=log_table,
+            )
         # Right Column
         with gr.Column():
-            gr.HTML('''
+            gr.HTML(
+                '''
            <div style="background-color: #D2B48C; color: #FFFFFF; padding: 10px;">
            <h3 style="color: #FFFFFF; margin: 0;font-size: 20px;"> Downstream Task 2: Molecule Generation</h3>
            </div>
-            ''')
+                '''
+            )
             smiles_input = gr.Textbox(label="Input SMILES String")
             image_display = gr.Image(label="Molecule Image", height=250, width=250)
             # Show images for selection
@@ -554,17 +863,28 @@ with gr.Blocks() as demo:
             )
             image_selector.change(load_image, image_selector, image_display)
             generate_button = gr.Button("Generate")
-            gen_image_display = gr.Image(label="Generated Molecule Image", height=250, width=250)
+            gen_image_display = gr.Image(
+                label="Generated Molecule Image", height=250, width=250
+            )
             generated_output = gr.Textbox(label="Generated Output")
             property_table = gr.Dataframe(label="Molecular Properties Comparison")

             # Handle image selection
-            image_selector.change(handle_image_selection, inputs=image_selector, outputs=[smiles_input, image_display])
-            smiles_input.change(smiles_to_image, inputs=smiles_input, outputs=image_display)
+            image_selector.change(
+                handle_image_selection,
+                inputs=image_selector,
+                outputs=[smiles_input, image_display],
+            )
+            smiles_input.change(
+                smiles_to_image, inputs=smiles_input, outputs=image_display
+            )

             # Generate button to display canonical SMILES and molecule image
-            generate_button.click(generate_canonical, inputs=smiles_input,
-                                  outputs=[property_table, generated_output, gen_image_display])
+            generate_button.click(
+                generate_canonical,
+                inputs=smiles_input,
+                outputs=[property_table, generated_output, gen_image_display],
+            )


 if __name__ == "__main__":
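
The reformatted encode, perturb_latent, and generate helpers in this diff form the latent-space round trip that generate_canonical relies on. The lines below are a minimal usage sketch, not part of the commit; they assume they run inside app.py after gen_tokenizer and gen_model are loaded, and "CCO" is just a placeholder reference SMILES.

# Illustrative sketch only (assumes app.py's encode, perturb_latent, generate are in scope)
import selfies as sf  # already imported as sf in app.py

ref_smiles = "CCO"                               # placeholder reference molecule
selfie = sf.encoder(ref_smiles)                  # SMILES -> SELFIES
latent, mask = encode([selfie])                  # encoder hidden states + attention mask
noisy = perturb_latent(latent, noise_scale=0.5)  # add scaled uniform noise in latent space
candidates = generate(noisy, mask)               # decode perturbed latents back to SMILES
print(candidates)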