Upload 5 files
- .gitignore +160 -0
- app.py +43 -0
- requirements.txt +18 -0
- run.py +102 -0
- train.py +454 -0
.gitignore
ADDED
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
app.py
ADDED
@@ -0,0 +1,43 @@
import streamlit as st
import pandas as pd
import rdkit
import streamlit_ketcher
from streamlit_ketcher import st_ketcher
import run

# Page setup
st.set_page_config(page_title="DeepDAP", page_icon="🔋", layout="wide")
st.title("🔋DeepDAP")

# Connect to the Google Sheet holding the donor/acceptor dataset
url1 = r"https://docs.google.com/spreadsheets/d/1AKkZS04VF3osFT36aNHIb4iUbV8D1uNfsldcpHXogj0/gviz/tq?tqx=out:csv&sheet=dap"
df1 = pd.read_csv(url1, dtype=str, encoding='utf-8')

# Search across donor names, references, and acceptor names (na=False guards missing cells)
text_search = st.text_input("🔍Search papers or molecules", value="")
m1 = df1["Donor_Name"].str.contains(text_search, na=False)
m2 = df1["reference"].str.contains(text_search, na=False)
m3 = df1["Acceptor_Name"].str.contains(text_search, na=False)
df_search = df1[m1 | m2 | m3]
if text_search:
    st.write(df_search)
    st.download_button("⬇️Download search results as .csv", df_search.to_csv(), "df_search.csv", use_container_width=True)

edited_df = st.data_editor(df1, num_rows="dynamic")

st.download_button(
    "⬇️ Download edited files as .csv", edited_df.to_csv(), "edited_df.csv", use_container_width=True
)

# Molecule editor (Ketcher) returning the drawn structure as SMILES
molecule = st.text_input("👨🔬Molecule")
smile_code = st_ketcher(molecule)
st.markdown(f"🏆New SMILES of edited molecules: {smile_code}")

acceptor = st.text_input("🎈SMILES of acceptor")
donor = st.text_input("🎈SMILES of donor")

# Predict PCE from the acceptor/donor SMILES pair; fall back to None on any failure
try:
    pce = run.smiles_aas_test(str(acceptor), str(donor))
    st.markdown(f"⚡PCE: ``{pce}``")
except Exception:
    st.markdown("⚡PCE: None")
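Note: app.py imports rdkit but never calls it. A minimal sketch of how the user-entered SMILES could be sanity-checked before calling the predictor; the wiring into app.py is an assumption, only MolFromSmiles is standard RDKit:

    from rdkit import Chem

    def is_valid_smiles(smiles: str) -> bool:
        # MolFromSmiles returns None when the SMILES string cannot be parsed
        return Chem.MolFromSmiles(smiles) is not None

    # e.g. guard the prediction call in app.py:
    # if is_valid_smiles(acceptor) and is_valid_smiles(donor):
    #     pce = run.smiles_aas_test(acceptor, donor)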
requirements.txt
ADDED
@@ -0,0 +1,18 @@
altair
streamlit
streamlit-ketcher
torch
tqdm
transformers
pytorch_lightning
scipy
pandas
rdkit
scikit-learn
matplotlib
easydict
wandb
networkx
seaborn
run.py
ADDED
@@ -0,0 +1,102 @@
import os
import pandas as pd

import torch
from torch.nn import functional as F
from transformers import AutoTokenizer

from util.utils import *

from tqdm import tqdm
from train import markerModel

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'

device_count = torch.cuda.device_count()
device_biomarker = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device = torch.device('cpu')
d_model_name = 'DeepChem/ChemBERTa-10M-MTR'
p_model_name = 'DeepChem/ChemBERTa-10M-MLM'

tokenizer = AutoTokenizer.from_pretrained(d_model_name)
prot_tokenizer = AutoTokenizer.from_pretrained(p_model_name)

##-- Load hyper-parameter config file and checkpoint --##
config = load_hparams('config/predict.json')
config = DictX(config)
model = markerModel.load_from_checkpoint(config.load_checkpoint, strict=False)

# model = BiomarkerModel.load_from_checkpoint('./biomarker_bindingdb_train8595_pretopre/3477h3wf/checkpoints/epoch=30-step=7284.ckpt').to(device_biomarker)

model.eval()
model.freeze()

if device_biomarker.type == 'cuda':
    model = torch.nn.DataParallel(model)


def get_biomarker(drug_inputs, prot_inputs):
    output_preds = model(drug_inputs, prot_inputs)

    predict = torch.squeeze(output_preds).tolist()

    # output_preds = torch.relu(output_preds)
    # predict = torch.tanh(output_preds)
    # predict = predict.squeeze(dim=1).tolist()

    return predict


def biomarker_prediction(smile_acc, smile_don):
    try:
        aas_input = smile_acc
        das_input = smile_don

        d_inputs = tokenizer(aas_input, padding='max_length', max_length=400, truncation=True, return_tensors="pt")
        # d_inputs = tokenizer(smiles, truncation=True, return_tensors="pt")
        drug_input_ids = d_inputs['input_ids'].to(device)
        drug_attention_mask = d_inputs['attention_mask'].to(device)
        drug_inputs = {'input_ids': drug_input_ids, 'attention_mask': drug_attention_mask}

        p_inputs = prot_tokenizer(das_input, padding='max_length', max_length=400, truncation=True, return_tensors="pt")
        # p_inputs = prot_tokenizer(aas_input, truncation=True, return_tensors="pt")
        prot_input_ids = p_inputs['input_ids'].to(device)
        prot_attention_mask = p_inputs['attention_mask'].to(device)
        prot_inputs = {'input_ids': prot_input_ids, 'attention_mask': prot_attention_mask}

        output_predict = get_biomarker(drug_inputs, prot_inputs)

        return output_predict

    except Exception as e:
        print(e)
        return {'Error_message': e}


def smiles_aas_test(smile_acc, smile_don):
    batch_size = 1
    try:
        output_pred = biomarker_prediction(smile_acc, smile_don)

        datas = output_pred

        ## -- Export result data to csv -- ##
        # df = pd.DataFrame(datas)
        # df.to_csv('./results/predict_test.csv', index=None)
        # print(df)

        return datas

    except Exception as e:
        print(e)
        return {'Error_message': e}


if __name__ == "__main__":
    # smile_acc and smile_don were undefined in the original __main__ block; the strings
    # below are placeholders for a quick smoke test, not values from the dataset.
    smile_acc = "CC1=CC=CC=C1"
    smile_don = "C1=CC=CS1"
    a = smiles_aas_test(smile_acc, smile_don)
    print(a)
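run.py expects config/predict.json to exist, and the only field it dereferences is load_checkpoint (load_hparams and DictX come from util.utils). A minimal sketch of generating such a file, assuming the checkpoint path below is a placeholder for a real trained checkpoint:

    import json, os

    os.makedirs("config", exist_ok=True)
    with open("config/predict.json", "w") as f:
        # load_checkpoint is the only key run.py reads; the path is a placeholder
        json.dump({"load_checkpoint": "./checkpoints/last.ckpt"}, f, indent=2)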
train.py
ADDED
@@ -0,0 +1,454 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import gc, os
import numpy as np
import pandas as pd
import wandb
from scipy.stats import pearsonr
from util.utils import *
from util.attention_flow import *

import torch
import torch.nn as nn

import sklearn as sk
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger, TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from transformers import AutoConfig, AutoTokenizer, RobertaModel, BertModel
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


class markerDataset(Dataset):
    def __init__(self, list_IDs, labels, df_dti, d_tokenizer, p_tokenizer):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs
        self.df = df_dti

        self.d_tokenizer = d_tokenizer
        self.p_tokenizer = p_tokenizer

    def convert_data(self, acc_data, don_data):
        d_inputs = self.d_tokenizer(acc_data, return_tensors="pt")
        p_inputs = self.p_tokenizer(don_data, return_tensors="pt")

        acc_input_ids = d_inputs['input_ids']
        acc_attention_mask = d_inputs['attention_mask']
        acc_inputs = {'input_ids': acc_input_ids, 'attention_mask': acc_attention_mask}

        don_input_ids = p_inputs['input_ids']
        don_attention_mask = p_inputs['attention_mask']
        don_inputs = {'input_ids': don_input_ids, 'attention_mask': don_attention_mask}

        return acc_inputs, don_inputs

    def tokenize_data(self, acc_data, don_data):
        tokenize_acc = ['[CLS]'] + self.d_tokenizer.tokenize(acc_data) + ['[SEP]']
        tokenize_don = ['[CLS]'] + self.p_tokenizer.tokenize(don_data) + ['[SEP]']

        return tokenize_acc, tokenize_don

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        index = self.list_IDs[index]
        acc_data = self.df.iloc[index]['acceptor']
        don_data = self.df.iloc[index]['donor']

        d_inputs = self.d_tokenizer(acc_data, padding='max_length', max_length=400, truncation=True, return_tensors="pt")
        p_inputs = self.p_tokenizer(don_data, padding='max_length', max_length=400, truncation=True, return_tensors="pt")

        d_input_ids = d_inputs['input_ids'].squeeze()
        d_attention_mask = d_inputs['attention_mask'].squeeze()
        p_input_ids = p_inputs['input_ids'].squeeze()
        p_attention_mask = p_inputs['attention_mask'].squeeze()

        labels = torch.as_tensor(self.labels[index], dtype=torch.float)

        dataset = [d_input_ids, d_attention_mask, p_input_ids, p_attention_mask, labels]
        return dataset


class markerDataModule(pl.LightningDataModule):
    def __init__(self, task_name, acc_model_name, don_model_name, num_workers, batch_size, traindata_rate=1.0):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.task_name = task_name

        self.traindata_rate = traindata_rate

        self.d_tokenizer = AutoTokenizer.from_pretrained(acc_model_name)
        self.p_tokenizer = AutoTokenizer.from_pretrained(don_model_name)

        self.df_train = None
        self.df_val = None
        self.df_test = None

        self.load_testData = True

        self.train_dataset = None
        self.valid_dataset = None
        self.test_dataset = None

    def get_task(self, task_name):
        if task_name.lower() == 'osc':
            return './dataset/OSC/'

        elif task_name.lower() == 'merge':
            self.load_testData = False
            return './dataset/MergeDataset'

    def prepare_data(self):
        # Use this method to do things that might write to disk or that need to be done only from
        # a single process in distributed settings.
        dataFolder = './dataset/OSC'

        self.df_train = pd.read_csv(dataFolder + '/train.csv')
        self.df_val = pd.read_csv(dataFolder + '/val.csv')

        ## -- Apply train-data length rate -- ##
        traindata_length = int(len(self.df_train) * self.traindata_rate)
        validdata_length = int(len(self.df_val) * self.traindata_rate)

        self.df_train = self.df_train[:traindata_length]
        self.df_val = self.df_val[:validdata_length]

        if self.load_testData is True:
            self.df_test = pd.read_csv(dataFolder + '/test.csv')

    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            self.train_dataset = markerDataset(self.df_train.index.values, self.df_train.Label.values, self.df_train,
                                               self.d_tokenizer, self.p_tokenizer)
            self.valid_dataset = markerDataset(self.df_val.index.values, self.df_val.Label.values, self.df_val,
                                               self.d_tokenizer, self.p_tokenizer)

        if self.load_testData is True:
            self.test_dataset = markerDataset(self.df_test.index.values, self.df_test.Label.values, self.df_test,
                                              self.d_tokenizer, self.p_tokenizer)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.valid_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=self.num_workers)


class markerModel(pl.LightningModule):
    def __init__(self, acc_model_name, don_model_name, lr, dropout, layer_features, loss_fn="smooth",
                 layer_limit=True, d_pretrained=True, p_pretrained=True):
        super().__init__()
        self.lr = lr
        self.loss_fn = loss_fn
        self.criterion = torch.nn.MSELoss()
        self.criterion_smooth = torch.nn.SmoothL1Loss()
        # self.sigmoid = nn.Sigmoid()

        #-- Pretrained Model Setting
        acc_config = AutoConfig.from_pretrained("seyonec/SMILES_BPE_PubChem_100k_shard00")
        if d_pretrained is False:
            self.d_model = RobertaModel(acc_config)
            print('acceptor model without pretraining')
        else:
            self.d_model = RobertaModel.from_pretrained(acc_model_name, num_labels=2,
                                                        output_hidden_states=True,
                                                        output_attentions=True)

        don_config = AutoConfig.from_pretrained("seyonec/SMILES_BPE_PubChem_100k_shard00")

        if p_pretrained is False:
            self.p_model = RobertaModel(don_config)
            print('donor model without pretraining')
        else:
            self.p_model = RobertaModel.from_pretrained(don_model_name,
                                                        output_hidden_states=True,
                                                        output_attentions=True)

        #-- Decoder Layer Setting
        layers = []
        firstfeature = self.d_model.config.hidden_size + self.p_model.config.hidden_size
        for feature_idx in range(0, len(layer_features) - 1):
            layers.append(nn.Linear(firstfeature, layer_features[feature_idx]))
            firstfeature = layer_features[feature_idx]

            layers.append(nn.ReLU())

            if dropout > 0:
                layers.append(nn.Dropout(dropout))

        layers.append(nn.Linear(firstfeature, layer_features[-1]))

        self.decoder = nn.Sequential(*layers)

        self.save_hyperparameters()

    def forward(self, acc_inputs, don_inputs):
        d_outputs = self.d_model(acc_inputs['input_ids'], acc_inputs['attention_mask'])
        p_outputs = self.p_model(don_inputs['input_ids'], don_inputs['attention_mask'])

        # Concatenate the two [CLS] embeddings and regress the target with the decoder MLP
        outs = torch.cat((d_outputs.last_hidden_state[:, 0], p_outputs.last_hidden_state[:, 0]), dim=1)
        outs = self.decoder(outs)

        return outs

    def attention_output(self, acc_inputs, don_inputs):
        d_outputs = self.d_model(acc_inputs['input_ids'], acc_inputs['attention_mask'])
        p_outputs = self.p_model(don_inputs['input_ids'], don_inputs['attention_mask'])

        outs = torch.cat((d_outputs.last_hidden_state[:, 0], p_outputs.last_hidden_state[:, 0]), dim=1)
        outs = self.decoder(outs)

        return d_outputs['attentions'], p_outputs['attentions'], outs

    def training_step(self, batch, batch_idx):
        acc_inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}
        don_inputs = {'input_ids': batch[2], 'attention_mask': batch[3]}
        labels = batch[4]

        output = self(acc_inputs, don_inputs)
        logits = output.squeeze(dim=1)

        if self.loss_fn == 'MSE':
            loss = self.criterion(logits, labels)
        else:
            loss = self.criterion_smooth(logits, labels)

        self.log("train_loss", loss, on_step=False, on_epoch=True, logger=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        acc_inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}
        don_inputs = {'input_ids': batch[2], 'attention_mask': batch[3]}
        labels = batch[4]

        output = self(acc_inputs, don_inputs)
        logits = output.squeeze(dim=1)

        if self.loss_fn == 'MSE':
            loss = self.criterion(logits, labels)
        else:
            loss = self.criterion_smooth(logits, labels)

        self.log("valid_loss", loss, on_step=False, on_epoch=True, logger=True)
        return {"logits": logits, "labels": labels}

    def validation_step_end(self, outputs):
        return {"logits": outputs['logits'], "labels": outputs['labels']}

    def validation_epoch_end(self, outputs):
        preds = self.convert_outputs_to_preds(outputs)
        labels = torch.as_tensor(torch.cat([output['labels'] for output in outputs], dim=0), dtype=torch.int)

        mae, mse, r2, r = self.log_score(preds, labels)

        self.log("mae", mae, on_step=False, on_epoch=True, logger=True)
        self.log("mse", mse, on_step=False, on_epoch=True, logger=True)
        self.log("r2", r2, on_step=False, on_epoch=True, logger=True)

    def test_step(self, batch, batch_idx):
        acc_inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}
        don_inputs = {'input_ids': batch[2], 'attention_mask': batch[3]}
        labels = batch[4]

        output = self(acc_inputs, don_inputs)
        logits = output.squeeze(dim=1)

        if self.loss_fn == 'MSE':
            loss = self.criterion(logits, labels)
        else:
            loss = self.criterion_smooth(logits, labels)

        self.log("test_loss", loss, on_step=False, on_epoch=True, logger=True)
        return {"logits": logits, "labels": labels}

    def test_step_end(self, outputs):
        return {"logits": outputs['logits'], "labels": outputs['labels']}

    def test_epoch_end(self, outputs):
        preds = self.convert_outputs_to_preds(outputs)
        labels = torch.as_tensor(torch.cat([output['labels'] for output in outputs], dim=0), dtype=torch.int)

        mae, mse, r2, r = self.log_score(preds, labels)

        self.log("mae", mae, on_step=False, on_epoch=True, logger=True)
        self.log("mse", mse, on_step=False, on_epoch=True, logger=True)
        self.log("r2", r2, on_step=False, on_epoch=True, logger=True)
        self.log("r", r, on_step=False, on_epoch=True, logger=True)

    def configure_optimizers(self):
        param_optimizer = list(self.named_parameters())

        no_decay = ["bias", "gamma", "beta"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
                "weight_decay_rate": 0.0001
            },
            {
                "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
                "weight_decay_rate": 0.0
            },
        ]
        optimizer = torch.optim.AdamW(
            optimizer_grouped_parameters,
            lr=self.lr,
        )
        return optimizer

    def convert_outputs_to_preds(self, outputs):
        logits = torch.cat([output['logits'] for output in outputs], dim=0)
        return logits

    def log_score(self, preds, labels):
        y_pred = preds.detach().cpu().numpy()
        y_label = labels.detach().cpu().numpy()

        mae = mean_absolute_error(y_label, y_pred)
        mse = mean_squared_error(y_label, y_pred)
        r2 = r2_score(y_label, y_pred)
        r = pearsonr(y_label, y_pred)[0]  # keep only the correlation coefficient, not the p-value

        print(f'\nmae : {mae}')
        print(f'mse : {mse}')
        print(f'r2 : {r2}')
        print(f'r : {r}')

        return mae, mse, r2, r


def main_wandb(config=None):
    try:
        if config is not None:
            wandb.init(config=config, project=project_name)
        else:
            wandb.init(settings=wandb.Settings(console='off'))

        config = wandb.config
        pl.seed_everything(seed=config.num_seed)

        dm = markerDataModule(config.task_name, config.d_model_name, config.p_model_name,
                              config.num_workers, config.batch_size, config.traindata_rate)
        dm.prepare_data()
        dm.setup()

        model_type = str(config.pretrained['chem']) + "To" + str(config.pretrained['prot'])
        # model_logger = WandbLogger(project=project_name)
        # MAE should be minimized, so the checkpoint monitor uses mode="min"
        checkpoint_callback = ModelCheckpoint(f"{config.task_name}_{model_type}_{config.lr}_{config.num_seed}",
                                              save_top_k=1, monitor="mae", mode="min")

        trainer = pl.Trainer(
            max_epochs=config.max_epoch,
            precision=16,
            # logger=model_logger,
            callbacks=[checkpoint_callback],
            accelerator='cpu', log_every_n_steps=40
        )

        if config.model_mode == "train":
            model = markerModel(config.d_model_name, config.p_model_name,
                                config.lr, config.dropout, config.layer_features, config.loss_fn,
                                config.layer_limit, config.pretrained['chem'], config.pretrained['prot'])
            model.train()
            trainer.fit(model, datamodule=dm)

            model.eval()
            trainer.test(model, datamodule=dm)

        else:
            model = markerModel.load_from_checkpoint(config.load_checkpoint)

            model.eval()
            trainer.test(model, datamodule=dm)

    except Exception as e:
        print(e)


def main_default(config):
    try:
        config = DictX(config)
        pl.seed_everything(seed=config.num_seed)

        dm = markerDataModule(config.task_name, config.d_model_name, config.p_model_name,
                              config.num_workers, config.batch_size, config.traindata_rate)

        dm.prepare_data()
        dm.setup()
        model_type = str(config.pretrained['chem']) + "To" + str(config.pretrained['prot'])
        # model_logger = TensorBoardLogger("./log", name=f"{config.task_name}_{model_type}_{config.num_seed}")
        # MSE should be minimized, so the checkpoint monitor uses mode="min"
        checkpoint_callback = ModelCheckpoint(f"{config.task_name}_{model_type}_{config.lr}_{config.num_seed}",
                                              save_top_k=1, monitor="mse", mode="min")

        trainer = pl.Trainer(
            max_epochs=config.max_epoch,
            precision=32,
            # logger=model_logger,
            callbacks=[checkpoint_callback],
            accelerator='cpu', log_every_n_steps=40
        )

        if config.model_mode == "train":
            model = markerModel(config.d_model_name, config.p_model_name,
                                config.lr, config.dropout, config.layer_features, config.loss_fn,
                                config.layer_limit, config.pretrained['chem'], config.pretrained['prot'])

            model.train()

            trainer.fit(model, datamodule=dm)

            model.eval()
            trainer.test(model, datamodule=dm)

        else:
            model = markerModel.load_from_checkpoint(config.load_checkpoint)

            model.eval()
            trainer.test(model, datamodule=dm)
    except Exception as e:
        print(e)


if __name__ == '__main__':
    using_wandb = False

    if using_wandb:
        ##-- Load hyper-parameter config file --##
        config = load_hparams('config/config_hparam.json')
        project_name = config["name"]

        main_wandb(config)

        ##-- wandb Sweep Hyper Param Tuning --##
        # config = load_hparams('config/config_sweep_bindingDB.json')
        # project_name = config["name"]
        # sweep_id = wandb.sweep(config, project=project_name)
        # wandb.agent(sweep_id, main_wandb)

    else:
        config = load_hparams('config/config_hparam.json')

        main_default(config)
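train.py loads every hyper-parameter from config/config_hparam.json via load_hparams. The keys below are the ones the script actually dereferences; the values are placeholders chosen for illustration only, not settings shipped with this upload. A minimal sketch of writing such a file:

    import json, os

    config_hparam = {
        "name": "DeepDAP",                              # wandb project name
        "task_name": "OSC",                             # dataset selector used by markerDataModule
        "model_mode": "train",                          # "train", or anything else to only run test from a checkpoint
        "d_model_name": "DeepChem/ChemBERTa-10M-MTR",   # acceptor encoder (same model run.py uses)
        "p_model_name": "DeepChem/ChemBERTa-10M-MLM",   # donor encoder
        "pretrained": {"chem": True, "prot": True},
        "num_seed": 42,
        "num_workers": 2,
        "batch_size": 8,
        "traindata_rate": 1.0,
        "lr": 1e-5,
        "dropout": 0.1,
        "layer_features": [512, 128, 1],                # decoder MLP sizes; last entry is the output dimension
        "layer_limit": True,
        "loss_fn": "smooth",                            # "MSE" or the default SmoothL1
        "max_epoch": 50,
        "load_checkpoint": "./checkpoints/last.ckpt"    # only used when model_mode != "train"
    }

    os.makedirs("config", exist_ok=True)
    with open("config/config_hparam.json", "w") as f:
        json.dump(config_hparam, f, indent=2)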