Spaces:

JasonTPhillipsJr
/

SpaGAN

Sleeping

JasonTPhillipsJr committed on Nov 11, 2024

Commit

fa29176

verified ·

1 Parent(s): dba4dc1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ bert_model.to(device)
 bert_model.eval()
 #SpaBERT Initialization Section
-data_file_path = 'models/spabert/datasets/SPABERT_finetuning_data_combined.json'                  #Make a new json file with only the geo entities needed, or it takes too long to run.
 pretrained_model_path = 'models/spabert/datasets/fine-spabert-base-uncased-finetuned-osm-mn.pth'
 config = SpatialBertConfig()
@@ -49,7 +49,7 @@ spatialDataset = PbfMapDataset(data_file_path = data_file_path,
                                         label_encoder = None,
                                         mode = None)                                  #If set to None it will use the full dataset for mlm
-data_loader = DataLoader(spatialDataset, batch_size=1, num_workers=0, shuffle=False, pin_memory=False, drop_last=False) #issue needs to be fixed with num_workers not stopping after finished
 # Create a dictionary to map entity names to indices
 entity_index_dict = {entity['pivot_name']: i for i, entity in enumerate(spatialDataset)}
@@ -87,9 +87,7 @@ def process_entity(batch, model, device):
     return spaBERT_embedding, input_ids
 spaBERT_embeddings = []
-for i, batch in enumerate(data_loader):
-    if i >= 2:  # Stop after processing 3 batches
-        break
     spaBERT_embedding, input_ids = process_entity(batch, spaBERT_model, device)
     spaBERT_embeddings.append(spaBERT_embedding)

 bert_model.eval()
 #SpaBERT Initialization Section
+data_file_path = 'models/spabert/datasets/SpaBERTPivots.json'    #Sample file otherwise this model will take too long on CPU.
 pretrained_model_path = 'models/spabert/datasets/fine-spabert-base-uncased-finetuned-osm-mn.pth'
 config = SpatialBertConfig()
                                         label_encoder = None,
                                         mode = None)                                  #If set to None it will use the full dataset for mlm
+data_loader = DataLoader(spatialDataset, batch_size=1, num_workers=0, shuffle=False, pin_memory=False, drop_last=False)
 # Create a dictionary to map entity names to indices
 entity_index_dict = {entity['pivot_name']: i for i, entity in enumerate(spatialDataset)}
     return spaBERT_embedding, input_ids
 spaBERT_embeddings = []
+for batch in (data_loader):
     spaBERT_embedding, input_ids = process_entity(batch, spaBERT_model, device)
     spaBERT_embeddings.append(spaBERT_embedding)