Ramlaoui committed on
Commit
1fcdab3
1 Parent(s): b99c699

Reduce memory usage

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -62,7 +62,8 @@ mapping_table_idx_dataset_idx = {}
62
  map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
63
  n_elements = len(map_periodic_table)
64
 
65
- train_df = dataset.to_pandas()
 
66
 
67
  pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
68
  extracted = train_df["chemical_formula_descriptive"].str.extractall(pattern)
@@ -79,6 +80,7 @@ wide_df = extracted.reset_index().pivot_table( # Move index to columns for pivo
79
  all_elements = [el.symbol for el in periodictable.elements] # full element list
80
  wide_df = wide_df.reindex(columns=all_elements, fill_value=0)
81
 
 
82
  dataset_index = wide_df.values
83
 
84
  dataset_index = dataset_index / np.sum(dataset_index, axis=1)[:, None]
@@ -86,6 +88,7 @@ dataset_index = (
86
  dataset_index / np.linalg.norm(dataset_index, axis=1)[:, None]
87
  ) # Normalize vectors
88
 
 
89
 
90
  # Initialize the Dash app
91
  app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)
 
62
  map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
63
  n_elements = len(map_periodic_table)
64
 
65
+ # Preprocessing step to create an index for the dataset
66
+ train_df = dataset.select_columns(["chemical_formula_descriptive"]).to_pandas()
67
 
68
  pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
69
  extracted = train_df["chemical_formula_descriptive"].str.extractall(pattern)
 
80
  all_elements = [el.symbol for el in periodictable.elements] # full element list
81
  wide_df = wide_df.reindex(columns=all_elements, fill_value=0)
82
 
83
+
84
  dataset_index = wide_df.values
85
 
86
  dataset_index = dataset_index / np.sum(dataset_index, axis=1)[:, None]
 
88
  dataset_index / np.linalg.norm(dataset_index, axis=1)[:, None]
89
  ) # Normalize vectors
90
 
91
+ del train_df, extracted, wide_df
92
 
93
  # Initialize the Dash app
94
  app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)