Spaces:
Running
Running
Reduce memory usage
Browse files
app.py
CHANGED
@@ -62,7 +62,8 @@ mapping_table_idx_dataset_idx = {}
|
|
62 |
map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
|
63 |
n_elements = len(map_periodic_table)
|
64 |
|
65 |
-
|
|
|
66 |
|
67 |
pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
|
68 |
extracted = train_df["chemical_formula_descriptive"].str.extractall(pattern)
|
@@ -79,6 +80,7 @@ wide_df = extracted.reset_index().pivot_table( # Move index to columns for pivo
|
|
79 |
all_elements = [el.symbol for el in periodictable.elements] # full element list
|
80 |
wide_df = wide_df.reindex(columns=all_elements, fill_value=0)
|
81 |
|
|
|
82 |
dataset_index = wide_df.values
|
83 |
|
84 |
dataset_index = dataset_index / np.sum(dataset_index, axis=1)[:, None]
|
@@ -86,6 +88,7 @@ dataset_index = (
|
|
86 |
dataset_index / np.linalg.norm(dataset_index, axis=1)[:, None]
|
87 |
) # Normalize vectors
|
88 |
|
|
|
89 |
|
90 |
# Initialize the Dash app
|
91 |
app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)
|
|
|
62 |
map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
|
63 |
n_elements = len(map_periodic_table)
|
64 |
|
65 |
+
# Preprocessing step to create an index for the dataset
|
66 |
+
train_df = dataset.select_columns(["chemical_formula_descriptive"]).to_pandas()
|
67 |
|
68 |
pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
|
69 |
extracted = train_df["chemical_formula_descriptive"].str.extractall(pattern)
|
|
|
80 |
all_elements = [el.symbol for el in periodictable.elements] # full element list
|
81 |
wide_df = wide_df.reindex(columns=all_elements, fill_value=0)
|
82 |
|
83 |
+
|
84 |
dataset_index = wide_df.values
|
85 |
|
86 |
dataset_index = dataset_index / np.sum(dataset_index, axis=1)[:, None]
|
|
|
88 |
dataset_index / np.linalg.norm(dataset_index, axis=1)[:, None]
|
89 |
) # Normalize vectors
|
90 |
|
91 |
+
del train_df, extracted, wide_df
|
92 |
|
93 |
# Initialize the Dash app
|
94 |
app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)
|