Spaces:
Build error
Build error
Pietro Lesci
committed on
Commit
•
c718eb8
1
Parent(s):
e8a4a19
update
Browse files
- src/configs.py +1 -1
- src/pages/home.py +7 -1
- src/preprocessing.py +1 -1
src/configs.py
CHANGED
@@ -33,4 +33,4 @@ class Languages(Enum):
|
|
33 |
class SupportedFiles(Enum):
|
34 |
xlsx = (lambda x: pd.read_excel(x, dtype=str),)
|
35 |
csv = (lambda x: pd.read_csv(x, dtype=str),)
|
36 |
-
parquet = (lambda x: pd.read_parquet(x
|
|
|
33 |
class SupportedFiles(Enum):
|
34 |
xlsx = (lambda x: pd.read_excel(x, dtype=str),)
|
35 |
csv = (lambda x: pd.read_csv(x, dtype=str),)
|
36 |
+
parquet = (lambda x: pd.read_parquet(x),)
|
src/pages/home.py
CHANGED
@@ -108,7 +108,7 @@ def write(session, uploaded_file):
|
|
108 |
pre_steps = pre_steps_elem.multiselect(
|
109 |
"Select pre-lemmatization preprocessing steps (ordered)",
|
110 |
options=steps_options,
|
111 |
-
default=steps_options
|
112 |
format_func=lambda x: x.replace("_", " ").title(),
|
113 |
key=session.run_id,
|
114 |
)
|
@@ -146,6 +146,8 @@ def write(session, uploaded_file):
|
|
146 |
post_steps=post_steps,
|
147 |
)
|
148 |
|
|
|
|
|
149 |
# ==== 3. PROVIDE FEEDBACK ON OPTIONS ==== #
|
150 |
if show_sample and not (label_column and text_column):
|
151 |
st.warning("Please select `label` and `text` columns")
|
@@ -155,6 +157,8 @@ def write(session, uploaded_file):
|
|
155 |
sample_data[f"preprocessed_{text_column}"] = preprocessing_pipeline(
|
156 |
sample_data[text_column]
|
157 |
).values
|
|
|
|
|
158 |
st.table(
|
159 |
sample_data.loc[
|
160 |
:, [label_column, text_column, f"preprocessed_{text_column}"]
|
@@ -174,6 +178,8 @@ def write(session, uploaded_file):
|
|
174 |
data[text_column]
|
175 |
).values
|
176 |
|
|
|
|
|
177 |
inputs = encode(data[f"preprocessed_{text_column}"], data[label_column])
|
178 |
session.posdf, session.negdf = wordifier(**inputs)
|
179 |
st.success("Wordified!")
|
|
|
108 |
pre_steps = pre_steps_elem.multiselect(
|
109 |
"Select pre-lemmatization preprocessing steps (ordered)",
|
110 |
options=steps_options,
|
111 |
+
default=steps_options,
|
112 |
format_func=lambda x: x.replace("_", " ").title(),
|
113 |
key=session.run_id,
|
114 |
)
|
|
|
146 |
post_steps=post_steps,
|
147 |
)
|
148 |
|
149 |
+
print(preprocessing_pipeline.pre_steps)
|
150 |
+
|
151 |
# ==== 3. PROVIDE FEEDBACK ON OPTIONS ==== #
|
152 |
if show_sample and not (label_column and text_column):
|
153 |
st.warning("Please select `label` and `text` columns")
|
|
|
157 |
sample_data[f"preprocessed_{text_column}"] = preprocessing_pipeline(
|
158 |
sample_data[text_column]
|
159 |
).values
|
160 |
+
|
161 |
+
print(sample_data)
|
162 |
st.table(
|
163 |
sample_data.loc[
|
164 |
:, [label_column, text_column, f"preprocessed_{text_column}"]
|
|
|
178 |
data[text_column]
|
179 |
).values
|
180 |
|
181 |
+
print(data.head())
|
182 |
+
|
183 |
inputs = encode(data[f"preprocessed_{text_column}"], data[label_column])
|
184 |
session.posdf, session.negdf = wordifier(**inputs)
|
185 |
st.success("Wordified!")
|
src/preprocessing.py
CHANGED
@@ -115,7 +115,7 @@ class Lemmatizer:
|
|
115 |
elif remove_stop and not lemmatization:
|
116 |
|
117 |
def lemmatizer_fn(doc: spacy.tokens.doc.Doc) -> str:
|
118 |
-
return " ".join([t for t in doc if not t.is_stop])
|
119 |
|
120 |
elif lemmatization and not remove_stop:
|
121 |
|
|
|
115 |
elif remove_stop and not lemmatization:
|
116 |
|
117 |
def lemmatizer_fn(doc: spacy.tokens.doc.Doc) -> str:
|
118 |
+
return " ".join([t.text for t in doc if not t.is_stop])
|
119 |
|
120 |
elif lemmatization and not remove_stop:
|
121 |
|