Spaces:
Runtime error
Runtime error
victormiller
committed on
Commit
•
0cee378
1
Parent(s):
b4e3ff3
Update curated.py
Browse files
- curated.py +1 -1
curated.py
CHANGED
@@ -67,7 +67,7 @@ filtering_process = Div(
|
|
67 |
Li("There were two kind of datasets that was downloaded S2ORC and S2ORC abstract"),
|
68 |
),
|
69 |
H4("Filtering - S2ORC"),
|
70 |
-
P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset")
|
71 |
Ol(
|
72 |
Li("title_abstract: must have title and abstract"),
|
73 |
Li("The paper must be in English. To determine the language of each document, we use the pycld3 library. We run pycld3 on the first 2000 characters of each paragraph in the paper. The language of the paper is the most common language of the paragraphs."),
|
|
|
67 |
Li("There were two kind of datasets that was downloaded S2ORC and S2ORC abstract"),
|
68 |
),
|
69 |
H4("Filtering - S2ORC"),
|
70 |
+
P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset"),
|
71 |
Ol(
|
72 |
Li("title_abstract: must have title and abstract"),
|
73 |
Li("The paper must be in English. To determine the language of each document, we use the pycld3 library. We run pycld3 on the first 2000 characters of each paragraph in the paper. The language of the paper is the most common language of the paragraphs."),
|