Spaces:
Sleeping
Sleeping
Merge branch 'main' into add-pdf-viewer
Browse files
document_qa/grobid_processors.py
CHANGED
@@ -467,6 +467,11 @@ class GrobidMaterialsProcessor(BaseProcessor):
|
|
467 |
if status != 200:
|
468 |
result = []
|
469 |
|
|
|
|
|
|
|
|
|
|
|
470 |
compositions = []
|
471 |
for material in result:
|
472 |
if 'resolvedFormulas' in material:
|
@@ -476,7 +481,8 @@ class GrobidMaterialsProcessor(BaseProcessor):
|
|
476 |
elif 'formula' in material:
|
477 |
if 'formulaComposition' in material['formula']:
|
478 |
compositions.append(material['formula']['formulaComposition'])
|
479 |
-
|
|
|
480 |
return compositions
|
481 |
|
482 |
@staticmethod
|
@@ -514,6 +520,12 @@ class GrobidAggregationProcessor(GrobidProcessor, GrobidQuantitiesProcessor, Gro
|
|
514 |
entities = self.prune_overlapping_annotations(all_entities)
|
515 |
return entities
|
516 |
|
|
|
|
|
|
|
|
|
|
|
|
|
517 |
@staticmethod
|
518 |
def prune_overlapping_annotations(entities: list) -> list:
|
519 |
# Sorting by offsets
|
|
|
467 |
if status != 200:
|
468 |
result = []
|
469 |
|
470 |
+
compositions = self.output_info(result)
|
471 |
+
|
472 |
+
return compositions
|
473 |
+
|
474 |
+
def output_info(self, result):
|
475 |
compositions = []
|
476 |
for material in result:
|
477 |
if 'resolvedFormulas' in material:
|
|
|
481 |
elif 'formula' in material:
|
482 |
if 'formulaComposition' in material['formula']:
|
483 |
compositions.append(material['formula']['formulaComposition'])
|
484 |
+
if 'name' in material:
|
485 |
+
compositions.append(material['name'])
|
486 |
return compositions
|
487 |
|
488 |
@staticmethod
|
|
|
520 |
entities = self.prune_overlapping_annotations(all_entities)
|
521 |
return entities
|
522 |
|
523 |
+
def extract_quantities(self, text):
|
524 |
+
return self.gqp.extract_quantities(text)
|
525 |
+
|
526 |
+
def extract_materials(self, text):
|
527 |
+
return self.gmp.extract_materials(text)
|
528 |
+
|
529 |
@staticmethod
|
530 |
def prune_overlapping_annotations(entities: list) -> list:
|
531 |
# Sorting by offsets
|