Instructions to use browndw/en_docusco_spacy with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- spaCy
How to use browndw/en_docusco_spacy with spaCy:
!pip install https://huggingface.co/browndw/en_docusco_spacy/resolve/main/en_docusco_spacy-any-py3-none-any.whl

# Using spacy.load().
import spacy
nlp = spacy.load("en_docusco_spacy")

# Importing as module.
import en_docusco_spacy
nlp = en_docusco_spacy.load()

- Notebooks
- Google Colab
- Kaggle
Update spaCy pipeline
Browse files
- README.md +12 -12
- config.cfg +1 -1
- en_docusco_spacy-any-py3-none-any.whl +2 -2
- meta.json +118 -118
- ner/model +1 -1
- ner/moves +1 -1
- tagger/model +1 -1
- tok2vec/model +1 -1
- vocab/strings.json +2 -2
README.md
CHANGED
|
@@ -14,27 +14,27 @@ model-index:
|
|
| 14 |
metrics:
|
| 15 |
- name: NER Precision
|
| 16 |
type: precision
|
| 17 |
-
value: 0.
|
| 18 |
- name: NER Recall
|
| 19 |
type: recall
|
| 20 |
-
value: 0.
|
| 21 |
- name: NER F Score
|
| 22 |
type: f_score
|
| 23 |
-
value: 0.
|
| 24 |
- task:
|
| 25 |
name: TAG
|
| 26 |
type: token-classification
|
| 27 |
metrics:
|
| 28 |
- name: TAG (XPOS) Accuracy
|
| 29 |
type: accuracy
|
| 30 |
-
value: 0.
|
| 31 |
---
|
| 32 |
English pipeline for part-of-speech and rhetorical tagging.
|
| 33 |
|
| 34 |
| Feature | Description |
|
| 35 |
| --- | --- |
|
| 36 |
| **Name** | `en_docusco_spacy` |
|
| 37 |
-
| **Version** | `1.
|
| 38 |
| **spaCy** | `>=3.5.0,<3.6.0` |
|
| 39 |
| **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
|
| 40 |
| **Components** | `tok2vec`, `tagger`, `ner` |
|
|
@@ -60,10 +60,10 @@ English pipeline for part-of-speech and rhetorical tagging.
|
|
| 60 |
|
| 61 |
| Type | Score |
|
| 62 |
| --- | --- |
|
| 63 |
-
| `TAG_ACC` |
|
| 64 |
-
| `ENTS_F` | 79.
|
| 65 |
-
| `ENTS_P` |
|
| 66 |
-
| `ENTS_R` | 79.
|
| 67 |
-
| `TOK2VEC_LOSS` |
|
| 68 |
-
| `TAGGER_LOSS` |
|
| 69 |
-
| `NER_LOSS` |
|
|
|
|
| 14 |
metrics:
|
| 15 |
- name: NER Precision
|
| 16 |
type: precision
|
| 17 |
+
value: 0.798987704
|
| 18 |
- name: NER Recall
|
| 19 |
type: recall
|
| 20 |
+
value: 0.7954112218
|
| 21 |
- name: NER F Score
|
| 22 |
type: f_score
|
| 23 |
+
value: 0.7971954516
|
| 24 |
- task:
|
| 25 |
name: TAG
|
| 26 |
type: token-classification
|
| 27 |
metrics:
|
| 28 |
- name: TAG (XPOS) Accuracy
|
| 29 |
type: accuracy
|
| 30 |
+
value: 0.9698599662
|
| 31 |
---
|
| 32 |
English pipeline for part-of-speech and rhetorical tagging.
|
| 33 |
|
| 34 |
| Feature | Description |
|
| 35 |
| --- | --- |
|
| 36 |
| **Name** | `en_docusco_spacy` |
|
| 37 |
+
| **Version** | `1.3` |
|
| 38 |
| **spaCy** | `>=3.5.0,<3.6.0` |
|
| 39 |
| **Default Pipeline** | `tok2vec`, `tagger`, `ner` |
|
| 40 |
| **Components** | `tok2vec`, `tagger`, `ner` |
|
|
|
|
| 60 |
|
| 61 |
| Type | Score |
|
| 62 |
| --- | --- |
|
| 63 |
+
| `TAG_ACC` | 96.99 |
|
| 64 |
+
| `ENTS_F` | 79.72 |
|
| 65 |
+
| `ENTS_P` | 79.90 |
|
| 66 |
+
| `ENTS_R` | 79.54 |
|
| 67 |
+
| `TOK2VEC_LOSS` | 20924847.53 |
|
| 68 |
+
| `TAGGER_LOSS` | 1316790.55 |
|
| 69 |
+
| `NER_LOSS` | 5818469.98 |
|
config.cfg
CHANGED
|
@@ -104,7 +104,7 @@ dropout = 0.1
|
|
| 104 |
accumulate_gradient = 1
|
| 105 |
patience = 1600
|
| 106 |
max_epochs = 0
|
| 107 |
-
max_steps =
|
| 108 |
eval_frequency = 250
|
| 109 |
frozen_components = []
|
| 110 |
annotating_components = []
|
|
|
|
| 104 |
accumulate_gradient = 1
|
| 105 |
patience = 1600
|
| 106 |
max_epochs = 0
|
| 107 |
+
max_steps = 40000
|
| 108 |
eval_frequency = 250
|
| 109 |
frozen_components = []
|
| 110 |
annotating_components = []
|
en_docusco_spacy-any-py3-none-any.whl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d5b44d15d93b41b27c0650f71553ece8709eb2717910922fb129809e9423d54
|
| 3 |
+
size 7501545
|
meta.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"lang":"en",
|
| 3 |
"name":"docusco_spacy",
|
| 4 |
-
"version":"1.
|
| 5 |
"description":"English pipeline for part-of-speech and rhetorical tagging.",
|
| 6 |
"author":"David Brown",
|
| 7 |
"email":"dwb2@andrew.cmu.edu",
|
|
@@ -346,195 +346,195 @@
|
|
| 346 |
|
| 347 |
],
|
| 348 |
"performance":{
|
| 349 |
-
"tag_acc":0.
|
| 350 |
-
"ents_f":0.
|
| 351 |
-
"ents_p":0.
|
| 352 |
-
"ents_r":0.
|
| 353 |
"ents_per_type":{
|
| 354 |
"Contingent":{
|
| 355 |
-
"p":0.
|
| 356 |
-
"r":0.
|
| 357 |
-
"f":0.
|
| 358 |
},
|
| 359 |
"InformationExposition":{
|
| 360 |
-
"p":0.
|
| 361 |
-
"r":0.
|
| 362 |
-
"f":0.
|
| 363 |
},
|
| 364 |
"AcademicTerms":{
|
| 365 |
-
"p":0.
|
| 366 |
-
"r":0.
|
| 367 |
-
"f":0.
|
| 368 |
},
|
| 369 |
"ForceStressed":{
|
| 370 |
-
"p":0.
|
| 371 |
-
"r":0.
|
| 372 |
-
"f":0.
|
| 373 |
},
|
| 374 |
"Character":{
|
| 375 |
-
"p":0.
|
| 376 |
-
"r":0.
|
| 377 |
-
"f":0.
|
| 378 |
},
|
| 379 |
"Narrative":{
|
| 380 |
-
"p":0.
|
| 381 |
-
"r":0.
|
| 382 |
-
"f":0.
|
| 383 |
},
|
| 384 |
"Strategic":{
|
| 385 |
-
"p":0.
|
| 386 |
-
"r":0.
|
| 387 |
-
"f":0.
|
| 388 |
},
|
| 389 |
"MetadiscourseInteractive":{
|
| 390 |
-
"p":0.
|
| 391 |
-
"r":0.
|
| 392 |
-
"f":0.
|
| 393 |
},
|
| 394 |
"Facilitate":{
|
| 395 |
-
"p":0.
|
| 396 |
-
"r":0.
|
| 397 |
-
"f":0.
|
| 398 |
},
|
| 399 |
"Negative":{
|
| 400 |
-
"p":0.
|
| 401 |
-
"r":0.
|
| 402 |
-
"f":0.
|
| 403 |
},
|
| 404 |
"Interactive":{
|
| 405 |
-
"p":0.
|
| 406 |
-
"r":0.
|
| 407 |
-
"f":0.
|
| 408 |
},
|
| 409 |
"MetadiscourseCohesive":{
|
| 410 |
-
"p":0.
|
| 411 |
-
"r":0.
|
| 412 |
-
"f":0.
|
| 413 |
},
|
| 414 |
"Description":{
|
| 415 |
-
"p":0.
|
| 416 |
-
"r":0.
|
| 417 |
-
"f":0.
|
| 418 |
},
|
| 419 |
"PublicTerms":{
|
| 420 |
-
"p":0.
|
| 421 |
-
"r":0.
|
| 422 |
-
"f":0.
|
| 423 |
},
|
| 424 |
"Reasoning":{
|
| 425 |
-
"p":0.
|
| 426 |
-
"r":0.
|
| 427 |
-
"f":0.
|
| 428 |
},
|
| 429 |
"Positive":{
|
| 430 |
-
"p":0.
|
| 431 |
-
"r":0.
|
| 432 |
-
"f":0.
|
| 433 |
},
|
| 434 |
"Updates":{
|
| 435 |
-
"p":0.
|
| 436 |
-
"r":0.
|
| 437 |
-
"f":0.
|
| 438 |
},
|
| 439 |
"InformationTopics":{
|
| 440 |
-
"p":0.
|
| 441 |
-
"r":0.
|
| 442 |
-
"f":0.
|
| 443 |
},
|
| 444 |
"ConfidenceHigh":{
|
| 445 |
-
"p":0.
|
| 446 |
-
"r":0.
|
| 447 |
-
"f":0.
|
| 448 |
},
|
| 449 |
"Citation":{
|
| 450 |
-
"p":0.
|
| 451 |
-
"r":0.
|
| 452 |
-
"f":0.
|
| 453 |
},
|
| 454 |
"ConfidenceHedged":{
|
| 455 |
-
"p":0.
|
| 456 |
-
"r":0.
|
| 457 |
-
"f":0.
|
| 458 |
},
|
| 459 |
"InformationChange":{
|
| 460 |
-
"p":0.
|
| 461 |
-
"r":0.
|
| 462 |
-
"f":0.
|
| 463 |
},
|
| 464 |
"InformationStates":{
|
| 465 |
-
"p":0.
|
| 466 |
-
"r":0.
|
| 467 |
-
"f":0.
|
| 468 |
},
|
| 469 |
"FirstPerson":{
|
| 470 |
-
"p":0.
|
| 471 |
-
"r":0.
|
| 472 |
-
"f":0.
|
| 473 |
},
|
| 474 |
"Responsibility":{
|
| 475 |
-
"p":0.
|
| 476 |
-
"r":0.
|
| 477 |
-
"f":0.
|
| 478 |
},
|
| 479 |
"Inquiry":{
|
| 480 |
-
"p":0.
|
| 481 |
-
"r":0.
|
| 482 |
-
"f":0.
|
| 483 |
},
|
| 484 |
"InformationChangeNegative":{
|
| 485 |
-
"p":0.
|
| 486 |
-
"r":0.
|
| 487 |
-
"f":0.
|
| 488 |
},
|
| 489 |
"ConfidenceLow":{
|
| 490 |
-
"p":0.
|
| 491 |
-
"r":0.
|
| 492 |
-
"f":0.
|
| 493 |
},
|
| 494 |
"InformationPlace":{
|
| 495 |
-
"p":0.
|
| 496 |
-
"r":0.
|
| 497 |
-
"f":0.
|
| 498 |
-
},
|
| 499 |
-
"InformationReportVerbs":{
|
| 500 |
-
"p":0.7508147746,
|
| 501 |
-
"r":0.7912701252,
|
| 502 |
-
"f":0.7705117932
|
| 503 |
},
|
| 504 |
"Future":{
|
| 505 |
-
"p":0.
|
| 506 |
-
"r":0.
|
| 507 |
-
"f":0.
|
| 508 |
},
|
| 509 |
"AcademicWritingMoves":{
|
| 510 |
-
"p":0.
|
| 511 |
-
"r":0.
|
| 512 |
-
"f":0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
},
|
| 514 |
"Uncertainty":{
|
| 515 |
-
"p":0.
|
| 516 |
-
"r":0.
|
| 517 |
-
"f":0.
|
| 518 |
},
|
| 519 |
"CitationHedged":{
|
| 520 |
-
"p":0.
|
| 521 |
-
"r":0.
|
| 522 |
-
"f":0.
|
| 523 |
},
|
| 524 |
"CitationAuthority":{
|
| 525 |
-
"p":0.
|
| 526 |
-
"r":0.
|
| 527 |
-
"f":0.
|
| 528 |
},
|
| 529 |
"InformationChangePositive":{
|
| 530 |
-
"p":0.
|
| 531 |
-
"r":0.
|
| 532 |
-
"f":0.
|
| 533 |
}
|
| 534 |
},
|
| 535 |
-
"tok2vec_loss":
|
| 536 |
-
"tagger_loss":
|
| 537 |
-
"ner_loss":
|
| 538 |
},
|
| 539 |
"requirements":[
|
| 540 |
|
|
|
|
| 1 |
{
|
| 2 |
"lang":"en",
|
| 3 |
"name":"docusco_spacy",
|
| 4 |
+
"version":"1.3",
|
| 5 |
"description":"English pipeline for part-of-speech and rhetorical tagging.",
|
| 6 |
"author":"David Brown",
|
| 7 |
"email":"dwb2@andrew.cmu.edu",
|
|
|
|
| 346 |
|
| 347 |
],
|
| 348 |
"performance":{
|
| 349 |
+
"tag_acc":0.9698599662,
|
| 350 |
+
"ents_f":0.7971954516,
|
| 351 |
+
"ents_p":0.798987704,
|
| 352 |
+
"ents_r":0.7954112218,
|
| 353 |
"ents_per_type":{
|
| 354 |
"Contingent":{
|
| 355 |
+
"p":0.818815331,
|
| 356 |
+
"r":0.782463929,
|
| 357 |
+
"f":0.8002270148
|
| 358 |
},
|
| 359 |
"InformationExposition":{
|
| 360 |
+
"p":0.8498392228,
|
| 361 |
+
"r":0.857557341,
|
| 362 |
+
"f":0.8536808374
|
| 363 |
},
|
| 364 |
"AcademicTerms":{
|
| 365 |
+
"p":0.8128432795,
|
| 366 |
+
"r":0.8176600252,
|
| 367 |
+
"f":0.8152445377
|
| 368 |
},
|
| 369 |
"ForceStressed":{
|
| 370 |
+
"p":0.8036501362,
|
| 371 |
+
"r":0.7950043821,
|
| 372 |
+
"f":0.7993038804
|
| 373 |
},
|
| 374 |
"Character":{
|
| 375 |
+
"p":0.8509934653,
|
| 376 |
+
"r":0.8578615428,
|
| 377 |
+
"f":0.8544137022
|
| 378 |
},
|
| 379 |
"Narrative":{
|
| 380 |
+
"p":0.7922994384,
|
| 381 |
+
"r":0.7865517992,
|
| 382 |
+
"f":0.789415157
|
| 383 |
},
|
| 384 |
"Strategic":{
|
| 385 |
+
"p":0.74291956,
|
| 386 |
+
"r":0.7049416991,
|
| 387 |
+
"f":0.7234325438
|
| 388 |
},
|
| 389 |
"MetadiscourseInteractive":{
|
| 390 |
+
"p":0.8243080626,
|
| 391 |
+
"r":0.7077258639,
|
| 392 |
+
"f":0.761581223
|
| 393 |
},
|
| 394 |
"Facilitate":{
|
| 395 |
+
"p":0.7420591457,
|
| 396 |
+
"r":0.6909739929,
|
| 397 |
+
"f":0.7156060206
|
| 398 |
},
|
| 399 |
"Negative":{
|
| 400 |
+
"p":0.7366169936,
|
| 401 |
+
"r":0.6818932229,
|
| 402 |
+
"f":0.7081995321
|
| 403 |
},
|
| 404 |
"Interactive":{
|
| 405 |
+
"p":0.8438560526,
|
| 406 |
+
"r":0.8501978617,
|
| 407 |
+
"f":0.8470150867
|
| 408 |
},
|
| 409 |
"MetadiscourseCohesive":{
|
| 410 |
+
"p":0.9307703425,
|
| 411 |
+
"r":0.9246894967,
|
| 412 |
+
"f":0.9277199553
|
| 413 |
},
|
| 414 |
"Description":{
|
| 415 |
+
"p":0.7184076094,
|
| 416 |
+
"r":0.7692427259,
|
| 417 |
+
"f":0.7429566137
|
| 418 |
},
|
| 419 |
"PublicTerms":{
|
| 420 |
+
"p":0.8250038862,
|
| 421 |
+
"r":0.8023660141,
|
| 422 |
+
"f":0.8135274957
|
| 423 |
},
|
| 424 |
"Reasoning":{
|
| 425 |
+
"p":0.8453436321,
|
| 426 |
+
"r":0.8060425995,
|
| 427 |
+
"f":0.8252254568
|
| 428 |
},
|
| 429 |
"Positive":{
|
| 430 |
+
"p":0.7428654449,
|
| 431 |
+
"r":0.6843594646,
|
| 432 |
+
"f":0.7124132921
|
| 433 |
},
|
| 434 |
"Updates":{
|
| 435 |
+
"p":0.7921472679,
|
| 436 |
+
"r":0.7476358038,
|
| 437 |
+
"f":0.7692481756
|
| 438 |
},
|
| 439 |
"InformationTopics":{
|
| 440 |
+
"p":0.7997236338,
|
| 441 |
+
"r":0.8110942833,
|
| 442 |
+
"f":0.8053688262
|
| 443 |
},
|
| 444 |
"ConfidenceHigh":{
|
| 445 |
+
"p":0.7693539348,
|
| 446 |
+
"r":0.7862870234,
|
| 447 |
+
"f":0.7777283211
|
| 448 |
},
|
| 449 |
"Citation":{
|
| 450 |
+
"p":0.8227242525,
|
| 451 |
+
"r":0.7993544222,
|
| 452 |
+
"f":0.8108709889
|
| 453 |
},
|
| 454 |
"ConfidenceHedged":{
|
| 455 |
+
"p":0.8354197349,
|
| 456 |
+
"r":0.8900831633,
|
| 457 |
+
"f":0.8618855884
|
| 458 |
},
|
| 459 |
"InformationChange":{
|
| 460 |
+
"p":0.7294003868,
|
| 461 |
+
"r":0.7230371009,
|
| 462 |
+
"f":0.7262048048
|
| 463 |
},
|
| 464 |
"InformationStates":{
|
| 465 |
+
"p":0.8306426735,
|
| 466 |
+
"r":0.8544531415,
|
| 467 |
+
"f":0.8423796861
|
| 468 |
},
|
| 469 |
"FirstPerson":{
|
| 470 |
+
"p":0.8819685753,
|
| 471 |
+
"r":0.9076103856,
|
| 472 |
+
"f":0.8946057773
|
| 473 |
},
|
| 474 |
"Responsibility":{
|
| 475 |
+
"p":0.7287275566,
|
| 476 |
+
"r":0.6496172582,
|
| 477 |
+
"f":0.6869021339
|
| 478 |
},
|
| 479 |
"Inquiry":{
|
| 480 |
+
"p":0.6458673322,
|
| 481 |
+
"r":0.6369402632,
|
| 482 |
+
"f":0.6413727359
|
| 483 |
},
|
| 484 |
"InformationChangeNegative":{
|
| 485 |
+
"p":0.738317757,
|
| 486 |
+
"r":0.532285233,
|
| 487 |
+
"f":0.6185970636
|
| 488 |
},
|
| 489 |
"ConfidenceLow":{
|
| 490 |
+
"p":0.8554216867,
|
| 491 |
+
"r":0.5059382423,
|
| 492 |
+
"f":0.6358208955
|
| 493 |
},
|
| 494 |
"InformationPlace":{
|
| 495 |
+
"p":0.8853535824,
|
| 496 |
+
"r":0.8875035024,
|
| 497 |
+
"f":0.8864272388
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
},
|
| 499 |
"Future":{
|
| 500 |
+
"p":0.7515119175,
|
| 501 |
+
"r":0.7649827992,
|
| 502 |
+
"f":0.758187528
|
| 503 |
},
|
| 504 |
"AcademicWritingMoves":{
|
| 505 |
+
"p":0.6664195701,
|
| 506 |
+
"r":0.4892517007,
|
| 507 |
+
"f":0.5642554527
|
| 508 |
+
},
|
| 509 |
+
"InformationReportVerbs":{
|
| 510 |
+
"p":0.7737441669,
|
| 511 |
+
"r":0.8067978533,
|
| 512 |
+
"f":0.7899253862
|
| 513 |
},
|
| 514 |
"Uncertainty":{
|
| 515 |
+
"p":0.7402862986,
|
| 516 |
+
"r":0.6408700051,
|
| 517 |
+
"f":0.6870001356
|
| 518 |
},
|
| 519 |
"CitationHedged":{
|
| 520 |
+
"p":0.7630662021,
|
| 521 |
+
"r":0.9399141631,
|
| 522 |
+
"f":0.8423076923
|
| 523 |
},
|
| 524 |
"CitationAuthority":{
|
| 525 |
+
"p":0.7866273353,
|
| 526 |
+
"r":0.5972377753,
|
| 527 |
+
"f":0.6789730533
|
| 528 |
},
|
| 529 |
"InformationChangePositive":{
|
| 530 |
+
"p":0.7317845829,
|
| 531 |
+
"r":0.605592776,
|
| 532 |
+
"f":0.6627350972
|
| 533 |
}
|
| 534 |
},
|
| 535 |
+
"tok2vec_loss":209248.4752924392,
|
| 536 |
+
"tagger_loss":13167.9055271149,
|
| 537 |
+
"ner_loss":58184.6998399578
|
| 538 |
},
|
| 539 |
"requirements":[
|
| 540 |
|
ner/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 163912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7aba221a3d579504169f52262fd70ea8a121a9f4e1c6f1fd186ede35abfbe5fe
|
| 3 |
size 163912
|
ner/moves
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
��moves�
|
|
|
|
| 1 |
+
��moves�
|
tagger/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 105978
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:461db1e078111078173d225b54ffabd42ad232623f2dcf885ce021825852e07e
|
| 3 |
size 105978
|
tok2vec/model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6009091
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e68cf632c7b7e0720b53ee8c470444f0f69e01ec4044babcce83294c0e02d9d
|
| 3 |
size 6009091
|
vocab/strings.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2397c5f890da06f450cc9102a3d46437e4927baf67e92c6e0b502c9add35a474
|
| 3 |
+
size 6614972
|