Zhibin Lu
committed on
Commit
•
50c753d
1
Parent(s):
777dbcc
some comments
Browse files- configuration_vgcn_bert.py +0 -1
- modeling_vgcn_bert.py +3 -1
configuration_vgcn_bert.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
# coding=utf-8
|
2 |
-
# Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc.
|
3 |
#
|
4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
# you may not use this file except in compliance with the License.
|
|
|
1 |
# coding=utf-8
|
|
|
2 |
#
|
3 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
# you may not use this file except in compliance with the License.
|
modeling_vgcn_bert.py
CHANGED
@@ -404,13 +404,15 @@ def _build_predefined_graph(
|
|
404 |
|
405 |
|
406 |
# TODO: build knowledge graph from a list of RDF triples
|
407 |
-
# def _build_knowledge_graph
|
408 |
|
409 |
|
410 |
class WordGraphBuilder:
|
411 |
"""
|
412 |
Word graph based on adjacency matrix, construct from text samples or pre-defined word-pair relations
|
413 |
|
|
|
|
|
|
|
414 |
Params:
|
415 |
`rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
|
416 |
`tokenizer`: The same pretrained tokenizer that is used for the model later.
|
|
|
404 |
|
405 |
|
406 |
# TODO: build knowledge graph from a list of RDF triples
|
|
|
407 |
|
408 |
|
409 |
class WordGraphBuilder:
|
410 |
"""
|
411 |
Word graph based on adjacency matrix, construct from text samples or pre-defined word-pair relations
|
412 |
|
413 |
+
You may (or may not) first preprocess the text before building the graph,
|
414 |
+
e.g. Stopword removal, String cleaning, Stemming, Normalization, Lemmatization
|
415 |
+
|
416 |
Params:
|
417 |
`rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
|
418 |
`tokenizer`: The same pretrained tokenizer that is used for the model later.
|