Zhibin Lu committed on
Commit
50c753d
1 Parent(s): 777dbcc

some comments

Browse files
configuration_vgcn_bert.py CHANGED
@@ -1,5 +1,4 @@
1
  # coding=utf-8
2
- # Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc.
3
  #
4
  # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
 
1
  # coding=utf-8
 
2
  #
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
  # you may not use this file except in compliance with the License.
modeling_vgcn_bert.py CHANGED
@@ -404,13 +404,15 @@ def _build_predefined_graph(
404
 
405
 
406
  # TODO: build knowledge graph from a list of RDF triples
407
- # def _build_knowledge_graph
408
 
409
 
410
  class WordGraphBuilder:
411
  """
412
  Word graph based on adjacency matrix, constructed from text samples or pre-defined word-pair relations
413
 
 
 
 
414
  Params:
415
  `rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
416
  `tokenizer`: The same pretrained tokenizer that is used for the model later.
 
404
 
405
 
406
  # TODO: build knowledge graph from a list of RDF triples
 
407
 
408
 
409
  class WordGraphBuilder:
410
  """
411
  Word graph based on adjacency matrix, constructed from text samples or pre-defined word-pair relations
412
 
413
+ You may optionally preprocess the text before building the graph,
414
+ e.g. Stopword removal, String cleaning, Stemming, Normalization, Lemmatization
415
+
416
  Params:
417
  `rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
418
  `tokenizer`: The same pretrained tokenizer that is used for the model later.