zhibinlu
/

vgcn-bert-distilbert-base-uncased

Feature Extraction

Model card Files Files and versions Community

Zhibin Lu committed on Jul 3, 2023

Commit

50c753d

•

1 Parent(s): 777dbcc

some comments

Files changed (2) hide show

configuration_vgcn_bert.py +0 -1
modeling_vgcn_bert.py +3 -1

configuration_vgcn_bert.py CHANGED Viewed

@@ -1,5 +1,4 @@
 # coding=utf-8
-# Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

 # coding=utf-8
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

modeling_vgcn_bert.py CHANGED Viewed

@@ -404,13 +404,15 @@ def _build_predefined_graph(
 # TODO: build knowledge graph from a list of RDF triples
-# def _build_knowledge_graph
 class WordGraphBuilder:
     """
     Word graph based on an adjacency matrix, constructed from text samples or pre-defined word-pair relations
     Params:
         `rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
         `tokenizer`: The same pretrained tokenizer that is used for the model later.

 # TODO: build knowledge graph from a list of RDF triples
 class WordGraphBuilder:
     """
     Word graph based on an adjacency matrix, constructed from text samples or pre-defined word-pair relations
+    You may optionally preprocess the text before building the graph,
+    e.g. stopword removal, string cleaning, stemming, normalization, lemmatization
     Params:
         `rows`: List[str] of text samples, or pre-defined word-pair relations: List[Tuple[str, str, float]]
         `tokenizer`: The same pretrained tokenizer that is used for the model later.