File size: 3,069 Bytes
74a2352
a409eda
74a2352
 
 
 
 
 
 
 
 
a409eda
74a2352
 
 
a409eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65fca12
74a2352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import spacy
import subprocess
from typing import Tuple
from spacy import displacy
from anytree import Node, RenderTree

class Pipeline:
    """Produce dependency-tree views of a sentence using a spaCy model.

    ``compute`` is the public entry point. It returns an HTML rendering
    (displaCy) and a plain-text tree (anytree) for a sentence, and keeps the
    result of the most recent sentence so that repeated calls with the same
    input do not re-run the parser.
    """

    def __init__(
        self
    ) -> None:
        """Load the ``en_core_web_md`` model and start with an empty cache."""
        self.nlp = None
        # One-entry cache: the last sentence processed and its two renderings.
        self.__ch_html_tree = None
        self.__ch_str_tree = None
        self.__ch_sentence = None
        self.__init_nlp(model="en_core_web_md")

    def __init_nlp(
        self,
        model: str
    ) -> None:
        """Load the spaCy pipeline *model* into ``self.nlp``.

        If the model cannot be loaded, it is downloaded with
        ``python -m spacy download`` and loading is retried once.

        Args:
            model: Name of the spaCy model package to load.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt (raised by ``spacy.load``).
        """
        import importlib
        import sys

        self.nlp = None
        try:
            self.nlp = spacy.load(model)
            return
        except OSError:
            # spaCy signals a missing model package with OSError; any other
            # exception is a genuine failure and must not trigger a download.
            print(f"* Downloading {model} model...")

        # Run the download with the interpreter that is executing this code so
        # the package lands in the same environment, and pass the arguments as
        # a list so the model name is never interpreted by a shell.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model],
            stdout=subprocess.PIPE,
            check=False,  # a failed download surfaces via spacy.load below
        )

        # The package was installed while this process was running; refresh
        # the import system's finder caches before trying to import it.
        importlib.invalidate_caches()
        self.nlp = spacy.load(model)

    def __postag(
        self,
        tk: "spacy.tokens.Token"
    ) -> str:
        """Return the label suffix holding the fine-grained tag of *tk*.

        The suffix is ``" (TAG)"``; the tags ``NNS`` and ``NNPS`` additionally
        get ``" (Plural)"`` appended.
        """
        plural_marker = " (Plural)" if tk.tag_ in ("NNS", "NNPS") else ""
        return f" ({tk.tag_}){plural_marker}"

    def __genSyntacticTree(
        self,
        expr: str
    ) -> Tuple[str,str]:
        """Parse *expr* and render its dependency structure in two forms.

        The text tree is built from the root of the first sentence in the
        parsed doc only; the HTML view renders the whole doc.

        Args:
            expr: Text to parse.

        Returns:
            A ``(html_tree, str_tree)`` tuple: the displaCy markup wrapped in
            a centered, horizontally scrollable ``<div>``, and the anytree
            rendering with one node per line.
        """
        doc = self.nlp(expr)
        root = next(doc.sents).root
        root_node = Node(
            f"{root.text}: (Root){self.__postag(root)}",
            parent=None
        )

        def attach_children(
            tk: "spacy.tokens.Token",
            parent_node: Node,
            depth: int
        ) -> None:
            # Depth-first walk: each child is labelled with its distance from
            # the root and hung under the node of its syntactic head.
            for child in tk.children:
                child_node = Node(
                    f"{child.text}: {depth + 1}{self.__postag(child)}",
                    parent=parent_node
                )
                attach_children(child, child_node, depth + 1)

        attach_children(root, root_node, 0)

        syntactic_str_tree = "".join(
            f"{prefix}{tree_node.name}\n"
            for prefix, _, tree_node in RenderTree(root_node)
        )

        # jupyter=False forces displaCy to return the markup; with the default
        # auto-detection it displays the figure and returns None in notebooks.
        syntactic_tree = displacy.render(
            doc,
            style='dep',
            jupyter=False,
            options={'distance': 100}
        )
        syntactic_html_tree = f"""
            <center>
                <div style='max-width: 800px; overflow-x:auto;'>
                    {syntactic_tree}
                </div>
            </center>
        """
        return syntactic_html_tree, syntactic_str_tree

    def compute(
        self,
        sentence: str
    ) -> Tuple[str,str,str]:
        """Return the dependency-tree renderings for *sentence*.

        Args:
            sentence: Text to analyse.

        Returns:
            A ``(error, html_tree, str_tree)`` tuple. For a blank sentence
            ``error`` is an HTML warning and both trees are empty strings;
            otherwise ``error`` is ``""`` and the trees hold the renderings.
        """
        error = ""
        error_template = """
            <center>
                <div class="alert alert-warning" role="alert">
                <h6><b>{}</b></h6>
                </div>
            </center>
        """
        if not sentence.strip():
            error = error_template.format("The sentence can not be empty!")
            return error, "", ""

        if sentence != self.__ch_sentence:
            # Generate first and commit afterwards: if parsing raises, the
            # cache must keep describing the previous sentence instead of
            # pairing the new sentence with stale trees.
            html_tree, str_tree = self.__genSyntacticTree(sentence)
            self.__ch_sentence = sentence
            self.__ch_html_tree = html_tree
            self.__ch_str_tree = str_tree

        return error, self.__ch_html_tree, self.__ch_str_tree