File size: 1,868 Bytes
8752f05
8ee9567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be85188
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from .model import GLiNER

# Load the pretrained GLiNER checkpoint once, at import time; NER_Model()
# reuses this module-level instance on every call instead of reloading it.
# NOTE(review): presumably from_pretrained loads (and may download) the model
# weights, which would make importing this module slow — confirm against the
# GLiNER class in .model.
model = GLiNER.from_pretrained("urchade/gliner_mediumv2.1")

# Sample input for trying out entity prediction by hand (looks like text
# extracted from a store's business card — TODO confirm the source).
# NOTE: NER_Model() declares its own `text` parameter, which shadows this
# global inside the function; this value is only used if a caller passes it in.
text = """
 lenskart m: (0)9428002330 Lenskart Store,Surat m: (0)9723817060) e:lenskartsurat@gmail.com Store Address UG-4.Ascon City.Opp.Maheshwari Bhavan,Citylight,Surat-395007"""

def NER_Model(text):
    """Extract entities from ``text`` and group them by output field.

    Calls ``predict_entities`` on the module-level ``model`` with a fixed
    label set and a threshold of 0.5, prints every predicted entity as
    ``<text> => <label>``, and files each entity's text under the output
    field that corresponds to its label.

    Args:
        text: The raw text to run entity prediction on.

    Returns:
        A dict with the keys ``Name``, ``Contact``, ``Designation``,
        ``Address``, ``Link``, ``Company``, ``Email`` and ``extracted_text``.
        Every value is a list of strings. ``Address`` always holds exactly
        one string: all predicted addresses joined with ``", "`` (the empty
        string when none were predicted). ``extracted_text`` is a
        one-element list containing the input ``text``.
    """
    # Model label -> key in the returned dict. Insertion order doubles as the
    # label order handed to the model, so it must not be rearranged.
    field_for_label = {
        "Person": "Name",
        "Mail": "Email",
        "Number": "Contact",
        "Address": "Address",
        "Organization": "Company",
        "Designation": "Designation",
        "Link": "Link",
    }
    labels = list(field_for_label)

    # Perform entity prediction
    predictions = model.predict_entities(text, labels, threshold=0.5)

    # Result skeleton; key order matches the layout callers receive.
    result = {
        field: []
        for field in (
            "Name",
            "Contact",
            "Designation",
            "Address",
            "Link",
            "Company",
            "Email",
        )
    }
    result["extracted_text"] = ""

    for hit in predictions:
        value = hit["text"]
        label = hit["label"]
        print(value, "=>", label)

        # Entities carrying a label outside the requested set are ignored.
        field = field_for_label.get(label)
        if field is not None:
            result[field].append(value)

    # Collapse all address fragments into a single comma-separated entry.
    result["Address"] = [", ".join(result["Address"])]
    result["extracted_text"] = [text]

    return result