File size: 4,151 Bytes
d4fa000
 
 
7269ad3
 
 
 
 
d4fa000
7269ad3
 
 
 
 
d4fa000
 
 
 
5fb3e98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4863ec
5fb3e98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee839a2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
widget:
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
  output:
    - label: POSITIVE
      score: 0.8
    - label: NEGATIVE
      score: 0.2
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] This is cell phone marketplace"
  output:
    - label: POSITIVE
      score: 0.1
    - label: NEGATIVE
      score: 0.9
---



## Examples

Here are some examples of how to use this model in Python:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): the widget metadata above declares a text-classification
# model, so AutoModelForCausalLM/generate may not match this checkpoint's
# head — confirm (the "Simple usage" pipeline example uses text-classification).
tokenizer = AutoTokenizer.from_pretrained("Rel8ed/cleantech-cls")
model = AutoModelForCausalLM.from_pretrained("Rel8ed/cleantech-cls")

# Tagged prompt in the "[METAKEYWORD] ... [HOME] ..." format the model expects.
# Fix: the original backslash-continued literals were missing separator spaces,
# concatenating to "[META]leading" and "throughcutting-edge".
input_prompt = (
    "[METAKEYWORD] innovation, technology, clean energy [TITLE] innovative clean energy solutions [META] "
    "leading provider of clean energy solutions. [ABOUT] we are committed to reducing environmental impact through "
    "cutting-edge clean energy solutions. [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
)

inputs = tokenizer.encode(input_prompt, return_tensors='pt')
# Fix: bind to `outputs` (plural) — the loop below iterated an undefined name.
outputs = model.generate(inputs, max_length=50, num_return_sequences=5)

print("Generated text:")
for i, output in enumerate(outputs):
    print(f"{i+1}: {tokenizer.decode(output, skip_special_tokens=True)}")
```

## Preprocess text

```python
import re

def normalize(s, truncate=100):
    """Clean raw page text: keep only letters and spaces, cap the word count.

    Newlines are first turned into double spaces, every character that is not
    a (possibly accented) letter or a space is dropped, the text is truncated
    to at most `truncate` words, and whitespace runs are collapsed.
    """
    # Newlines become double spaces before character filtering.
    cleaned = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", s.replace("\n", "  "))
    # Word-level truncation, re-joined on single spaces.
    capped = " ".join(cleaned.split()[:truncate])
    # Collapse any residual runs of whitespace.
    return re.sub(r"\s+", " ", capped)



def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Assemble the tagged "[METAKEYWORD] ... [HOME] ..." string the classifier expects.

    Every field is normalized (letters + spaces only) and truncated to
    `truncate_limit` words before being prefixed with its section tag.
    """
    sections = [
        ("[METAKEYWORD]", metakeywords),
        ("[TITLE]", title),
        ("[META]", meta),
        ("[ABOUT]", aboutText),
        # NOTE(review): homepageText currently shares truncate_limit; the
        # original comment hinted a higher (or no) limit may be intended — confirm.
        ("[HOME]", homepageText),
    ]
    tagged = [f"{tag} {normalize(text, truncate=truncate_limit)}" for tag, text in sections]
    return " ".join(tagged).strip()

# Sample raw inputs
metakeywords = "Green Energy, Sustainability"
meta = "Exploring innovative solutions for a sustainable future."
homepageText = "Welcome to our green energy platform where we share insights and innovations..."
aboutText = "We are committed to advancing green energy solutions through research and development."
title = "Green Energy Innovations"

# Fix: create_full_text takes homepageText as its FIRST positional parameter;
# the original call passed the fields in the wrong positional order
# (metakeywords landed on homepageText, and so on). Keyword arguments make
# each value land on the intended parameter.
full_text = create_full_text(
    homepageText,
    metakeywords=metakeywords,
    title=title,
    meta=meta,
    aboutText=aboutText,
)

print(full_text)
```

## Simple usage

```python
from transformers import pipeline
import re

# Hub checkpoint id for the cleantech website classifier.
model_name_or_path = "Rel8ed/cleantech-cls"

# max_length=512 caps tokenized inputs at the model's context window.
classifier = pipeline('text-classification', model=model_name_or_path, max_length=512)

def normalize(s, truncate=100):
    """Strip non-letter characters, collapse whitespace, and keep at most
    `truncate` words of *s*."""
    no_newlines = s.replace("\n", "  ")            # newlines -> double spaces
    letters_only = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", no_newlines)
    first_words = letters_only.split()[:truncate]  # word-level truncation
    joined = " ".join(first_words)
    return re.sub(r"\s+", " ", joined)             # collapse residual runs


def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Build the tagged "[METAKEYWORD] ... [HOME] ..." input string for the classifier.

    Each field is normalized and word-truncated to `truncate_limit` before
    being prefixed with its section tag; the result is stripped of outer
    whitespace.
    """
    def _norm(text):
        # All sections share the same word-count cap.
        return normalize(text, truncate=truncate_limit)

    result = "[METAKEYWORD] " + _norm(metakeywords)
    result += " [TITLE] " + _norm(title)
    result += " [META] " + _norm(meta)
    result += " [ABOUT] " + _norm(aboutText)
    # NOTE(review): homepageText shares truncate_limit here; the original
    # comment suggested a larger (or no) limit might be intended — confirm.
    result += " [HOME] " + _norm(homepageText)
    return result.strip()

# Raw homepage text; the other sections (metakeywords/title/meta/about) default to "".
text = "Welcome to our green energy platform where we share insights and innovations"

# `text` lands on homepageText, the first positional parameter of create_full_text.
predictions = classifier(create_full_text(text))

```