File size: 4,151 Bytes
d4fa000 7269ad3 d4fa000 7269ad3 d4fa000 5fb3e98 f4863ec 5fb3e98 ee839a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
---
widget:
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
output:
- label: POSITIVE
score: 0.8
- label: NEGATIVE
score: 0.2
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] This is cell phone marketplace"
output:
- label: POSITIVE
score: 0.1
- label: NEGATIVE
score: 0.9
---
## Examples
Here are some examples of how to use this model in Python:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): this checkpoint is used as a text-classification model in the
# sections below — confirm a causal-LM head is really intended for this example.
tokenizer = AutoTokenizer.from_pretrained("Rel8ed/cleantech-cls")
model = AutoModelForCausalLM.from_pretrained("Rel8ed/cleantech-cls")

# Tagged prompt in the format the model was trained on. Leading spaces on the
# continuation strings are required: implicit concatenation adds no separator.
input_prompt = "[METAKEYWORD] innovation, technology, clean energy [TITLE] innovative clean energy solutions [META]" \
    " leading provider of clean energy solutions. [ABOUT] we are committed to reducing environmental impact through" \
    " cutting-edge clean energy solutions. [HOME] welcome to our website where we explore innovative technologies for a sustainable future."

inputs = tokenizer.encode(input_prompt, return_tensors='pt')
# Bug fix: the generate() result must be named `outputs` — the loop below iterates it.
outputs = model.generate(inputs, max_length=50, num_return_sequences=5)
print("Generated text:")
for i, output in enumerate(outputs):
    print(f"{i+1}: {tokenizer.decode(output, skip_special_tokens=True)}")
```
## Preprocess text
```python
import re
def normalize(s, truncate=100):
    """Clean a raw text field for the classifier.

    Newlines become spaces, every character that is not a letter
    (ASCII or the accented à-ü / À-Ü ranges) or a space is dropped,
    and the result is capped at `truncate` whitespace-separated words.
    """
    # Treat newlines as ordinary word separators.
    cleaned = s.replace("\n", " ")
    # Strip everything except letters and spaces.
    cleaned = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", cleaned)
    # Cap the word count and re-join with single spaces.
    cleaned = " ".join(cleaned.split()[:truncate])
    # Collapse any residual whitespace runs.
    return re.sub(r"\s+", " ", cleaned)
def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Assemble the tagged input string expected by the classifier.

    Every field — including the homepage text — is normalized and truncated
    to `truncate_limit` words, then prefixed with its section tag.
    """
    sections = [
        ("[METAKEYWORD]", metakeywords),
        ("[TITLE]", title),
        ("[META]", meta),
        ("[ABOUT]", aboutText),
        ("[HOME]", homepageText),
    ]
    tagged = [f"{tag} {normalize(text, truncate=truncate_limit)}" for tag, text in sections]
    return " ".join(tagged).strip()
# Sample raw inputs
metakeywords = "Green Energy, Sustainability"
meta = "Exploring innovative solutions for a sustainable future."
homepageText = "Welcome to our green energy platform where we share insights and innovations..."
aboutText = "We are committed to advancing green energy solutions through research and development."
title = "Green Energy Innovations"

# Bug fix: `homepageText` is the FIRST positional parameter of create_full_text;
# the original call passed the fields positionally in the wrong order, so every
# value landed under the wrong tag. Keyword arguments make the mapping explicit.
full_text = create_full_text(
    homepageText,
    metakeywords=metakeywords,
    title=title,
    meta=meta,
    aboutText=aboutText,
)
print(full_text)
```
## Simple usage
```python
from transformers import pipeline
import re
# Hub identifier of the fine-tuned cleantech classifier checkpoint.
model_name_or_path = "Rel8ed/cleantech-cls"
# max_length=512 caps tokenized inputs at the model's context window.
classifier = pipeline('text-classification', model=model_name_or_path, max_length=512)
def normalize(s, truncate=100):
    """Keep only letters and spaces, then cap the text at `truncate` words."""
    letters_only = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", s.replace("\n", " "))
    kept_words = letters_only.split()[:truncate]
    joined = " ".join(kept_words)
    return re.sub(r"\s+", " ", joined)
def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Build the tagged classifier input from the raw page fields.

    Each field is normalized (truncated to `truncate_limit` words) and
    prefixed with its section tag before concatenation.
    """
    def _section(tag, raw):
        # One tagged section: the tag followed by the normalized field text.
        return tag + " " + normalize(raw, truncate=truncate_limit)

    combined = (
        _section("[METAKEYWORD]", metakeywords)
        + " " + _section("[TITLE]", title)
        + " " + _section("[META]", meta)
        + " " + _section("[ABOUT]", aboutText)
        + " " + _section("[HOME]", homepageText)
    )
    return combined.strip()
text = "Welcome to our green energy platform where we share insights and innovations"
# Only the homepage text is supplied here; the remaining tagged fields
# (metakeywords, title, meta, about) fall back to their "" defaults.
predictions = classifier(create_full_text(text))
```
|