---
widget:
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
  output:
  - label: POSITIVE
    score: 0.8
  - label: NEGATIVE
    score: 0.2
- text: "[METAKEYWORD] [TITLE] [META] [ABOUT] [HOME] This is cell phone marketplace"
  output:
  - label: POSITIVE
    score: 0.1
  - label: NEGATIVE
    score: 0.9
---
|
|
|
|
|
|
|
## Examples |
|
|
|
Here are some examples of how to use this model in Python: |
|
|
|
```python |
|
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and the model weights from the same Hub repository.
# NOTE(review): the "Simple usage" section of this card loads the same
# checkpoint through a text-classification pipeline; confirm that a causal-LM
# head is really the intended way to load it for this example.
tokenizer = AutoTokenizer.from_pretrained("Rel8ed/cleantech-cls")
model = AutoModelForCausalLM.from_pretrained("Rel8ed/cleantech-cls")

# Adjacent string literals are concatenated with no separator, so every
# continued piece starts with an explicit space to keep the words apart.
input_prompt = (
    "[METAKEYWORD] innovation, technology, clean energy [TITLE] innovative clean energy solutions [META]"
    " leading provider of clean energy solutions. [ABOUT] we are committed to reducing environmental impact through"
    " cutting-edge clean energy solutions. [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
)

inputs = tokenizer.encode(input_prompt, return_tensors='pt')

# Returning several sequences requires beam search (or sampling); plain greedy
# decoding only supports a single returned sequence. `max_new_tokens` bounds
# the generated continuation independently of how long the prompt is.
outputs = model.generate(
    inputs,
    max_new_tokens=50,
    num_beams=5,
    num_return_sequences=5,
)

print("Generated text:")
for i, output in enumerate(outputs):
    print(f"{i+1}: {tokenizer.decode(output, skip_special_tokens=True)}")
|
``` |
|
|
|
## Preprocess text |
|
|
|
```python |
|
import re |
|
|
|
def normalize(s, truncate=100):
    """Reduce raw text to letters and single spaces, capped at ``truncate`` words.

    Steps, in order:
      1. newlines become spaces;
      2. every character outside ``a-z``, ``A-Z``, the ``à-ü`` / ``À-Ü``
         ranges and the plain space is deleted (so digits, punctuation and
         tabs are removed, not replaced by a space);
      3. the text is split on whitespace and only the first ``truncate``
         words are kept;
      4. the kept words are joined with single spaces.

    Args:
        s: The text to clean.
        truncate: Maximum number of words to keep.

    Returns:
        The cleaned, truncated string.
    """
    flattened = s.replace("\n", " ")
    letters_only = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", flattened)
    kept_words = letters_only.split()[:truncate]
    # Final whitespace collapse mirrors the published preprocessing recipe.
    return re.sub(r"\s+", " ", " ".join(kept_words))
|
|
|
|
|
|
|
def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Assemble the tagged model input from the individual page fields.

    Each field is passed through ``normalize`` with ``truncate=truncate_limit``
    and prefixed with its tag, in the fixed order
    ``[METAKEYWORD]``, ``[TITLE]``, ``[META]``, ``[ABOUT]``, ``[HOME]``.
    The homepage text is truncated with the same limit as every other field.

    Args:
        homepageText: Text of the home page (the only required field).
        metakeywords: Meta keywords text.
        title: Page title text.
        meta: Meta description text.
        aboutText: Text of the "about" page.
        truncate_limit: Word limit applied to each field separately.

    Returns:
        The concatenated tagged string with leading/trailing whitespace removed.
    """
    tagged_fields = (
        ("[METAKEYWORD] ", metakeywords),
        (" [TITLE] ", title),
        (" [META] ", meta),
        (" [ABOUT] ", aboutText),
        (" [HOME] ", homepageText),
    )
    pieces = [
        tag + normalize(value, truncate=truncate_limit)
        for tag, value in tagged_fields
    ]
    return "".join(pieces).strip()
|
|
|
# Sample raw inputs
metakeywords = "Green Energy, Sustainability"
meta = "Exploring innovative solutions for a sustainable future."
homepageText = "Welcome to our green energy platform where we share insights and innovations..."
aboutText = "We are committed to advancing green energy solutions through research and development."
title = "Green Energy Innovations"

# Apply the preprocessing steps. Every field is passed by keyword because
# create_full_text takes homepageText as its FIRST parameter; passing the
# values positionally in tag order would file each one under the wrong tag.
full_text = create_full_text(
    homepageText=homepageText,
    metakeywords=metakeywords,
    title=title,
    meta=meta,
    aboutText=aboutText,
)

print(full_text)
|
``` |
|
|
|
## Simple usage |
|
|
|
```python |
|
from transformers import pipeline |
|
import re |
|
|
|
# Hub repository id of the checkpoint to load.
model_name_or_path = "Rel8ed/cleantech-cls"

# Text-classification pipeline for that checkpoint; max_length=512 is
# forwarded to the pipeline as given.
classifier = pipeline(
    task="text-classification",
    model=model_name_or_path,
    max_length=512,
)
|
|
|
def normalize(s, truncate=100):
    """Strip text down to letters and single spaces, keeping ``truncate`` words.

    Newlines are turned into spaces, every character that is not in ``a-z``,
    ``A-Z``, ``à-ü``, ``À-Ü`` or a plain space is deleted, and the first
    ``truncate`` whitespace-separated words are re-joined with single spaces.

    Args:
        s: The text to clean.
        truncate: Maximum number of words to keep.

    Returns:
        The cleaned, truncated string.
    """
    cleaned = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", s.replace("\n", " "))
    shortened = " ".join(cleaned.split()[:truncate])
    return re.sub(r"\s+", " ", shortened)
|
|
|
|
|
def create_full_text(homepageText, metakeywords="", title="", meta="", aboutText="", truncate_limit=100):
    """Build the tagged classifier input from the page fields.

    Fields are normalized via ``normalize(..., truncate=truncate_limit)`` and
    emitted in the order ``[METAKEYWORD]``, ``[TITLE]``, ``[META]``,
    ``[ABOUT]``, ``[HOME]``; the homepage text uses the same word limit as
    the other fields.

    Args:
        homepageText: Text of the home page (the only required field).
        metakeywords: Meta keywords text.
        title: Page title text.
        meta: Meta description text.
        aboutText: Text of the "about" page.
        truncate_limit: Word limit applied to each field separately.

    Returns:
        The concatenated tagged string with surrounding whitespace stripped.
    """
    result = ""
    for tag, value in (
        ("[METAKEYWORD] ", metakeywords),
        (" [TITLE] ", title),
        (" [META] ", meta),
        (" [ABOUT] ", aboutText),
        (" [HOME] ", homepageText),
    ):
        result += tag + normalize(value, truncate=truncate_limit)
    return result.strip()
|
|
|
# Only the homepage text is available here; the remaining fields keep their
# empty-string defaults.
text = "Welcome to our green energy platform where we share insights and innovations"

predictions = classifier(
    create_full_text(homepageText=text)
)
|
|
|
``` |
|
|
|
|
|
|