Rel8ed committed on
Commit
5fb3e98
1 Parent(s): 8d9fcb4

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +104 -0
README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Examples
2
+
3
+ Here are some examples of how to use this model in Python:
4
+
5
+ ```python
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+
8
+ tokenizer = AutoTokenizer.from_pretrained("Rel8ed/cleantech-cls")
9
+ model = AutoModelForCausalLM.from_pretrained("Rel8ed/cleantech-cls")
10
+
11
+ input_prompt = "[METAKEYWORD] innovation, technology, clean energy [TITLE] innovative clean energy solutions [META] " \
12
+ "leading provider of clean energy solutions. [ABOUT] we are committed to reducing environmental impact through " \
13
+ "cutting-edge clean energy solutions. [HOME] welcome to our website where we explore innovative technologies for a sustainable future."
14
+
15
+ inputs = tokenizer.encode(input_prompt, return_tensors='pt')
16
+ outputs = model.generate(inputs, max_length=50, num_return_sequences=5, do_sample=True)
17
+
18
+ print("Generated text:")
19
+ for i, output in enumerate(outputs):
20
+ print(f"{i+1}: {tokenizer.decode(output, skip_special_tokens=True)}")
21
+ ```
22
+
23
+ ## Preprocess text
24
+
25
+ ```python
26
+ import re
27
+
28
+ def normalize(s, truncate=100):
29
+ # Replace "\n" with " "
30
+ s = s.replace("\n", " ")
31
+
32
+ # Keep only letters (including accented letters) and spaces
33
+ s = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", s)
34
+
35
+ # Split the string into words, keep only the first `truncate` words, and join back into a string
36
+ words = s.split()
37
+ truncated = words[:truncate]
38
+ s = " ".join(truncated)
39
+
40
+ # Remove additional spaces
41
+ s = re.sub(r"\s+", " ", s)
42
+
43
+ return s
44
+
45
+
46
+
47
+ def create_full_text(homepageText,metakeywords = "", title = "", meta = "", aboutText = "", truncate_limit=100):
48
+ return (
49
+ "[METAKEYWORD] " + normalize(metakeywords, truncate=truncate_limit) +
50
+ " [TITLE] " + normalize(title, truncate=truncate_limit) +
51
+ " [META] " + normalize(meta, truncate=truncate_limit) +
52
+ " [ABOUT] " + normalize(aboutText, truncate=truncate_limit) +
53
+ # NOTE: homepageText is truncated with the same truncate_limit as the other fields
54
+ " [HOME] " + normalize(homepageText, truncate=truncate_limit)
55
+ ).strip()
56
+
57
+ # Sample raw inputs
58
+ metakeywords = "Green Energy, Sustainability"
59
+ meta = "Exploring innovative solutions for a sustainable future."
60
+ homepageText = "Welcome to our green energy platform where we share insights and innovations..."
61
+ aboutText = "We are committed to advancing green energy solutions through research and development."
62
+ title = "Green Energy Innovations"
63
+
64
+ # Applying your preprocessing steps
65
+ full_text = create_full_text(homepageText, metakeywords=metakeywords, title=title, meta=meta, aboutText=aboutText)
66
+
67
+ print(full_text)
68
+ ```
69
+
70
+ ## Simple usage
71
+
72
+ ```python
73
+ from transformers import pipeline
74
+ import re
75
+
76
+ model_name_or_path = "Rel8ed/cleantech-cls"
77
+
78
+ classifier = pipeline('text-classification', model=model_name_or_path)
79
+
80
+ def normalize(s, truncate=100):
81
+ s = s.replace("\n", " ")
82
+ s = re.sub(r"[^a-zA-Zà-üÀ-Ü ]", "", s)
83
+ words = s.split()
84
+ truncated = words[:truncate]
85
+ s = " ".join(truncated)
86
+ s = re.sub(r"\s+", " ", s)
87
+ return s
88
+
89
+
90
+ def create_full_text(homepageText,metakeywords = "", title = "", meta = "", aboutText = "", truncate_limit=100):
91
+ return (
92
+ "[METAKEYWORD] " + normalize(metakeywords, truncate=truncate_limit) +
93
+ " [TITLE] " + normalize(title, truncate=truncate_limit) +
94
+ " [META] " + normalize(meta, truncate=truncate_limit) +
95
+ " [ABOUT] " + normalize(aboutText, truncate=truncate_limit) +
96
+ # NOTE: homepageText is truncated with the same truncate_limit as the other fields
97
+ " [HOME] " + normalize(homepageText, truncate=truncate_limit)
98
+ ).strip()
99
+
100
+ text = "Welcome to our green energy platform where we share insights and innovations"
101
+
102
+ predictions = classifier(create_full_text(text))
103
+
104
+ ```