Upload README.md
Browse files
README.md
CHANGED
@@ -83,7 +83,7 @@ Using this model becomes easy when you have [sentence-transformers](https://www.
|
|
83 |
pip install -U sentence-transformers
|
84 |
```
|
85 |
|
86 |
-
|
87 |
|
88 |
```python
|
89 |
from sentence_transformers import SentenceTransformer
|
@@ -94,10 +94,57 @@ embeddings = model.encode(sentences)
|
|
94 |
print(embeddings)
|
95 |
```
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
### License
|
98 |
|
99 |
This project is licensed under the [MIT License](./LICENSE).
|
100 |
|
101 |
### Copyright
|
102 |
|
103 |
-
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).
|
|
|
83 |
pip install -U sentence-transformers
|
84 |
```
|
85 |
|
86 |
+
### Embeddings
|
87 |
|
88 |
```python
|
89 |
from sentence_transformers import SentenceTransformer
|
|
|
94 |
print(embeddings)
|
95 |
```
|
96 |
|
97 |
+
### Advanced Usage
|
98 |
+
|
99 |
+
```python
|
100 |
+
from sentence_transformers import SentenceTransformer, util
|
101 |
+
import torch
|
102 |
+
|
103 |
+
# Define sentences in Flemish
|
104 |
+
sentences = [
|
105 |
+
"Wat is de hoofdstad van Engeland?",
|
106 |
+
"Welk dier is het warmste ter wereld?",
|
107 |
+
"Hoe kan ik Vlaams leren?",
|
108 |
+
"Wat is het meest populaire gerecht in België?",
|
109 |
+
"Welk soort kleding draagt men voor Vlaamse feesten?"
|
110 |
+
]
|
111 |
+
|
112 |
+
# Load the Flemish-trained model
|
113 |
+
model = SentenceTransformer('0xnu/pmmlv2-fine-tuned-flemish')
|
114 |
+
|
115 |
+
# Compute embeddings
|
116 |
+
embeddings = model.encode(sentences, convert_to_tensor=True)
|
117 |
+
|
118 |
+
# Function to find the closest sentence
|
119 |
+
def find_closest_sentence(query_embedding, sentence_embeddings, sentences):
|
120 |
+
# Compute cosine similarities
|
121 |
+
cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
|
122 |
+
# Find the position of the highest score
|
123 |
+
best_match_index = torch.argmax(cosine_scores).item()
|
124 |
+
return sentences[best_match_index], cosine_scores[best_match_index].item()
|
125 |
+
|
126 |
+
query = "Wat is de hoofdstad van Engeland?"
|
127 |
+
query_embedding = model.encode(query, convert_to_tensor=True)
|
128 |
+
closest_sentence, similarity_score = find_closest_sentence(query_embedding, embeddings, sentences)
|
129 |
+
|
130 |
+
print(f"Vraag: {query}")
|
131 |
+
print(f"Meest gelijkende zin: {closest_sentence}")
|
132 |
+
print(f"Overeenkomstscore: {similarity_score:.4f}")
|
133 |
+
|
134 |
+
# You can also try with a new sentence not in the original list
|
135 |
+
new_query = "Wie is de huidige koning van België?"
|
136 |
+
new_query_embedding = model.encode(new_query, convert_to_tensor=True)
|
137 |
+
closest_sentence, similarity_score = find_closest_sentence(new_query_embedding, embeddings, sentences)
|
138 |
+
|
139 |
+
print(f"\nNieuwe vraag: {new_query}")
|
140 |
+
print(f"Meest gelijkende zin: {closest_sentence}")
|
141 |
+
print(f"Overeenkomstscore: {similarity_score:.4f}")
|
142 |
+
```
|
143 |
+
|
144 |
### License
|
145 |
|
146 |
This project is licensed under the [MIT License](./LICENSE).
|
147 |
|
148 |
### Copyright
|
149 |
|
150 |
+
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).
|