Spaces:
Runtime error
Runtime error
File size: 8,223 Bytes
7066834 1dee282 7066834 08ad968 7066834 1dee282 7066834 1dee282 7066834 152990b 1dee282 7066834 1dee282 7066834 1dee282 7066834 1dee282 7066834 1dee282 7066834 1dee282 7066834 1dee282 7066834 1dee282 7066834 bef7cb9 7066834 152990b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import gradio as gr
from wordllama import WordLlama
# Load the default WordLlama model once, at import time.
# The helper functions defined below all call methods on this single
# module-level instance.
wl = WordLlama.load()
def calculate_similarity(sentence1, sentence2):
    """Return the similarity score for two sentences.

    The score is whatever ``wl.similarity`` yields for the pair, using the
    module-level WordLlama model.
    """
    return wl.similarity(sentence1, sentence2)
def rank_documents(query, candidates):
    """Rank candidate documents against a query.

    Delegates to ``wl.rank`` on the module-level WordLlama model and returns
    its result unchanged.
    """
    return wl.rank(query, candidates)
def deduplicate_candidates(candidates, threshold):
    """Deduplicate a list of candidates at the given threshold.

    Delegates to ``wl.deduplicate`` on the module-level WordLlama model and
    returns its result unchanged.
    """
    return wl.deduplicate(candidates, threshold)
def filter_candidates(query, candidates, threshold):
    """Filter candidates against a query at the given threshold.

    Delegates to ``wl.filter`` on the module-level WordLlama model and
    returns its result unchanged.
    """
    return wl.filter(query, candidates, threshold)
def topk_candidates(query, candidates, k):
    """Select the top ``k`` candidates for a query.

    Delegates to ``wl.topk`` on the module-level WordLlama model and returns
    its result unchanged.
    """
    return wl.topk(query, candidates, k)
def create_gradio_interface():
    """Build the tabbed Gradio Blocks app that demonstrates WordLlama.

    One tab is created per operation (similarity, ranking, deduplication,
    filtering and top-k selection). Each tab wires its input widgets to the
    matching module-level helper and offers a few clickable examples.

    Candidate lists are entered as one comma-separated string. Before being
    handed to the helpers they are split on commas, stripped of surrounding
    whitespace, and empty entries are dropped, so ``"a, b,"`` becomes
    ``["a", "b"]`` rather than ``["a", " b", ""]``.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.

    Raises:
        gr.Error: Raised from the event handlers (and shown in the UI) when
            a candidate field contains no non-blank entry.
    """

    def _split_candidates(text):
        """Turn a comma-separated textbox value into a clean list of strings."""
        pieces = [piece.strip() for piece in (text or "").split(",")]
        cleaned = [piece for piece in pieces if piece]
        if not cleaned:
            # Show a readable message in the UI instead of sending a blank
            # candidate list to the model.
            raise gr.Error("Please enter at least one candidate.")
        return cleaned

    def _on_rank(query_text, candidates_text):
        """Rank the parsed candidates and return them as Dataframe rows."""
        ranked = rank_documents(query_text, _split_candidates(candidates_text))
        # The usage notes at the bottom of the page show rank() yielding
        # (document, score) tuples; hand the Dataframe plain list rows.
        return [list(row) for row in ranked]

    def _on_deduplicate(candidates_text, threshold):
        """Deduplicate the parsed candidates at the chosen threshold."""
        return deduplicate_candidates(_split_candidates(candidates_text), threshold)

    def _on_filter(query_text, candidates_text, threshold):
        """Filter the parsed candidates against the query."""
        return filter_candidates(
            query_text, _split_candidates(candidates_text), threshold
        )

    def _on_topk(query_text, candidates_text, top_k):
        """Return the top-k parsed candidates for the query."""
        # The slider value is coerced so the helper always receives an int.
        return topk_candidates(
            query_text, _split_candidates(candidates_text), int(top_k)
        )

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# WordLlama")
        gr.Markdown("![ WordLlama Avatar](https://github.com/dleemiller/WordLlama/raw/main/wordllama.png)")

        with gr.Tab("Similarity"):
            with gr.Row():
                sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
                sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
            similarity_output = gr.Number(label="Similarity Score")
            submit_similarity_btn = gr.Button("Calculate Similarity")
            submit_similarity_btn.click(
                fn=calculate_similarity,
                inputs=[sentence1, sentence2],
                outputs=[similarity_output],
            )
            gr.Examples(
                examples=[
                    ["I love programming.", "I enjoy coding."],
                    ["The weather is sunny.", "It's a bright day."],
                    ["I need coffee.", "I'm looking for a coffee shop."],
                ],
                inputs=[sentence1, sentence2],
            )

        with gr.Tab("Rank Documents"):
            query = gr.Textbox(label="Query", placeholder="Enter the query here...")
            candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
            ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
            submit_rank_btn = gr.Button("Rank Documents")
            submit_rank_btn.click(
                fn=_on_rank,
                inputs=[query, candidates],
                outputs=[ranked_docs_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle"],
                    ["Looking for a restaurant", "I need food, I'm hungry, I want to eat, Let's find a place to eat"],
                    ["Best programming languages", "Python, JavaScript, Java, C++"],
                ],
                inputs=[query, candidates],
            )

        with gr.Tab("Deduplicate Candidates"):
            candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
            threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
            deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
            submit_dedup_btn = gr.Button("Deduplicate")
            submit_dedup_btn.click(
                fn=_on_deduplicate,
                inputs=[candidates_dedup, threshold_dedup],
                outputs=[deduplicated_output],
            )
            gr.Examples(
                examples=[
                    ["apple, apple, orange, banana", 0.8],
                    ["cat, dog, cat, bird, dog", 0.9],
                    ["text, text, more text, text", 0.7],
                ],
                inputs=[candidates_dedup, threshold_dedup],
            )

        with gr.Tab("Filter Candidates"):
            filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
            candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
            threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
            filtered_output = gr.Textbox(label="Filtered Candidates")
            submit_filter_btn = gr.Button("Filter Candidates")
            submit_filter_btn.click(
                fn=_on_filter,
                inputs=[filter_query, candidates_filter, threshold_filter],
                outputs=[filtered_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck", 0.3],
                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 0.4],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 0.5],
                ],
                inputs=[filter_query, candidates_filter, threshold_filter],
            )

        with gr.Tab("Top-k Candidates"):
            topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
            candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
            k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
            topk_output = gr.Textbox(label="Top-k Candidates")
            submit_topk_btn = gr.Button("Get Top-k Candidates")
            submit_topk_btn.click(
                fn=_on_topk,
                inputs=[topk_query, candidates_topk, k],
                outputs=[topk_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle", 3],
                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 2],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 4],
                ],
                inputs=[topk_query, candidates_topk, k],
            )

        # Static usage notes. The text is kept at column 0 so the Markdown
        # headings and fenced code blocks are not read as indented code.
        gr.Markdown("""
# WordLlama Gradio Demo
**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
## Examples
**Calculate Similarity**
```python
from wordllama import WordLlama
# Load the default WordLlama model
wl = WordLlama.load()
# Calculate similarity between two sentences
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
print(similarity_score) # Output: 0.06641249096796882
```
**Rank Documents**
```python
query = "i went to the car"
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
ranked_docs = wl.rank(query, candidates)
print(ranked_docs)
# Output:
# [
# ('i went to the vehicle', 0.7441646856486314),
# ('i went to the truck', 0.2832691551894259),
# ('i went to the shop', 0.19732814982305436),
# ('i went to the park', 0.15101404519322253)
# ]
```
**Additional Inference Methods**
```python
# Fuzzy Deduplication
wl.deduplicate(candidates, threshold=0.8)
# Clustering with K-means
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
# Filtering Candidates
wl.filter(query, candidates, threshold=0.3)
# Top-k Candidates
wl.topk(query, candidates, k=3)
```
""")
    return demo
# Create and launch the Gradio interface.
# The Blocks app is bound to the module-level name `demo` before `launch()`
# is called on it; both statements run whenever this module is executed.
demo = create_gradio_interface()
demo.launch()
|