wordllama / app.py
1littlecoder's picture
Update app.py
a177aa8 verified
raw
history blame
8.25 kB
import gradio as gr
from wordllama import WordLlama
# Load the default WordLlama model once, when this module is executed; the
# helper functions below all share this single `wl` instance.
wl = WordLlama.load()
def calculate_similarity(sentence1, sentence2):
    """Return the score `wl.similarity` gives for the two sentences."""
    return wl.similarity(sentence1, sentence2)
def rank_documents(query, candidates):
    """Return the result of `wl.rank` for `candidates` against `query`."""
    return wl.rank(query, candidates)
def deduplicate_candidates(candidates, threshold):
    """Return `candidates` after `wl.deduplicate` at the given threshold."""
    return wl.deduplicate(candidates, threshold)
def filter_candidates(query, candidates, threshold):
    """Return the result of `wl.filter` for `candidates` against `query`."""
    return wl.filter(query, candidates, threshold)
def topk_candidates(query, candidates, k):
    """Return the result of `wl.topk` for `candidates` against `query`."""
    return wl.topk(query, candidates, k)
# Markdown shown below the tabs. Kept at module level (and flush-left) so the
# rendered text does not depend on the indentation of the code that uses it.
_ABOUT_MARKDOWN = """
# WordLlama Gradio Demo
**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.
For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).
## Examples
**Calculate Similarity**
```python
from wordllama import WordLlama
# Load the default WordLlama model
wl = WordLlama.load()
# Calculate similarity between two sentences
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
print(similarity_score) # Output: 0.06641249096796882
```
**Rank Documents**
```python
query = "i went to the car"
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
ranked_docs = wl.rank(query, candidates)
print(ranked_docs)
# Output:
# [
# ('i went to the vehicle', 0.7441646856486314),
# ('i went to the truck', 0.2832691551894259),
# ('i went to the shop', 0.19732814982305436),
# ('i went to the park', 0.15101404519322253)
# ]
```
**Additional Inference Methods**
```python
# Fuzzy Deduplication
wl.deduplicate(candidates, threshold=0.8)
# Clustering with K-means
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)
# Filtering Candidates
wl.filter(query, candidates, threshold=0.3)
# Top-k Candidates
wl.topk(query, candidates, k=3)
```
"""


def _split_candidates(raw):
    """Turn a comma-separated textbox value into a clean list of candidates.

    Whitespace around each entry is stripped and empty entries (blank input,
    doubled commas, a trailing comma) are dropped, so "a, b" yields
    ["a", "b"] rather than ["a", " b"].

    Raises:
        gr.Error: if no non-empty candidate remains, so the user sees a
            readable message in the UI.
    """
    entries = [part.strip() for part in (raw or "").split(",")]
    entries = [entry for entry in entries if entry]
    if not entries:
        raise gr.Error("Please enter at least one candidate.")
    return entries


def create_gradio_interface():
    """Build the WordLlama demo UI and return it without launching it.

    The UI has one tab per operation (similarity, rank, deduplicate, filter,
    top-k), each wired to the matching module-level helper, followed by an
    about section. Candidate lists are entered as comma-separated text and
    parsed with `_split_candidates`.

    Returns:
        The `gr.Blocks` app; the caller is responsible for `launch()`.
    """
    candidates_label = "Candidates (comma separated)"
    candidates_placeholder = "Enter candidate sentences here..."

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# WordLlama")
        # Single-quoted Python literal so the HTML attribute quotes do not
        # terminate the string.
        gr.Markdown(
            '<img src="https://github.com/dleemiller/WordLlama/raw/main/wordllama.png"'
            ' width="100" height="100">'
        )

        with gr.Tab("Similarity"):
            with gr.Row():
                sentence1 = gr.Textbox(
                    label="Sentence 1",
                    placeholder="Enter the first sentence here...",
                )
                sentence2 = gr.Textbox(
                    label="Sentence 2",
                    placeholder="Enter the second sentence here...",
                )
            similarity_output = gr.Number(label="Similarity Score")
            submit_similarity_btn = gr.Button("Calculate Similarity")
            submit_similarity_btn.click(
                fn=calculate_similarity,
                inputs=[sentence1, sentence2],
                outputs=[similarity_output],
            )
            gr.Examples(
                examples=[
                    ["I love programming.", "I enjoy coding."],
                    ["The weather is sunny.", "It's a bright day."],
                    ["I need coffee.", "I'm looking for a coffee shop."],
                ],
                inputs=[sentence1, sentence2],
            )

        with gr.Tab("Rank Documents"):
            query = gr.Textbox(label="Query", placeholder="Enter the query here...")
            candidates = gr.Textbox(
                label=candidates_label, placeholder=candidates_placeholder
            )
            ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
            submit_rank_btn = gr.Button("Rank Documents")
            submit_rank_btn.click(
                fn=lambda q, c: rank_documents(q, _split_candidates(c)),
                inputs=[query, candidates],
                outputs=[ranked_docs_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle"],
                    ["Looking for a restaurant", "I need food, I'm hungry, I want to eat, Let's find a place to eat"],
                    ["Best programming languages", "Python, JavaScript, Java, C++"],
                ],
                inputs=[query, candidates],
            )

        with gr.Tab("Deduplicate Candidates"):
            candidates_dedup = gr.Textbox(
                label=candidates_label, placeholder=candidates_placeholder
            )
            threshold_dedup = gr.Slider(
                label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8
            )
            deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
            submit_dedup_btn = gr.Button("Deduplicate")
            submit_dedup_btn.click(
                fn=lambda c, t: deduplicate_candidates(_split_candidates(c), t),
                inputs=[candidates_dedup, threshold_dedup],
                outputs=[deduplicated_output],
            )
            gr.Examples(
                examples=[
                    ["apple, apple, orange, banana", 0.8],
                    ["cat, dog, cat, bird, dog", 0.9],
                    ["text, text, more text, text", 0.7],
                ],
                inputs=[candidates_dedup, threshold_dedup],
            )

        with gr.Tab("Filter Candidates"):
            filter_query = gr.Textbox(
                label="Query", placeholder="Enter the query here..."
            )
            candidates_filter = gr.Textbox(
                label=candidates_label, placeholder=candidates_placeholder
            )
            threshold_filter = gr.Slider(
                label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3
            )
            filtered_output = gr.Textbox(label="Filtered Candidates")
            submit_filter_btn = gr.Button("Filter Candidates")
            submit_filter_btn.click(
                fn=lambda q, c, t: filter_candidates(q, _split_candidates(c), t),
                inputs=[filter_query, candidates_filter, threshold_filter],
                outputs=[filtered_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck", 0.3],
                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 0.4],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 0.5],
                ],
                inputs=[filter_query, candidates_filter, threshold_filter],
            )

        with gr.Tab("Top-k Candidates"):
            topk_query = gr.Textbox(
                label="Query", placeholder="Enter the query here..."
            )
            candidates_topk = gr.Textbox(
                label=candidates_label, placeholder=candidates_placeholder
            )
            k_slider = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
            topk_output = gr.Textbox(label="Top-k Candidates")
            submit_topk_btn = gr.Button("Get Top-k Candidates")
            submit_topk_btn.click(
                # int() is defensive: a count must be an integer, and the
                # slider value may arrive as a float.
                fn=lambda q, c, k: topk_candidates(q, _split_candidates(c), int(k)),
                inputs=[topk_query, candidates_topk, k_slider],
                outputs=[topk_output],
            )
            gr.Examples(
                examples=[
                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle", 3],
                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 2],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 4],
                ],
                inputs=[topk_query, candidates_topk, k_slider],
            )

        gr.Markdown(_ABOUT_MARKDOWN)

    return demo
# Create and launch the Gradio interface.
# `demo` is bound at module level and launched as soon as this file runs.
demo = create_gradio_interface()
demo.launch()