Spaces:

1littlecoder
/

wordllama

Runtime error

App Files Files Community

wordllama / app.py

1littlecoder

Update app.py

118ea7d verified 3 months ago

raw

history blame contribute delete

8.13 kB

	import gradio as gr
	from wordllama import WordLlama

	# Load the default WordLlama model once, at import time. The helper
	# functions in this file all call methods on this shared `wl` instance.
	wl = WordLlama.load()

	def calculate_similarity(sentence1, sentence2):
	    """Compare two sentences with the shared WordLlama model.

	    Args:
	        sentence1: First sentence to compare.
	        sentence2: Second sentence to compare.

	    Returns:
	        The result of ``wl.similarity(sentence1, sentence2)``, passed
	        through unchanged.
	    """
	    return wl.similarity(sentence1, sentence2)

	def rank_documents(query, candidates):
	    """Rank candidate documents against a query with the shared model.

	    Args:
	        query: Text the candidates are ranked against.
	        candidates: Collection of candidate document strings.

	    Returns:
	        The result of ``wl.rank(query, candidates)``, passed through
	        unchanged.
	    """
	    return wl.rank(query, candidates)

	def deduplicate_candidates(candidates, threshold):
	    """Deduplicate candidates with the shared WordLlama model.

	    Args:
	        candidates: Collection of candidate strings.
	        threshold: Threshold value forwarded to ``wl.deduplicate``.

	    Returns:
	        The result of ``wl.deduplicate``, passed through unchanged.
	    """
	    return wl.deduplicate(candidates, threshold=threshold)

	def filter_candidates(query, candidates, threshold):
	    """Filter candidates against a query with the shared WordLlama model.

	    Args:
	        query: Text the candidates are compared with.
	        candidates: Collection of candidate strings.
	        threshold: Threshold value forwarded to ``wl.filter``.

	    Returns:
	        The result of ``wl.filter``, passed through unchanged.
	    """
	    return wl.filter(query, candidates, threshold=threshold)

	def topk_candidates(query, candidates, k):
	    """Select the top-k candidates for a query with the shared model.

	    Args:
	        query: Text the candidates are compared with.
	        candidates: Collection of candidate strings.
	        k: Number of candidates requested, forwarded to ``wl.topk``.

	    Returns:
	        The result of ``wl.topk``, passed through unchanged.
	    """
	    return wl.topk(query, candidates, k=k)

	def create_gradio_interface():
	    """Build the tabbed WordLlama demo and return it without launching.

	    One tab is created per operation (similarity, ranking, deduplication,
	    filtering, top-k). Each tab wires a button to the matching
	    module-level helper and registers clickable example inputs. A
	    Markdown section with usage notes is appended below the tabs.

	    Candidate lists are entered as comma-separated text. Each piece is
	    stripped of surrounding whitespace and empty pieces are dropped, so
	    ``"a, b, "`` becomes ``["a", "b"]`` instead of ``["a", " b", " "]``.

	    Returns:
	        The ``gr.Blocks`` instance holding the whole interface.
	    """

	    def split_candidates(text):
	        """Turn comma-separated text into a list of clean candidates."""
	        pieces = (piece.strip() for piece in (text or "").split(','))
	        return [piece for piece in pieces if piece]

	    # The handlers below return None when no usable candidate was entered
	    # rather than calling the model with an empty list.
	    # NOTE(review): None is expected to leave the output component empty;
	    # confirm against the Gradio version in use.
	    def rank_from_text(query_text, candidates_text):
	        docs = split_candidates(candidates_text)
	        return rank_documents(query_text, docs) if docs else None

	    def dedup_from_text(candidates_text, threshold):
	        docs = split_candidates(candidates_text)
	        return deduplicate_candidates(docs, threshold) if docs else None

	    def filter_from_text(query_text, candidates_text, threshold):
	        docs = split_candidates(candidates_text)
	        return filter_candidates(query_text, docs, threshold) if docs else None

	    def topk_from_text(query_text, candidates_text, top_k):
	        docs = split_candidates(candidates_text)
	        return topk_candidates(query_text, docs, top_k) if docs else None

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:

	        gr.Markdown("# WordLlama")
	        gr.Markdown("## NLP Toolkit")

	        with gr.Tab("Similarity"):
	            # Both sentence boxes share one row; output and button follow.
	            with gr.Row():
	                sentence1 = gr.Textbox(label="Sentence 1", placeholder="Enter the first sentence here...")
	                sentence2 = gr.Textbox(label="Sentence 2", placeholder="Enter the second sentence here...")
	            similarity_output = gr.Number(label="Similarity Score")
	            submit_similarity_btn = gr.Button("Calculate Similarity")
	            submit_similarity_btn.click(
	                fn=calculate_similarity,
	                inputs=[sentence1, sentence2],
	                outputs=[similarity_output]
	            )
	            gr.Examples(
	                examples=[
	                    ["I love programming.", "I enjoy coding."],
	                    ["The weather is sunny.", "It's a bright day."],
	                    ["I need coffee.", "I'm looking for a coffee shop."]
	                ],
	                inputs=[sentence1, sentence2],
	            )

	        with gr.Tab("Rank Documents"):
	            query = gr.Textbox(label="Query", placeholder="Enter the query here...")
	            candidates = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
	            ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
	            submit_rank_btn = gr.Button("Rank Documents")
	            submit_rank_btn.click(
	                fn=rank_from_text,
	                inputs=[query, candidates],
	                outputs=[ranked_docs_output]
	            )
	            gr.Examples(
	                examples=[
	                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle"],
	                    ["Looking for a restaurant", "I need food, I'm hungry, I want to eat, Let's find a place to eat"],
	                    ["Best programming languages", "Python, JavaScript, Java, C++"]
	                ],
	                inputs=[query, candidates],
	            )

	        with gr.Tab("Deduplicate Candidates"):
	            candidates_dedup = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
	            threshold_dedup = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8)
	            deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
	            submit_dedup_btn = gr.Button("Deduplicate")
	            submit_dedup_btn.click(
	                fn=dedup_from_text,
	                inputs=[candidates_dedup, threshold_dedup],
	                outputs=[deduplicated_output]
	            )
	            gr.Examples(
	                examples=[
	                    ["apple, apple", 0.8],
	                    ["delhi, new delhi", 0.87],
	                    ["text, textual", 0.7]
	                ],
	                inputs=[candidates_dedup, threshold_dedup],
	            )

	        with gr.Tab("Filter Candidates"):
	            filter_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
	            candidates_filter = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
	            threshold_filter = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3)
	            filtered_output = gr.Textbox(label="Filtered Candidates")
	            submit_filter_btn = gr.Button("Filter Candidates")
	            submit_filter_btn.click(
	                fn=filter_from_text,
	                inputs=[filter_query, candidates_filter, threshold_filter],
	                outputs=[filtered_output]
	            )
	            gr.Examples(
	                examples=[
	                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck", 0.3],
	                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 0.4],
	                    ["Best programming languages", "Python, JavaScript, Java, C++", 0.5]
	                ],
	                inputs=[filter_query, candidates_filter, threshold_filter],
	            )

	        with gr.Tab("Top-k Candidates"):
	            topk_query = gr.Textbox(label="Query", placeholder="Enter the query here...")
	            candidates_topk = gr.Textbox(label="Candidates (comma separated)", placeholder="Enter candidate sentences here...")
	            k = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
	            topk_output = gr.Textbox(label="Top-k Candidates")
	            submit_topk_btn = gr.Button("Get Top-k Candidates")
	            submit_topk_btn.click(
	                fn=topk_from_text,
	                inputs=[topk_query, candidates_topk, k],
	                outputs=[topk_output]
	            )
	            gr.Examples(
	                examples=[
	                    ["I went to the car", "I went to the park, I went to the shop, I went to the truck, I went to the vehicle", 3],
	                    ["Looking for a restaurant", "I want to eat, I'm hungry, Let's find a place to eat", 2],
	                    ["Best programming languages", "Python, JavaScript, Java, C++", 4]
	                ],
	                inputs=[topk_query, candidates_topk, k],
	            )

	        # Usage notes shown below the tabs. The text is kept exactly as
	        # written, including its leading whitespace.
	        gr.Markdown("""
	# WordLlama Gradio Demo

	WordLlama is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.

	For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).

	## Examples

	Calculate Similarity

	```python
	from wordllama import WordLlama

	# Load the default WordLlama model
	wl = WordLlama.load()

	# Calculate similarity between two sentences
	similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
	print(similarity_score) # Output: 0.06641249096796882
	```

	Rank Documents

	```python
	query = "i went to the car"
	candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
	ranked_docs = wl.rank(query, candidates)
	print(ranked_docs)
	# Output:
	# [
	# ('i went to the vehicle', 0.7441646856486314),
	# ('i went to the truck', 0.2832691551894259),
	# ('i went to the shop', 0.19732814982305436),
	# ('i went to the park', 0.15101404519322253)
	# ]
	```

	Additional Inference Methods

	```python
	# Fuzzy Deduplication
	wl.deduplicate(candidates, threshold=0.8)

	# Clustering with K-means
	wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)

	# Filtering Candidates
	wl.filter(query, candidates, threshold=0.3)

	# Top-k Candidates
	wl.topk(query, candidates, k=3)
	```
	""")

	    return demo

	# Build the interface and start serving it as soon as this file runs.
	# `demo` is kept as a module-level name; launch() is called with no
	# arguments, so Gradio's default launch settings apply.
	demo = create_gradio_interface()
	demo.launch()