uspppm-demo / app.py
Alexander Junge
Fix typos
5d5ebf2
raw
history blame
1.67 kB
import gradio as gr
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer
# Sentence-embedding model, created once at import time and shared by every
# call to get_sim() below.
model = SentenceTransformer("AI-Growth-Lab/PatentSBERTa")
def get_sim(anchor: str, target: str) -> float:
    """Return the cosine similarity between the embeddings of two phrases.

    Args:
        anchor: First phrase to compare.
        target: Second phrase to compare.

    Returns:
        The cosine similarity (``1 - cosine distance``) of the two sentence
        embeddings, converted to a plain Python ``float``.
    """
    # Encode both phrases in a single batch; the result holds one row per
    # phrase. Unpacking the rows yields 1-D vectors, which is what scipy's
    # ``cosine`` requires -- handing it the (1, dim) arrays produced by
    # encoding one-element lists is rejected with "Input vector should be 1-D".
    anchor_embed, target_embed = model.encode([anchor, target])
    return float(1 - cosine(anchor_embed, target_embed))
# Single-line text boxes for the two phrases being compared.
anchor_input = gr.inputs.Textbox(placeholder="Anchor", lines=1)
target_input = gr.inputs.Textbox(placeholder="Target", lines=1)

# Output box that shows the computed similarity score.
sim_output = gr.outputs.Textbox(label="Similarity", type="number")

# Example [anchor, target] phrase pairs handed to the interface.
examples = [
    ["renewable power", "renewable energy"],
    ["previously captured image", "image captured previously"],
    ["labeled ligand", "container labelling"],
    ["gold alloy", "platinum"],
    ["dissolve in glycol", "family gathering"],
]
# Gradio interface wiring get_sim() to the two phrase inputs and the
# similarity output, together with the example pairs and explanatory text.
iface = gr.Interface(
    fn=get_sim,
    inputs=[anchor_input, target_input],
    outputs=sim_output,
    examples=examples,
    theme="grass",
    title="Demo: U.S. Patent Phrase to Phrase Matching",
    # Adjacent string literals are concatenated into one description.
    description=(
        "Scores phrases from U.S. patents according to their similarity. "
        "Similarity scores are between 0 and 1, higher scores mean higher similarity, and scores "
        "are computed as the cosine similarity of embeddings produced by the AI-Growth-Lab/PatentSBERTa SentenceTransformer model."
    ),
    article=(
        "Examples are taken from the *Google Patent Phrase Similarity Dataset* used in the "
        "['U.S. Patent Phrase to Phrase Matching' Kaggle competition](https://www.kaggle.com/competitions/us-patent-phrase-to-phrase-matching/overview)."
    ),
)
# Start the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    launch_result = iface.launch(enable_queue=True)
    # The launch result is unpacked into three names; none is used afterwards.
    app, local_url, share_url = launch_result