File size: 1,670 Bytes
a644d5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1847363
 
5d5ebf2
 
1847363
 
a644d5c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer


# Checkpoint identifier for the sentence-embedding model; the model is
# instantiated once at import time and shared by every similarity request.
_MODEL_NAME = "AI-Growth-Lab/PatentSBERTa"
model = SentenceTransformer(_MODEL_NAME)


def get_sim(anchor: str, target: str) -> float:
    """Return the cosine similarity between the embeddings of two phrases.

    Args:
        anchor: First phrase to compare.
        target: Second phrase to compare.

    Returns:
        ``1 - cosine_distance`` of the two embeddings, converted to a plain
        Python ``float``. The value is returned as computed (no clamping).
    """
    # Encode the bare strings rather than one-element lists: a list input
    # produces a 2-D (1, dim) batch, while scipy's ``cosine`` is specified for
    # 1-D vectors and recent SciPy releases raise ValueError on 2-D input
    # instead of squeezing it.
    anchor_embed = model.encode(anchor)
    target_embed = model.encode(target)
    return float(1 - cosine(anchor_embed, target_embed))


# Single-line text fields for the two phrases being compared.
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` look like the legacy Gradio
# component namespaces — confirm the pinned Gradio version before upgrading.
anchor_input = gr.inputs.Textbox(lines=1, placeholder="Anchor")
target_input = gr.inputs.Textbox(lines=1, placeholder="Target")

# The score returned by ``get_sim`` is displayed in a numeric text box.
sim_output = gr.outputs.Textbox(type="number", label="Similarity")

# Clickable [anchor, target] example pairs shown with the interface.
examples = [
    ["renewable power", "renewable energy"],
    ["previously captured image", "image captured previously"],
    ["labeled ligand", "container labelling"],
    ["gold alloy", "platinum"],
    ["dissolve in glycol", "family gathering"],
]

# Wire the two inputs and the single output to ``get_sim``.
iface = gr.Interface(
    fn=get_sim,
    inputs=[anchor_input, target_input],
    outputs=sim_output,
    examples=examples,
    theme="grass",
    title="Demo: U.S. Patent Phrase to Phrase Matching",
    description="Scores phrases from U.S. patents according to their similarity. "
    "Similarity scores are between 0 and 1, higher scores mean higher similarity, and scores "
    "are computed as the cosine similarity of embeddings produced by the AI-Growth-Lab/PatentSBERTa SentenceTransformer model.",
    article="Examples are taken from the *Google Patent Phrase Similarity Dataset* used in the "
    "['U.S. Patent Phrase to Phrase Matching' Kaggle competition](https://www.kaggle.com/competitions/us-patent-phrase-to-phrase-matching/overview).",
)

if __name__ == "__main__":
    # Start the web app only when executed as a script, so importing this
    # module does not launch a server.
    app, local_url, share_url = iface.launch(enable_queue=True)