Spaces:

huytx267
/

function_retrieval

Sleeping

App Files Files Community

Tran Xuan Huy committed on Aug 18, 2023

Commit

6961a96

1 Parent(s): 7b84b5b

Create app.py

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from copy import deepcopy
+import torch
+import json
+from numpy.linalg import norm
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+# necessary function
def cosinesimilarity(vector1, vector2):
    """Return the cosine similarity of two vectors.

    Args:
        vector1: 1-D array-like of numbers.
        vector2: 1-D array-like of numbers with the same length as
            ``vector1``.

    Returns:
        ``dot(vector1, vector2) / (norm(vector1) * norm(vector2))`` as a
        numpy scalar. When either vector has zero norm the ratio is
        undefined (0/0 would evaluate to NaN and emit a RuntimeWarning), so
        ``0.0`` is returned instead.
    """
    denominator = norm(vector1) * norm(vector2)
    if denominator == 0:
        # Keep a numpy scalar so callers that use numpy-only methods such as
        # ``.item()`` on the result behave the same as on the normal path.
        return np.float64(0.0)
    return np.dot(vector1, vector2) / denominator
def encode_input_and_return_top_n(input_in, db_dff, top_k, new2oldmatching):
    """Rank the stored commands against ``input_in`` and return the best ones.

    Args:
        input_in: Query text; it is passed to the module-level
            ``model.encode``.
        db_dff: DataFrame with a "Câu lệnh có sẵn" column (command text) and
            an "Embedding" column (one vector per row). It is not modified.
        top_k: Number of highest-scoring rows to keep.
        new2oldmatching: Mapping from the stripped command text to the label
            used as key in the result.

    Returns:
        Dict mapping ``new2oldmatching[command.strip()]`` to the cosine
        similarity score rounded to two decimals, in descending score order.
        If several kept rows map to the same label, the value stored for
        that label is the one from the last (lowest-scoring) such row.

    Raises:
        KeyError: If a kept command is missing from ``new2oldmatching``.
    """
    embed1 = model.encode(input_in)

    # Score every row straight from its own embedding. Iterating the column
    # keeps scores aligned with rows whatever the index labels are, and
    # yields exactly one score per row even if command texts repeat.
    scores = [
        round(cosinesimilarity(embed1, embed2), 3)
        for embed2 in db_dff['Embedding']
    ]

    # Only the text column is needed for ranking; copying it alone leaves the
    # caller's frame untouched without duplicating every embedding per call.
    ranked = db_dff[['Câu lệnh có sẵn']].copy()
    ranked["Điểm"] = scores
    ranked = ranked.sort_values(by=['Điểm'], ascending=False).iloc[:top_k]

    return {
        new2oldmatching[command.strip()]: round(score, 2)
        for command, score in zip(ranked['Câu lệnh có sẵn'], ranked['Điểm'])
    }
def image_classifier(Input):
    """Return the top three command matches for the text ``Input``.

    Despite its name this function works on text: the input is lower-cased
    and ranked by ``encode_input_and_return_top_n`` against the module-level
    ``db_df`` frame, using the module-level ``new2oldmatch`` mapping.
    """
    return encode_input_and_return_top_n(Input.lower(), db_df, 3, new2oldmatch)
def encode_database(db_in):
    """Encode every command in ``db_in`` and return the results as a frame.

    Args:
        db_in: Iterable of command strings.

    Returns:
        DataFrame with a default integer index and two columns:
        "Câu lệnh có sẵn" holding the command text and "Embedding" holding
        the output of the module-level ``model.encode`` for that command.
    """
    commands = list(db_in)

    # Collect the embeddings first and build the frame in one step. Writing
    # cell by cell through ``df['Embedding'].loc[i] = ...`` is chained
    # assignment, which pandas does not guarantee to write back to ``df``.
    embeddings = [model.encode(func) for func in tqdm(commands)]
    df = pd.DataFrame({"Câu lệnh có sẵn": commands, "Embedding": embeddings})

    print()
    print("Encode database successfully")
    return df
# Load the sentence-embedding model once at start-up; the functions above
# read it as a module-level global.
model = SentenceTransformer("something/model")
model.eval()

# Read the command -> label mapping. The encoding is given explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale default (the commands are presumably non-ASCII, like the column
# names used in this file - confirm against the data).
with open('something/new2oldmatch.json', 'r', encoding='utf-8') as openfile:
    new2oldmatch = json.load(openfile)

# Normalise keys (trimmed, lower-case) and values (trimmed) so lookups made
# with the lower-cased, stripped command text succeed.
new2oldmatch = {u.strip().lower(): v.strip() for u, v in new2oldmatch.items()}

# Pre-compute one embedding per known command.
database = [cmd.lower() for cmd in new2oldmatch]
db_df = encode_database(database)

# Text box in, label (name -> score) out.
demo = gr.Interface(fn=image_classifier, inputs="text", outputs="label")
demo.launch(share=True)