# Page header captured together with this script: "Spaces: Runtime error".
import gradio | |
import numpy as np | |
import pandas as pd | |
import re | |
import fasttext | |
from huggingface_hub import hf_hub_download | |
# Download the fastText model file from the Hugging Face Hub and load it.
model_path = hf_hub_download(
    repo_id="facebook/fasttext-id-vectors",
    filename="model.bin",
)
model = fasttext.load_model(model_path)

# Catalog table and catalog vectors, both read from the working directory.
# NOTE(review): row i of the .npy file presumably corresponds to row i of the
# CSV (predict_catalog indexes one with positions ranked on the other) --
# confirm against whatever script produced these two files.
new_catalog = pd.read_csv('new_catalog.csv')
new_prodcatalog_vec = np.load('product_catalog_vec.npy')
def preprocess(text):
    """Normalize a raw product name before it is embedded.

    Steps, in order:
      1. delete every "<number><optional spaces><unit>" run (e.g. "25 Kg");
      2. turn each of the characters ``@ . - ( )`` into a space;
      3. trim leading/trailing whitespace;
      4. lowercase.

    NOTE(review): the unit pattern has no trailing word boundary, so
    "1 Liter" loses "1 L" and leaves "iter". Behavior is kept as is; the
    stored catalog vectors presumably went through the same normalization --
    confirm before tightening the pattern.
    """
    quantity_with_unit = r'(\d+[.]{0,1}\d*\s*(kg|KG|Kg|ml|ML|Ml|mL|mil|ltr|LTR|liter|lt|Lt|LT|L|l|kilograms|%|gram|Gram|grm|GR|Gr|gr|g|G|wp|Wp|WP|METER|sp|Amper|Ahm|A|Btr|btr|ons|kapsul|cm|inc|cc|SL|sl))'
    without_units = re.sub(quantity_with_unit, "", text)

    # One pass over the string instead of five chained replace() calls.
    punctuation_to_space = str.maketrans('@.-()', ' ' * 5)
    spaced = without_units.translate(punctuation_to_space)

    return spaced.strip().lower()
def extract_features(text):
    """Return the sentence vector the module-level fastText ``model`` gives for *text*."""
    sentence_vector = model.get_sentence_vector(text)
    return sentence_vector
def cosine_similarity(v1, v2):
    """Return the cosine similarity of vectors *v1* and *v2*.

    Args:
        v1: 1-D array-like.
        v2: 1-D array-like of the same length as *v1*.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either vector has
        zero norm.
    """
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    # A zero vector has no direction; dividing would yield NaN (plus a
    # RuntimeWarning), which then poisons any ranking built on the score.
    if denominator == 0:
        return 0.0
    return np.dot(v1, v2) / denominator
def get_most_similar(target, reference, n=5):
    """Rank the rows of *reference* by cosine similarity to *target*.

    Args:
        target: 1-D query vector.
        reference: 2-D array-like with one candidate vector per row.
        n: maximum number of matches to return.

    Returns:
        A tuple ``(idx, top_score)``: the row indices of the best matches,
        highest similarity first, and the matching scores. Both have length
        ``min(n, number of rows)``.
    """
    reference = np.asarray(reference)
    target = np.asarray(target)

    # Scores start at zero and stay there wherever a cosine is undefined
    # (zero-norm row or zero-norm target), to avoid NaN scores.
    score = np.zeros(len(reference), dtype=float)

    target_norm = np.linalg.norm(target)
    if reference.size and target_norm > 0:
        row_norms = np.linalg.norm(reference, axis=1)
        # All dot products in one matrix-vector product; `where` skips the
        # zero-norm rows so no division by zero is ever evaluated.
        np.divide(
            reference @ target,
            row_norms * target_norm,
            out=score,
            where=row_norms > 0,
        )

    # ranking descending
    idx = score.argsort()[::-1][:n]
    top_score = score[idx]
    return idx, top_score
def predict_catalog(text):
    """Return the 'Product SKU' of the catalog entry closest to *text*.

    The text is normalized with ``preprocess``, embedded with
    ``extract_features``, and ranked against ``new_prodcatalog_vec``; the SKU
    at the top-ranked position of ``new_catalog`` is returned.
    """
    query_vector = extract_features(preprocess(text))
    idx, _ = get_most_similar(query_vector, new_prodcatalog_vec)

    sku_column = new_catalog['Product SKU'].values
    ranked_skus = np.take(sku_column, idx)
    # Only the best match is exposed to the caller.
    return ranked_skus[0]
# Web UI: one text box in, the predicted SKU shown as a label.
demo = gradio.Interface(
    fn=predict_catalog,
    inputs=gradio.Textbox(placeholder="Enter a product name here..."),
    outputs="label",
    examples=[["NPK Kujang 30.6.8 @ 25 Kg"]],
)

# Start serving; share=True asks Gradio for a public share link.
demo.launch(share=True)