# DSA_Recommendor / app.py
# PinoCorgi's picture
# Create app.py
# 138d490
# raw
# history blame
# No virus
# 1.61 kB
import streamlit as st
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
import pickle
import torch
import io
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that routes pickled torch storages onto the CPU.

    Every lookup behaves like the stock ``pickle.Unpickler`` except for
    ``torch.storage._load_from_bytes``, which is replaced by a loader that
    calls ``torch.load`` with ``map_location='cpu'``.
    """

    def find_class(self, module, name):
        """Resolve ``module.name`` for the unpickler.

        Returns a CPU-mapping loader for torch's byte-storage hook and defers
        to the parent implementation for everything else.
        """
        is_torch_storage_hook = (module, name) == ('torch.storage', '_load_from_bytes')
        if not is_torch_storage_hook:
            return super().find_class(module, name)

        def load_on_cpu(b):
            # Wrap the raw bytes in a file-like object, as torch.load expects.
            return torch.load(io.BytesIO(b), map_location='cpu')

        return load_on_cpu
@st.cache_resource
def get_hugging_face_model():
    """Build the HuggingFace embeddings wrapper used by the app.

    Wrapped in ``st.cache_resource`` so the returned object can be reused
    rather than rebuilt on each call.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``mchochlov/codebert-base-cd-ft`` checkpoint.
    """
    return HuggingFaceEmbeddings(model_name="mchochlov/codebert-base-cd-ft")
@st.cache_resource
def get_db():
    """Load the pickled search database from ``codesearchdb.pickle``.

    The file is deserialized with ``CPU_Unpickler``. Wrapped in
    ``st.cache_resource`` so the loaded object can be reused across calls.

    Returns:
        Whatever object was pickled into ``codesearchdb.pickle``.
    """
    # NOTE(security): unpickling executes code embedded in the file; only
    # ship a pickle that comes from a trusted source.
    with open("codesearchdb.pickle", "rb") as pickle_file:
        return CPU_Unpickler(pickle_file).load()
def get_similar_links(query, db, embeddings):
    """Return the hyperlinks found in the documents most similar to *query*.

    Args:
        query: Search text to look up.
        db: Vector store exposing ``similarity_search_by_vector``; each
            returned document is read through its ``page_content`` attribute,
            which is parsed as HTML.
        embeddings: Embedding model exposing ``embed_query``.

    Returns:
        A flat list of ``href`` strings, in the order the documents were
        returned. Empty when no document contains a link.
    """
    # The store is searched by vector, so embed the query text first using
    # the model that was passed in (not a module-level global).
    query_vector = embeddings.embed_query(query)
    matches = db.similarity_search_by_vector(query_vector)

    hrefs = []
    for doc in matches:
        soup = BeautifulSoup(doc.page_content, 'html.parser')
        # extend (not append) keeps the result a flat list of strings, which
        # is what callers joining the links expect.
        hrefs.extend(a['href'] for a in soup.find_all('a', href=True))
    return hrefs
# Shared resources; both factories are decorated with st.cache_resource.
# NOTE: despite its name, `embedding_vector` holds the embeddings *model*
# returned by get_hugging_face_model(), not a vector.
embedding_vector = get_hugging_face_model()
db = get_db()

st.title("📒 PDSASearch Engine 🤖 ")
text_input = st.text_input("Enter some text")
# The click state is not consulted below; the search is driven by the text box.
button = st.button("Find Similar Questions on Leetcode")

if text_input:
    answer = get_similar_links(text_input, db, embedding_vector)

    # Accept either a flat list of links or one list of links per document,
    # so a nested result cannot make str.join raise TypeError.
    links = []
    for item in answer:
        if isinstance(item, str):
            links.append(item)
        else:
            links.extend(item)

    if links:
        # One bullet per link instead of running the URLs together.
        st.write("\n".join(f"- {link}" for link in links))
    else:
        st.info("No similar questions found.")
else:
    st.info("Please Input Valid Text")

with st.sidebar:
    st.markdown("""
    ### Created by Ashwin Rachha.
    Source Data : https://github.com/AshwinRachha/LeetCode-Solutions
    Medium Blog :
    """)