# DSA_Recommendor / app.py
# PinoCorgi's picture
# Update app.py
# 64dce86
# raw
# history blame
# 2.35 kB
import streamlit as st
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
import pickle
import torch
import io
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that forces embedded torch storages to be loaded onto the CPU.

    Presumably this lets a pickle that was written on a GPU machine be opened
    on a CPU-only host -- confirm against how the pickle was produced.
    """

    def find_class(self, module, name):
        """Resolve a pickled global, substituting torch's byte-storage loader.

        Args:
            module: Dotted module name recorded in the pickle stream.
            name: Attribute name inside ``module``.

        Returns:
            For ``torch.storage._load_from_bytes``, a callable that takes the
            raw bytes and returns ``torch.load(..., map_location='cpu')``.
            For anything else, whatever ``pickle.Unpickler.find_class``
            returns.
        """
        wants_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not wants_storage_loader:
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            # Wrap the bytes in a file-like object because torch.load reads
            # from a stream; map_location pins every tensor to the CPU.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu
@st.cache_resource
def get_hugging_face_model():
    """Create the HuggingFace embedding model used to embed queries.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned object and hands it back on later calls instead of rebuilding it.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for the
        ``mchochlov/codebert-base-cd-ft`` checkpoint.
    """
    return HuggingFaceEmbeddings(model_name="mchochlov/codebert-base-cd-ft")
@st.cache_resource
def get_db():
    """Load the pickled search database from ``codesearchdb.pickle``.

    The file is opened relative to the current working directory and decoded
    with ``CPU_Unpickler``. The result is cached by ``st.cache_resource``.

    Returns:
        The object stored in the pickle (used by the app as a vector store
        exposing ``similarity_search_by_vector``).
    """
    # NOTE(review): unpickling can execute arbitrary code; this is only safe
    # as long as codesearchdb.pickle is a trusted artifact shipped with the app.
    with open("codesearchdb.pickle", "rb") as pickle_file:
        unpickler = CPU_Unpickler(pickle_file)
        return unpickler.load()
def get_similar_links(query, db, embeddings):
    """Return the links found in the documents most similar to ``query``.

    Args:
        query: Text to embed and search for.
        db: Object exposing ``similarity_search_by_vector(vector)`` that
            returns an iterable of documents with a ``page_content`` string.
        embeddings: Object exposing ``embed_query(text)``.

    Returns:
        A flat list with the ``href`` of every ``<a href=...>`` tag found in
        each matching document's HTML, in match order. Duplicates are kept.
    """
    query_vector = embeddings.embed_query(query)
    matches = db.similarity_search_by_vector(query_vector)
    # Each match stores HTML; parse it and pull out every anchor that
    # actually carries an href attribute.
    return [
        anchor['href']
        for match in matches
        for anchor in BeautifulSoup(match.page_content, 'html.parser').find_all('a', href=True)
    ]
# Streamlit page script: load the shared resources, render the query form and
# list the links of the most similar stored solutions.

# Example query shown in the input box on first load.
EXAMPLE_QUERY = """
class Solution:
def subsets(self, nums: List[int]) -> List[List[int]]:
outputs = []
def backtrack(k, index, subSet):
if index == k:
outputs.append(subSet[:])
return
for i in range(index, len(nums)):
backtrack(k, i + 1, subSet + [nums[i]])
for j in range(len(nums) + 1):
backtrack(j, 0, [])
return outputs
"""

# NOTE: despite its name, `embedding_vector` holds the embedding *model*
# (the object with `embed_query`), not a vector. The name is kept unchanged
# in case other code refers to it.
embedding_vector = get_hugging_face_model()
db = get_db()

# The emoji were previously stored as mojibake (UTF-8 bytes decoded with the
# wrong code page); they are restored to the intended characters here.
st.title("📒 DSASearch Engine 🤖 ")

# Queries are multi-line code snippets, so use the multi-line widget;
# st.text_input is a single-line field.
text_input = st.text_area("Enter some text", value=EXAMPLE_QUERY)

button = st.button("Find Similar Questions on Leetcode")

if not text_input or not text_input.strip():
    # Empty or whitespace-only input cannot be searched meaningfully.
    st.info("Please Input Valid Text")
elif button:
    # Only search when the button was clicked. Previously the button's value
    # was ignored, so the embedding + search ran on every script rerun.
    query = text_input
    answer = get_similar_links(query, db, embedding_vector)
    # dict.fromkeys removes duplicate links while keeping the order returned
    # by the similarity search (a set would scramble the ranking).
    for link in dict.fromkeys(answer):
        st.write(link)
    st.balloons()

# The footer uses <br /> tags, which Streamlit escapes and shows as literal
# text unless HTML rendering is enabled. The content is a static, trusted
# literal, so enabling it here is safe.
st.markdown(
    """
## Created by Ashwin Rachha.<br />
Source Data : https://github.com/AshwinRachha/LeetCode-Solutions<br />
Medium Blog : https://medium.com/@ashwin_rachha/querying-a-code-database-to-find-similar-coding-problems-using-langchain-814730da6e6d<br />
""",
    unsafe_allow_html=True,
)