DSA_Recommendor / app.py
PinoCorgi's picture
Update app.py
25d8a2b
raw
history blame contribute delete
No virus
2.45 kB
import streamlit as st
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
import pickle
import torch
import io
from langchain.vectorstores import FAISS
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads embedded torch storages with ``map_location='cpu'``.

    Every class lookup is delegated to ``pickle.Unpickler`` except
    ``torch.storage._load_from_bytes``, which is swapped for a loader that
    calls ``torch.load(..., map_location='cpu')``.

    NOTE(review): presumably this exists so that pickles written on a GPU
    machine can be opened on a CPU-only host -- confirm with the producer of
    the pickle file.

    SECURITY: unpickling executes arbitrary code; only feed this trusted data.
    """

    def find_class(self, module, name):
        """Resolve ``module.name``, overriding only torch's byte-storage loader."""
        if (module, name) != ('torch.storage', '_load_from_bytes'):
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            # Wrap the raw bytes in a file-like object, as torch.load expects.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu
@st.cache_resource
def get_hugging_face_model():
    """Build the ``HuggingFaceEmbeddings`` wrapper used to embed queries.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    object instead of constructing it again on each call.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``mchochlov/codebert-base-cd-ft`` model.
    """
    return HuggingFaceEmbeddings(model_name="mchochlov/codebert-base-cd-ft")
@st.cache_resource
def get_db():
    """Load the pickled database from ``codesearchdb.pickle``.

    The file is read through ``CPU_Unpickler`` rather than plain
    ``pickle.load``. Decorated with ``st.cache_resource`` so Streamlit can
    reuse the loaded object across calls.

    SECURITY: the pickle file must come from a trusted source, since
    unpickling can execute arbitrary code.

    Returns:
        Whatever object was stored in the pickle file.
    """
    with open("codesearchdb.pickle", "rb") as pickle_file:
        return CPU_Unpickler(pickle_file).load()
def get_similar_links(query: str, db, embeddings, k: int = 10) -> list:
    """Return the hyperlinks found in the stored documents closest to *query*.

    The query is embedded with ``embeddings.embed_query``, the ``k`` nearest
    entries are fetched with ``db.similarity_search_by_vector``, and each
    result's ``page_content`` is parsed as HTML to collect the ``href`` of
    every ``<a>`` tag that has one.

    Args:
        query: Text (e.g. a code snippet) to search for.
        db: Vector store exposing ``similarity_search_by_vector(vector, k=...)``
            whose results carry a ``page_content`` attribute.
        embeddings: Object exposing ``embed_query(text)``.
        k: Number of nearest documents to retrieve. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A flat list of ``href`` strings in result order. Duplicates are kept;
        callers that need unique links must de-duplicate themselves.
    """
    query_vector = embeddings.embed_query(query)
    documents = db.similarity_search_by_vector(query_vector, k=k)

    links = []
    for document in documents:
        soup = BeautifulSoup(document.page_content, 'html.parser')
        # href=True skips anchors without an href attribute, so the
        # subscript below cannot raise KeyError.
        links.extend(anchor['href'] for anchor in soup.find_all('a', href=True))
    return links
# ---------------------------------------------------------------------------
# Streamlit page: load resources, render the form, show search results.
# ---------------------------------------------------------------------------

# NOTE: despite its name, `embedding_vector` holds the embeddings *model*
# returned by get_hugging_face_model(), not a vector.
embedding_vector = get_hugging_face_model()
db = FAISS.load_local("code_sim_index", embedding_vector)

st.title("📒 DSASearch Engine 🤖 ")

# Example snippet pre-filled in the text area (a LeetCode "Subsets" solution).
DEFAULT_CODE_EXAMPLE = """
class Solution:
    def subsets(self, nums: List[int]) -> List[List[int]]:
        outputs = []
        def backtrack(k, index, subSet):
            if index == k:
                outputs.append(subSet[:])
                return
            for i in range(index, len(nums)):
                backtrack(k, i + 1, subSet + [nums[i]])
        for j in range(len(nums) + 1):
            backtrack(j, 0, [])
        return outputs
"""

text_input = st.text_area(
    "Enter a Code Example",
    value=DEFAULT_CODE_EXAMPLE,
    height=330,
)

button = st.button("Find Similar Questions on Leetcode")

# Search only when the button was pressed AND there is something to embed;
# a blank query falls through to the hint below.
if button and text_input.strip():
    query = text_input
    answer = get_similar_links(query, db, embedding_vector)
    # dict.fromkeys drops duplicate links while keeping the order returned by
    # the similarity search (a set would show them in arbitrary order).
    for link in dict.fromkeys(answer):
        st.write(link)
    st.balloons()
else:
    st.info("Please Input Valid Text")

st.markdown("""
### Created by Ashwin Rachha.
""")
st.markdown("Source Data : https://github.com/AshwinRachha/LeetCode-Solutions")
st.markdown("Medium Blog : https://medium.com/@ashwin_rachha/querying-a-code-database-to-find-similar-coding-problems-using-langchain-814730da6e6d")