PinoCorgi committed on
Commit
138d490
1 Parent(s): 59979fe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import io
import pickle

import streamlit as st
import torch
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings


class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads embedded torch storages onto the CPU.

    Pickles containing torch tensors reference
    ``torch.storage._load_from_bytes``. This subclass swaps that loader for
    one that calls ``torch.load(..., map_location='cpu')``, so the storages
    are mapped to the CPU whatever device they were saved from.

    SECURITY: unpickling executes arbitrary code. Only use this on files
    from a trusted source.
    """

    def find_class(self, module, name):
        """Resolve ``module.name``, overriding torch's byte-storage loader.

        Args:
            module: Dotted module path recorded in the pickle stream.
            name: Attribute name inside that module.

        Returns:
            A CPU-mapping loader for ``torch.storage._load_from_bytes``;
            otherwise whatever ``pickle.Unpickler.find_class`` resolves.
        """
        if module == 'torch.storage' and name == '_load_from_bytes':
            # Force every storage onto the CPU instead of its saved device.
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        return super().find_class(module, name)


@st.cache_resource
def get_hugging_face_model():
    """Build the HuggingFace embeddings wrapper used to embed queries.

    Decorated with ``st.cache_resource`` so the model is created once and
    reused instead of being rebuilt on every Streamlit script rerun.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for the
        ``mchochlov/codebert-base-cd-ft`` model.
    """
    model_name = "mchochlov/codebert-base-cd-ft"
    return HuggingFaceEmbeddings(model_name=model_name)


@st.cache_resource
def get_db():
    """Load the pickled search database from ``codesearchdb.pickle``.

    The file is read with ``CPU_Unpickler`` so any torch storages inside it
    are mapped to the CPU. Decorated with ``st.cache_resource`` so the file
    is unpickled once rather than on every Streamlit script rerun.

    Returns:
        The unpickled database object. The rest of the app calls
        ``similarity_search_by_vector`` on it, so it is presumably a
        LangChain vector store -- confirm against how the pickle was built.

    Raises:
        FileNotFoundError: If ``codesearchdb.pickle`` is not in the working
            directory.
    """
    # SECURITY: unpickling runs arbitrary code; this file must be a trusted
    # artifact shipped with the app, never user-supplied data.
    with open("codesearchdb.pickle", "rb") as f:
        return CPU_Unpickler(f).load()


def get_similar_links(query, db, embeddings):
    """Find documents similar to ``query`` and collect the links they contain.

    Args:
        query: Free-text search string entered by the user.
        db: Vector store exposing ``similarity_search_by_vector``.
        embeddings: Embeddings model exposing ``embed_query``.

    Returns:
        A list with one entry per matching document; each entry is the list
        of ``href`` values found in that document's HTML content.
    """
    # Embed the query with the model that was passed in. The search needs a
    # numeric vector, not the embeddings model object itself.
    query_vector = embeddings.embed_query(query)
    matches = db.similarity_search_by_vector(query_vector)

    hrefs = []
    for doc in matches:
        soup = BeautifulSoup(doc.page_content, 'html.parser')
        # href=True skips anchors that have no href attribute.
        hrefs.append([a['href'] for a in soup.find_all('a', href=True)])
    return hrefs


# NOTE: despite its name, `embedding_vector` holds the embeddings *model*
# (it is passed to get_similar_links as the `embeddings` argument).
embedding_vector = get_hugging_face_model()
db = get_db()
st.title("📒 PDSASearch Engine 🤖 ")
text_input = st.text_input("Enter some text")
# The button's return value is not consulted: the search below runs whenever
# the text box is non-empty, and a click simply reruns the script.
button = st.button("Find Similar Questions on Leetcode")
if text_input:
    query = text_input
    answer = get_similar_links(query, db, embedding_vector)
    # `answer` is a list of per-document link lists. Flatten it before
    # joining, because str.join raises TypeError on nested lists.
    links = [link for doc_links in answer for link in doc_links]
    # One markdown bullet per link so the URLs do not run together.
    st.write("\n".join(f"- {link}" for link in links))

else:
    st.info("Please Input Valid Text")

with st.sidebar:
    st.markdown("""
### Created by Ashwin Rachha.
Source Data : https://github.com/AshwinRachha/LeetCode-Solutions
Medium Blog :
""")