FridayMaster committed
Commit 155ba37 • Parent(s): 6dc00a6
Update app.py
app.py CHANGED
@@ -10,9 +10,9 @@ import nltk
 nltk.download('punkt')
 nltk.download('punkt_tab')
 
-#
-manual_path = "ubuntu_manual.txt"
+# Paths
 faiss_path = "manual_chunked_faiss_index_500.bin"
+manual_path = "ubuntu_manual.txt"
 
 # Load the Ubuntu manual from a .txt file
 try:
@@ -22,7 +22,7 @@ except FileNotFoundError:
     raise FileNotFoundError(f"The file {manual_path} was not found.")
 
 # Function to chunk the text into smaller pieces
-def chunk_text(text, chunk_size=500):
+def chunk_text(text, chunk_size=500):
     sentences = sent_tokenize(text)
     chunks = []
     current_chunk = []
@@ -46,31 +46,31 @@ manual_chunks = chunk_text(full_text, chunk_size=500)
 try:
     index = faiss.read_index(faiss_path)
 except Exception as e:
-    raise RuntimeError(f"Failed to load FAISS index
+    raise RuntimeError(f"Failed to load FAISS index: {e}")
 
 # Load your embedding model
-embedding_model = SentenceTransformer('
+embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 
 # OpenAI API key
-openai.api_key = 'sk-proj-
+openai.api_key = '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'
 
 # Function to create embeddings
 def embed_text(text_list):
-
+    embeddings = embedding_model.encode(text_list)
+    print("Embedding shape:", embeddings.shape)  # Debugging: Print shape
+    return np.array(embeddings, dtype=np.float32)
 
 # Function to retrieve relevant chunks for a user query
 def retrieve_chunks(query, k=5):
     query_embedding = embed_text([query])
 
-    # Search the FAISS index
     try:
         distances, indices = index.search(query_embedding, k=k)
-        print("Indices:", indices)
-        print("Distances:", distances)
+        print("Indices:", indices)  # Debugging: Print indices
+        print("Distances:", distances)  # Debugging: Print distances
     except Exception as e:
         raise RuntimeError(f"FAISS search failed: {e}")
-
-    # Check if indices are valid
+
     if len(indices[0]) == 0:
         return []
 
@@ -129,4 +129,3 @@ if __name__ == "__main__":
 
 
 
-
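
The diff view cuts off the body of chunk_text after line 28, so only its first three statements are visible. A minimal sketch of how a sentence-based chunker along these lines is commonly completed, assuming chunk_size is a per-chunk word budget (the actual implementation in app.py may differ):

from nltk.tokenize import sent_tokenize

def chunk_text(text, chunk_size=500):
    # Split into sentences, then greedily pack them into chunks
    # of at most `chunk_size` words (assumption: word budget).
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_len = 0
    for sentence in sentences:
        n_words = len(sentence.split())
        if current_len + n_words > chunk_size and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_len = 0
        current_chunk.append(sentence)
        current_len += n_words
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks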
|
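
Taken together, the pieces the commit adds give a standard FAISS retrieval path: encode the query with the same SentenceTransformer model, search the prebuilt index, then map the returned row indices back into manual_chunks. A sketch of the part the diff does not show, under the assumption that the index was built over manual_chunks in order (embed_text, index, and manual_chunks are the names defined above):

def retrieve_chunks(query, k=5):
    # FAISS expects a float32 matrix of shape (n, d); embed_text
    # already returns that for the single-query list.
    query_embedding = embed_text([query])

    distances, indices = index.search(query_embedding, k=k)

    # Row 0 of `indices` holds the positions of the k nearest chunks;
    # FAISS pads with -1 when the index has fewer than k vectors.
    return [manual_chunks[i] for i in indices[0] if i != -1]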
|
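
Note that the commit hardcodes what appears to be a live OpenAI key; once pushed, it is public in the Space's git history and should be rotated. A common alternative, matching the openai.api_key style the commit already uses, is to read the key from an environment variable (on Hugging Face Spaces, a repository secret named OPENAI_API_KEY here is an assumption):

import os

import openai

# Read the key from the environment instead of committing it;
# on Hugging Face Spaces this can be set as a repository secret.
openai.api_key = os.environ["OPENAI_API_KEY"]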