Spaces:
Sleeping
Sleeping
FridayMaster
committed on
Commit
•
6dc00a6
1
Parent(s):
90336d3
Update app.py
Browse files
app.py
CHANGED
@@ -10,13 +10,16 @@ import nltk
|
|
10 |
nltk.download('punkt')
|
11 |
nltk.download('punkt_tab')
|
12 |
|
13 |
-
|
|
|
|
|
|
|
14 |
# Load the Ubuntu manual from a .txt file
|
15 |
try:
|
16 |
-
with open(
|
17 |
full_text = file.read()
|
18 |
except FileNotFoundError:
|
19 |
-
raise FileNotFoundError("The file
|
20 |
|
21 |
# Function to chunk the text into smaller pieces
|
22 |
def chunk_text(text, chunk_size=500): # Larger chunks
|
@@ -41,10 +44,9 @@ manual_chunks = chunk_text(full_text, chunk_size=500)
|
|
41 |
|
42 |
# Load your FAISS index
|
43 |
try:
|
44 |
-
|
45 |
-
index = faiss.read_index("/absolute/path/to/manual_chunked_faiss_index_500.bin")
|
46 |
except Exception as e:
|
47 |
-
raise RuntimeError(f"Failed to load FAISS index: {e}")
|
48 |
|
49 |
# Load your embedding model
|
50 |
embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
|
@@ -52,7 +54,6 @@ embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
|
|
52 |
# OpenAI API key
|
53 |
openai.api_key = '[REDACTED — leaked OpenAI API key removed; rotate this key immediately and load it from an environment variable instead of committing it]'
|
54 |
|
55 |
-
|
56 |
# Function to create embeddings
|
57 |
def embed_text(text_list):
|
58 |
return np.array(embedding_model.encode(text_list), dtype=np.float32)
|
@@ -64,6 +65,8 @@ def retrieve_chunks(query, k=5):
|
|
64 |
# Search the FAISS index
|
65 |
try:
|
66 |
distances, indices = index.search(query_embedding, k=k)
|
|
|
|
|
67 |
except Exception as e:
|
68 |
raise RuntimeError(f"FAISS search failed: {e}")
|
69 |
|
|
|
10 |
nltk.download('punkt')
|
11 |
nltk.download('punkt_tab')
|
12 |
|
13 |
+
# Define paths as variables
|
14 |
+
manual_path = "ubuntu_manual.txt"
|
15 |
+
faiss_path = "manual_chunked_faiss_index_500.bin"
|
16 |
+
|
17 |
# Load the Ubuntu manual from a .txt file
|
18 |
try:
|
19 |
+
with open(manual_path, "r", encoding="utf-8") as file:
|
20 |
full_text = file.read()
|
21 |
except FileNotFoundError:
|
22 |
+
raise FileNotFoundError(f"The file {manual_path} was not found.")
|
23 |
|
24 |
# Function to chunk the text into smaller pieces
|
25 |
def chunk_text(text, chunk_size=500): # Larger chunks
|
|
|
44 |
|
45 |
# Load your FAISS index
|
46 |
try:
|
47 |
+
index = faiss.read_index(faiss_path)
|
|
|
48 |
except Exception as e:
|
49 |
+
raise RuntimeError(f"Failed to load FAISS index from {faiss_path}: {e}")
|
50 |
|
51 |
# Load your embedding model
|
52 |
embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
|
|
|
54 |
# OpenAI API key
|
55 |
openai.api_key = '[REDACTED — leaked OpenAI API key removed; rotate this key immediately and load it from an environment variable instead of committing it]'
|
56 |
|
|
|
57 |
# Function to create embeddings
|
58 |
def embed_text(text_list):
|
59 |
return np.array(embedding_model.encode(text_list), dtype=np.float32)
|
|
|
65 |
# Search the FAISS index
|
66 |
try:
|
67 |
distances, indices = index.search(query_embedding, k=k)
|
68 |
+
print("Indices:", indices)
|
69 |
+
print("Distances:", distances)
|
70 |
except Exception as e:
|
71 |
raise RuntimeError(f"FAISS search failed: {e}")
|
72 |
|