Phoenix21 committed on
Commit
9724455
·
verified ·
1 Parent(s): c54b7aa

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +24 -17
pipeline.py CHANGED
@@ -131,24 +131,31 @@ def classify_query(query: str) -> str:
131
  def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
132
  try:
133
  if os.path.exists(store_dir):
134
- print(f"Loading existing FAISS store from '{store_dir}'")
135
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
136
- return FAISS.load_local(store_dir, embeddings)
137
-
138
- print(f"Building new FAISS store from CSV: {csv_path}")
139
- df = pd.read_csv(csv_path)
140
- df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
141
- df.columns = df.columns.str.strip()
142
-
143
- docs = [
144
- Document(page_content=str(row["Answer"]), metadata={"question": str(row["Question"])})
145
- for _, row in df.iterrows()
146
- ]
147
-
148
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
149
- vectorstore = FAISS.from_documents(docs, embedding=embeddings)
150
- vectorstore.save_local(store_dir)
151
- return vectorstore
 
 
 
 
 
 
 
152
 
153
  except Exception as e:
154
  raise RuntimeError(f"Error building/loading vector store: {str(e)}")
 
131
  def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
132
  try:
133
  if os.path.exists(store_dir):
134
+ print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
135
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
136
+ vectorstore = FAISS.load_local(store_dir, embeddings)
137
+ return vectorstore
138
+ else:
139
+ print(f"DEBUG: Building new store from CSV: {csv_path}")
140
+ df = pd.read_csv(csv_path)
141
+ df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
142
+ df.columns = df.columns.str.strip()
143
+ if "Answer" in df.columns:
144
+ df.rename(columns={"Answer": "Answers"}, inplace=True)
145
+ if "Question" not in df.columns and "Question " in df.columns:
146
+ df.rename(columns={"Question ": "Question"}, inplace=True)
147
+ if "Question" not in df.columns or "Answers" not in df.columns:
148
+ raise ValueError("CSV must have 'Question' and 'Answers' columns.")
149
+ docs = []
150
+ for _, row in df.iterrows():
151
+ q = str(row["Question"])
152
+ ans = str(row["Answers"])
153
+ doc = Document(page_content=ans, metadata={"question": q})
154
+ docs.append(doc)
155
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
156
+ vectorstore = FAISS.from_documents(docs, embedding=embeddings)
157
+ vectorstore.save_local(store_dir)
158
+ return vectorstore
159
 
160
  except Exception as e:
161
  raise RuntimeError(f"Error building/loading vector store: {str(e)}")