Spaces:
Paused
Paused
jeevan
commited on
Commit
•
936a3c3
1
Parent(s):
98b8fb3
refactoring
Browse files- SDG-Generation-logs +0 -0
- app.py +22 -45
- pre-processing.ipynb +1148 -444
- requirements.txt +4 -2
- task3-generate-dataset-ragas-eval.ipynb +590 -0
SDG-Generation-logs
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -5,26 +5,26 @@ import chainlit as cl
|
|
5 |
from chainlit.types import AskFileResponse
|
6 |
from langchain.memory import ConversationBufferMemory
|
7 |
from langchain_core.chat_history import BaseChatMessageHistory
|
|
|
8 |
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
|
9 |
from langchain.prompts import MessagesPlaceholder
|
10 |
from langchain.prompts import ChatPromptTemplate
|
11 |
-
from langchain_community.chat_message_histories import ChatMessageHistory
|
12 |
from langchain.chains.history_aware_retriever import create_history_aware_retriever
|
13 |
from langchain.chains.retrieval import create_retrieval_chain
|
14 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
15 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
from langchain_experimental.text_splitter import SemanticChunker
|
17 |
from langchain_qdrant import QdrantVectorStore
|
18 |
from langchain_core.documents import Document
|
19 |
from qdrant_client import QdrantClient
|
20 |
from qdrant_client.http.models import Distance, VectorParams
|
21 |
from langchain_openai import ChatOpenAI
|
22 |
-
from embedding_model import get_embeddings_openai_text_3_large,get_embeddings_snowflake_arctic_embed_l
|
23 |
-
from pdfloader import PDFLoaderWrapper
|
24 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
25 |
-
from chainlit.input_widget import Select, Switch, Slider
|
26 |
from dotenv import load_dotenv
|
27 |
from langchain_huggingface import HuggingFaceEmbeddings
|
|
|
|
|
|
|
28 |
|
29 |
load_dotenv()
|
30 |
|
@@ -60,7 +60,21 @@ async def connect_to_qdrant():
|
|
60 |
collection_name=collection_name,
|
61 |
embedding=embedding_model,
|
62 |
)
|
63 |
-
return vector_store.as_retriever()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
def initialize_vectorstore(
|
66 |
collection_name: str,
|
@@ -155,7 +169,6 @@ def create_session_id():
|
|
155 |
|
156 |
@cl.on_chat_start
|
157 |
async def start():
|
158 |
-
# cl.user_session.set("memory", conversation_buffer_memory)
|
159 |
msg = cl.Message(content=welcome_message)
|
160 |
await msg.send()
|
161 |
|
@@ -163,46 +176,10 @@ async def start():
|
|
163 |
session_id = create_session_id()
|
164 |
cl.user_session.set("session_id", session_id)
|
165 |
|
166 |
-
# Preserve chat history
|
167 |
-
conversation_buffer_memory = ConversationBufferMemory(
|
168 |
-
memory_key="chat_history",
|
169 |
-
output_key="answer",
|
170 |
-
chat_memory=ChatMessageHistory(),
|
171 |
-
return_messages=True,
|
172 |
-
)
|
173 |
-
|
174 |
-
# todo: if logged in user is admin then allow them to upload new pdfs.
|
175 |
-
|
176 |
-
# # Embedding model
|
177 |
-
# # embedding_model, dimension = get_embeddings_openai_text_3_large()
|
178 |
-
# embedding_model, dimension = get_embeddings_snowflake_arctic_embed_l()
|
179 |
-
# msg.content = "Embedding model loaded"
|
180 |
-
# await msg.update()
|
181 |
-
# cl.user_session.set("embedding_model", embedding_model)
|
182 |
-
# cl.user_session.set("dimension", dimension)
|
183 |
-
|
184 |
-
# # Pdf loader
|
185 |
-
# pdf_loader = PDFLoaderWrapper(
|
186 |
-
# documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF
|
187 |
-
# )
|
188 |
-
# msg.content = "Embedding model loaded"
|
189 |
-
# await msg.update()
|
190 |
-
# cl.user_session.set("pdf_loader", pdf_loader)
|
191 |
-
# documents = await pdf_loader.aload()
|
192 |
-
|
193 |
-
# text_splitter = get_text_splitter("semantic", embedding_model)
|
194 |
-
|
195 |
-
# chunked_docs = text_splitter.split_documents(documents)
|
196 |
-
|
197 |
-
# vector_store = initialize_vectorstore(
|
198 |
-
# collection_name, embedding_model, dimension=dimension
|
199 |
-
# )
|
200 |
-
|
201 |
-
# vector_store = populate_vectorstore(vector_store, chunked_docs)
|
202 |
-
|
203 |
retriever = await connect_to_qdrant()
|
|
|
204 |
|
205 |
-
rag_chain = create_rag_chain(chat_model,
|
206 |
|
207 |
store = {}
|
208 |
|
|
|
5 |
from chainlit.types import AskFileResponse
|
6 |
from langchain.memory import ConversationBufferMemory
|
7 |
from langchain_core.chat_history import BaseChatMessageHistory
|
8 |
+
from langchain_community.chat_message_histories import ChatMessageHistory
|
9 |
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
|
10 |
from langchain.prompts import MessagesPlaceholder
|
11 |
from langchain.prompts import ChatPromptTemplate
|
|
|
12 |
from langchain.chains.history_aware_retriever import create_history_aware_retriever
|
13 |
from langchain.chains.retrieval import create_retrieval_chain
|
14 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
|
|
15 |
from langchain_experimental.text_splitter import SemanticChunker
|
16 |
from langchain_qdrant import QdrantVectorStore
|
17 |
from langchain_core.documents import Document
|
18 |
from qdrant_client import QdrantClient
|
19 |
from qdrant_client.http.models import Distance, VectorParams
|
20 |
from langchain_openai import ChatOpenAI
|
|
|
|
|
21 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
22 |
+
# from chainlit.input_widget import Select, Switch, Slider
|
23 |
from dotenv import load_dotenv
|
24 |
from langchain_huggingface import HuggingFaceEmbeddings
|
25 |
+
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
|
26 |
+
from langchain.retrievers.document_compressors import LLMChainExtractor
|
27 |
+
|
28 |
|
29 |
load_dotenv()
|
30 |
|
|
|
60 |
collection_name=collection_name,
|
61 |
embedding=embedding_model,
|
62 |
)
|
63 |
+
return vector_store.as_retriever(search_type="similarity_score_threshold",search_kwargs={'k':10,'score_threshold': 0.8})
|
64 |
+
|
65 |
+
async def get_contextual_compressed_retriever(retriver):
|
66 |
+
|
67 |
+
base_retriever = retriver
|
68 |
+
compressor_llm = ChatOpenAI(temperature=0, model_name="gpt-4o", max_tokens=4000)
|
69 |
+
compressor = LLMChainExtractor.from_llm(compressor_llm)
|
70 |
+
|
71 |
+
#Combine the retriever with the compressor
|
72 |
+
compression_retriever = ContextualCompressionRetriever(
|
73 |
+
base_compressor=compressor,
|
74 |
+
base_retriever=base_retriever
|
75 |
+
)
|
76 |
+
return compression_retriever
|
77 |
+
|
78 |
|
79 |
def initialize_vectorstore(
|
80 |
collection_name: str,
|
|
|
169 |
|
170 |
@cl.on_chat_start
|
171 |
async def start():
|
|
|
172 |
msg = cl.Message(content=welcome_message)
|
173 |
await msg.send()
|
174 |
|
|
|
176 |
session_id = create_session_id()
|
177 |
cl.user_session.set("session_id", session_id)
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
retriever = await connect_to_qdrant()
|
180 |
+
contextual_compressed_retriever = await get_contextual_compressed_retriever(retriever)
|
181 |
|
182 |
+
rag_chain = create_rag_chain(chat_model, contextual_compressed_retriever)
|
183 |
|
184 |
store = {}
|
185 |
|
pre-processing.ipynb
CHANGED
@@ -24,7 +24,7 @@
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
-
"execution_count":
|
28 |
"metadata": {},
|
29 |
"outputs": [],
|
30 |
"source": [
|
@@ -40,14 +40,14 @@
|
|
40 |
},
|
41 |
{
|
42 |
"cell_type": "code",
|
43 |
-
"execution_count":
|
44 |
"metadata": {},
|
45 |
"outputs": [
|
46 |
{
|
47 |
"name": "stderr",
|
48 |
"output_type": "stream",
|
49 |
"text": [
|
50 |
-
"/Users/jeevan/Documents/Learnings/ai-engineering-bootcamp/AIE4/
|
51 |
" from tqdm.autonotebook import tqdm, trange\n"
|
52 |
]
|
53 |
}
|
@@ -62,7 +62,7 @@
|
|
62 |
},
|
63 |
{
|
64 |
"cell_type": "code",
|
65 |
-
"execution_count":
|
66 |
"metadata": {},
|
67 |
"outputs": [],
|
68 |
"source": [
|
@@ -75,11 +75,27 @@
|
|
75 |
")\n",
|
76 |
"documents = await pdf_loader.aload()\n",
|
77 |
"\n",
|
78 |
-
"text_splitter = SemanticChunker(embedding_model, breakpoint_threshold_type=\"percentile\",breakpoint_threshold_amount=90)\n",
|
79 |
"\n",
|
80 |
"chunked_docs = text_splitter.split_documents(documents)\n"
|
81 |
]
|
82 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
{
|
84 |
"cell_type": "code",
|
85 |
"execution_count": 4,
|
@@ -89,452 +105,444 @@
|
|
89 |
"import os\n",
|
90 |
"import getpass\n",
|
91 |
"\n",
|
92 |
-
"os.environ[\"
|
|
|
|
|
93 |
]
|
94 |
},
|
95 |
{
|
96 |
"cell_type": "code",
|
97 |
-
"execution_count":
|
98 |
"metadata": {},
|
99 |
"outputs": [
|
100 |
{
|
101 |
"data": {
|
102 |
"text/plain": [
|
103 |
-
"['
|
104 |
-
" '
|
105 |
-
" '
|
106 |
-
" '
|
107 |
-
" '
|
108 |
-
" '
|
109 |
-
" '
|
110 |
-
" '
|
111 |
-
" '
|
112 |
-
" '
|
113 |
-
" '
|
114 |
-
" '
|
115 |
-
" '
|
116 |
-
" '
|
117 |
-
" '
|
118 |
-
" '
|
119 |
-
" '
|
120 |
-
" '
|
121 |
-
" '
|
122 |
-
" '
|
123 |
-
" '
|
124 |
-
" '
|
125 |
-
" '
|
126 |
-
" '
|
127 |
-
" '
|
128 |
-
" '
|
129 |
-
" '
|
130 |
-
" '
|
131 |
-
" '
|
132 |
-
" '
|
133 |
-
" '
|
134 |
-
" '
|
135 |
-
" '
|
136 |
-
" '
|
137 |
-
" '
|
138 |
-
" '
|
139 |
-
" '
|
140 |
-
" '
|
141 |
-
" '
|
142 |
-
" '
|
143 |
-
" '
|
144 |
-
" '
|
145 |
-
" '
|
146 |
-
" '
|
147 |
-
" '
|
148 |
-
" '
|
149 |
-
" '
|
150 |
-
" '
|
151 |
-
" '
|
152 |
-
" '
|
153 |
-
" '
|
154 |
-
" '
|
155 |
-
" '
|
156 |
-
" '
|
157 |
-
" '
|
158 |
-
" '
|
159 |
-
" '
|
160 |
-
" '
|
161 |
-
" '
|
162 |
-
" '
|
163 |
-
" '
|
164 |
-
" '
|
165 |
-
" '
|
166 |
-
" '
|
167 |
-
" '
|
168 |
-
" '
|
169 |
-
" '
|
170 |
-
" '
|
171 |
-
" '
|
172 |
-
" '
|
173 |
-
" '
|
174 |
-
" '
|
175 |
-
" '
|
176 |
-
" '
|
177 |
-
" '
|
178 |
-
" '
|
179 |
-
" '
|
180 |
-
" '
|
181 |
-
" '
|
182 |
-
" '
|
183 |
-
" '
|
184 |
-
" '
|
185 |
-
" '
|
186 |
-
" '
|
187 |
-
" '
|
188 |
-
" '
|
189 |
-
" '
|
190 |
-
" '
|
191 |
-
" '
|
192 |
-
" '
|
193 |
-
" '
|
194 |
-
" '
|
195 |
-
" '
|
196 |
-
" '
|
197 |
-
" '
|
198 |
-
" '
|
199 |
-
" '
|
200 |
-
" '
|
201 |
-
" '
|
202 |
-
" '
|
203 |
-
" '
|
204 |
-
" '
|
205 |
-
" '
|
206 |
-
" '
|
207 |
-
" '
|
208 |
-
" '
|
209 |
-
" '
|
210 |
-
" '
|
211 |
-
" '
|
212 |
-
" '
|
213 |
-
" '
|
214 |
-
" '
|
215 |
-
" '
|
216 |
-
" '
|
217 |
-
" '
|
218 |
-
" '
|
219 |
-
" '
|
220 |
-
" '
|
221 |
-
" '
|
222 |
-
" '
|
223 |
-
" '
|
224 |
-
" '
|
225 |
-
" '
|
226 |
-
" '
|
227 |
-
" '
|
228 |
-
" '
|
229 |
-
" '
|
230 |
-
" '
|
231 |
-
" '
|
232 |
-
" '
|
233 |
-
" '
|
234 |
-
" '
|
235 |
-
" '
|
236 |
-
" '
|
237 |
-
" '
|
238 |
-
" '
|
239 |
-
" '
|
240 |
-
" '
|
241 |
-
" '
|
242 |
-
" '
|
243 |
-
" '
|
244 |
-
" '
|
245 |
-
" '
|
246 |
-
" '
|
247 |
-
" '
|
248 |
-
" '
|
249 |
-
" '
|
250 |
-
" '
|
251 |
-
" '
|
252 |
-
" '
|
253 |
-
" '
|
254 |
-
" '
|
255 |
-
" '
|
256 |
-
" '
|
257 |
-
" '
|
258 |
-
" '
|
259 |
-
" '
|
260 |
-
" '
|
261 |
-
" '
|
262 |
-
" '
|
263 |
-
" '
|
264 |
-
" '
|
265 |
-
" '
|
266 |
-
" '
|
267 |
-
" '
|
268 |
-
" '
|
269 |
-
" '
|
270 |
-
" '
|
271 |
-
" '
|
272 |
-
" '
|
273 |
-
" '
|
274 |
-
" '
|
275 |
-
" '
|
276 |
-
" '
|
277 |
-
" '
|
278 |
-
" '
|
279 |
-
" '
|
280 |
-
" '
|
281 |
-
" '
|
282 |
-
" '
|
283 |
-
" '
|
284 |
-
" '
|
285 |
-
" '
|
286 |
-
" '
|
287 |
-
" '
|
288 |
-
" '
|
289 |
-
" '
|
290 |
-
" '
|
291 |
-
" '
|
292 |
-
" '
|
293 |
-
" '
|
294 |
-
" '
|
295 |
-
" '
|
296 |
-
" '
|
297 |
-
" '
|
298 |
-
" '
|
299 |
-
" '
|
300 |
-
" '
|
301 |
-
" '
|
302 |
-
" '
|
303 |
-
" '
|
304 |
-
" '
|
305 |
-
" '
|
306 |
-
" '
|
307 |
-
" '
|
308 |
-
" '
|
309 |
-
" '
|
310 |
-
" '
|
311 |
-
" '
|
312 |
-
" '
|
313 |
-
" '
|
314 |
-
" '
|
315 |
-
" '
|
316 |
-
" '
|
317 |
-
" '
|
318 |
-
" '
|
319 |
-
" '
|
320 |
-
" '
|
321 |
-
" '
|
322 |
-
" '
|
323 |
-
" '
|
324 |
-
" '
|
325 |
-
" '
|
326 |
-
" '
|
327 |
-
" '
|
328 |
-
" '
|
329 |
-
" '
|
330 |
-
" '
|
331 |
-
" '
|
332 |
-
" '
|
333 |
-
" '
|
334 |
-
" '
|
335 |
-
" '
|
336 |
-
" '
|
337 |
-
" '
|
338 |
-
" '
|
339 |
-
" '
|
340 |
-
" '
|
341 |
-
" '
|
342 |
-
" '
|
343 |
-
" '
|
344 |
-
" '
|
345 |
-
" '
|
346 |
-
" '
|
347 |
-
" '
|
348 |
-
" '
|
349 |
-
" '
|
350 |
-
" '
|
351 |
-
" '
|
352 |
-
" '
|
353 |
-
" '
|
354 |
-
" '
|
355 |
-
" '
|
356 |
-
" '
|
357 |
-
" '
|
358 |
-
" '
|
359 |
-
" '
|
360 |
-
" '
|
361 |
-
" '
|
362 |
-
" '
|
363 |
-
" '
|
364 |
-
" '
|
365 |
-
" '
|
366 |
-
" '
|
367 |
-
" '
|
368 |
-
" '
|
369 |
-
" '
|
370 |
-
" '
|
371 |
-
" '
|
372 |
-
" '
|
373 |
-
" '
|
374 |
-
" '
|
375 |
-
" '
|
376 |
-
" '
|
377 |
-
" '
|
378 |
-
" '
|
379 |
-
" '
|
380 |
-
" '
|
381 |
-
" '
|
382 |
-
" '
|
383 |
-
" '
|
384 |
-
" '
|
385 |
-
" '
|
386 |
-
" '
|
387 |
-
" '
|
388 |
-
" '
|
389 |
-
" '
|
390 |
-
" '
|
391 |
-
" '
|
392 |
-
" '
|
393 |
-
" '
|
394 |
-
" '
|
395 |
-
" '
|
396 |
-
" '
|
397 |
-
" '
|
398 |
-
" '
|
399 |
-
" '
|
400 |
-
" '
|
401 |
-
" '
|
402 |
-
" '
|
403 |
-
" '
|
404 |
-
" '
|
405 |
-
" '
|
406 |
-
" '
|
407 |
-
" '
|
408 |
-
" '
|
409 |
-
" '
|
410 |
-
" '
|
411 |
-
" '
|
412 |
-
" '
|
413 |
-
" '
|
414 |
-
" '
|
415 |
-
" '
|
416 |
-
" '
|
417 |
-
" '
|
418 |
-
" '
|
419 |
-
" '
|
420 |
-
" '
|
421 |
-
" '
|
422 |
-
" '
|
423 |
-
" '
|
424 |
-
" '
|
425 |
-
" '
|
426 |
-
" '
|
427 |
-
" '
|
428 |
-
" '
|
429 |
-
" '
|
430 |
-
" '
|
431 |
-
" '
|
432 |
-
" '
|
433 |
-
" '
|
434 |
-
" '
|
435 |
-
" '
|
436 |
-
" '
|
437 |
-
" '
|
438 |
-
" '
|
439 |
-
" '
|
440 |
-
" '
|
441 |
-
" '
|
442 |
-
" '
|
443 |
-
" '
|
444 |
-
" '
|
445 |
-
" '
|
446 |
-
" '
|
447 |
-
" '
|
448 |
-
" '
|
449 |
-
" '
|
450 |
-
" '
|
451 |
-
" '
|
452 |
-
" '
|
453 |
-
" '
|
454 |
-
" '
|
455 |
-
" '
|
456 |
-
" '
|
457 |
-
" '
|
458 |
-
" '
|
459 |
-
" '
|
460 |
-
" '
|
461 |
-
" '
|
462 |
-
" '
|
463 |
-
" '
|
464 |
-
" '
|
465 |
-
" '
|
466 |
-
" '
|
467 |
-
" '
|
468 |
-
" '
|
469 |
-
" '
|
470 |
-
" '
|
471 |
-
" '
|
472 |
-
" '
|
473 |
-
" '
|
474 |
-
" '
|
475 |
-
" '
|
476 |
-
" '
|
477 |
-
" '
|
478 |
-
" '
|
479 |
-
" '
|
480 |
-
" '
|
481 |
-
" '
|
482 |
-
" '
|
483 |
-
" '
|
484 |
-
" '
|
485 |
-
" '
|
486 |
-
" '
|
487 |
-
" '
|
488 |
-
" '
|
489 |
-
" '
|
490 |
-
" '
|
491 |
-
" '
|
492 |
-
" '
|
493 |
-
" '
|
494 |
-
" '
|
495 |
-
" '
|
496 |
-
" '
|
497 |
-
" '
|
498 |
-
" '
|
499 |
-
" '
|
500 |
-
" '
|
501 |
-
" '
|
502 |
-
" '
|
503 |
-
" '
|
504 |
-
" '
|
505 |
-
" '
|
506 |
-
" '
|
507 |
-
" '
|
508 |
-
" '
|
509 |
-
" '
|
510 |
-
" '
|
511 |
-
" '
|
512 |
-
" '
|
513 |
-
" '
|
514 |
-
" '
|
515 |
-
" '
|
516 |
-
" '
|
517 |
-
" '
|
518 |
-
" '
|
519 |
-
" '
|
520 |
-
" '
|
521 |
-
" '
|
522 |
-
" '
|
523 |
-
" '
|
524 |
-
" '
|
525 |
-
" 'f56b41ea938547eaac61edabd71e0cc2',\n",
|
526 |
-
" '55708b9971954a77a64440b4e2a4d437',\n",
|
527 |
-
" '6052cc5180aa43359948f92a2fba7fd2',\n",
|
528 |
-
" '2a9f782eb0b94d2381c2b902b89313db',\n",
|
529 |
-
" 'fcb302874996442296870bdff15b2d4f',\n",
|
530 |
-
" '0864ff8559dc43be94959f7493dd6067',\n",
|
531 |
-
" '7bc9df2622734502bfacdd235b66edd1',\n",
|
532 |
-
" 'a01170164ec84b7194848a9021586d99',\n",
|
533 |
-
" 'e2c7ea2f03cd4100bef06b31c15d5df6',\n",
|
534 |
-
" 'b85b7a4a8660444fa704ecef67e5978c']"
|
535 |
]
|
536 |
},
|
537 |
-
"execution_count":
|
538 |
"metadata": {},
|
539 |
"output_type": "execute_result"
|
540 |
}
|
@@ -547,7 +555,7 @@
|
|
547 |
"\n",
|
548 |
"dimension = 1024\n",
|
549 |
"collection_name = \"ai-safety-sr-arctic-embed-l-semantic\"\n",
|
550 |
-
"qdrant_server = \"
|
551 |
"qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
|
552 |
"qdrant_client.create_collection(\n",
|
553 |
" collection_name=collection_name,\n",
|
@@ -560,15 +568,711 @@
|
|
560 |
" embedding=embedding_model,\n",
|
561 |
")\n",
|
562 |
"\n",
|
563 |
-
"vector_store.add_documents(chunked_docs)"
|
|
|
564 |
]
|
565 |
},
|
566 |
{
|
567 |
"cell_type": "code",
|
568 |
-
"execution_count":
|
569 |
"metadata": {},
|
570 |
"outputs": [],
|
571 |
-
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
572 |
}
|
573 |
],
|
574 |
"metadata": {
|
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
+
"execution_count": 2,
|
28 |
"metadata": {},
|
29 |
"outputs": [],
|
30 |
"source": [
|
|
|
40 |
},
|
41 |
{
|
42 |
"cell_type": "code",
|
43 |
+
"execution_count": 3,
|
44 |
"metadata": {},
|
45 |
"outputs": [
|
46 |
{
|
47 |
"name": "stderr",
|
48 |
"output_type": "stream",
|
49 |
"text": [
|
50 |
+
"/Users/jeevan/Documents/Learnings/ai-engineering-bootcamp/AIE4/mid-term/ai-safety-chatty/.venv/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
51 |
" from tqdm.autonotebook import tqdm, trange\n"
|
52 |
]
|
53 |
}
|
|
|
62 |
},
|
63 |
{
|
64 |
"cell_type": "code",
|
65 |
+
"execution_count": 12,
|
66 |
"metadata": {},
|
67 |
"outputs": [],
|
68 |
"source": [
|
|
|
75 |
")\n",
|
76 |
"documents = await pdf_loader.aload()\n",
|
77 |
"\n",
|
78 |
+
"text_splitter = SemanticChunker(embedding_model, buffer_size=5, breakpoint_threshold_type=\"percentile\",breakpoint_threshold_amount=90)\n",
|
79 |
"\n",
|
80 |
"chunked_docs = text_splitter.split_documents(documents)\n"
|
81 |
]
|
82 |
},
|
83 |
+
{
|
84 |
+
"cell_type": "code",
|
85 |
+
"execution_count": 23,
|
86 |
+
"metadata": {},
|
87 |
+
"outputs": [],
|
88 |
+
"source": [
|
89 |
+
"# Recursive splitter\n",
|
90 |
+
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
91 |
+
"recursive_text_splitter = RecursiveCharacterTextSplitter(\n",
|
92 |
+
" chunk_size = 1024,\n",
|
93 |
+
" chunk_overlap = 100,\n",
|
94 |
+
" length_function = len,\n",
|
95 |
+
")\n",
|
96 |
+
"recursive_chunked_docs = recursive_text_splitter.split_documents(documents)"
|
97 |
+
]
|
98 |
+
},
|
99 |
{
|
100 |
"cell_type": "code",
|
101 |
"execution_count": 4,
|
|
|
105 |
"import os\n",
|
106 |
"import getpass\n",
|
107 |
"\n",
|
108 |
+
"os.environ[\"QDRANT_API_URL\"] = getpass.getpass(\"Enter Your Qdrant API URL: \")\n",
|
109 |
+
"os.environ[\"QDRANT_API_KEY\"] = getpass.getpass(\"Enter Your Qdrant API Key: \")\n",
|
110 |
+
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter Your OpenAI API Key: \")\n"
|
111 |
]
|
112 |
},
|
113 |
{
|
114 |
"cell_type": "code",
|
115 |
+
"execution_count": 13,
|
116 |
"metadata": {},
|
117 |
"outputs": [
|
118 |
{
|
119 |
"data": {
|
120 |
"text/plain": [
|
121 |
+
"['8dd5b1e7fd464e2a90c28a8eea8b0cb9',\n",
|
122 |
+
" '906e0c268d564dbc89c0b8398e235407',\n",
|
123 |
+
" '4b81191a4cc94fbd835dc9c942e9543a',\n",
|
124 |
+
" '25c3b7fffa8d4bc29790057fe2f4d025',\n",
|
125 |
+
" '3ad5906a8a274b56bd05e4ac39ffe459',\n",
|
126 |
+
" 'e3fa01bef57c489ca014be2e589b7ef1',\n",
|
127 |
+
" 'af5fc5121c6a438a8fc5dea454b7e92f',\n",
|
128 |
+
" '80500cf02d5748c39b1c62288459c306',\n",
|
129 |
+
" '5db6eebee14b4aafa948e4f9aa4f7aa2',\n",
|
130 |
+
" '99385298e8744643822e01525bdff89e',\n",
|
131 |
+
" 'eddc9704820d4005b7c62a5085f69454',\n",
|
132 |
+
" '4324a624f4054ae5baa7270d9f6aaa56',\n",
|
133 |
+
" '9eb24bea31a749f1b7a86ac2b186ec14',\n",
|
134 |
+
" '7e9c9763bebf40cea1833ea6ad376eeb',\n",
|
135 |
+
" 'cc8846008cac472e88eb16497c560a15',\n",
|
136 |
+
" '5af0886e387449fc89f1d0e82c32c590',\n",
|
137 |
+
" '824ae7c1c15a43c8b62713f02d91e0b5',\n",
|
138 |
+
" 'f0ef1b30251b4429ad7d902b85fafcf8',\n",
|
139 |
+
" '314a75e55d1b4c1fa46f49610d745f95',\n",
|
140 |
+
" '66828a5f9536480bbd08d94f087bc44b',\n",
|
141 |
+
" '8230b8add982486f9ac8e120a27d3aec',\n",
|
142 |
+
" 'dd1c75bb5c1441468ac8e7d4595bf0b9',\n",
|
143 |
+
" 'a9b1b1b87eeb48b78ed4cf6adddee9d2',\n",
|
144 |
+
" 'eeacab16c9d94d08a791c516e0a65f6b',\n",
|
145 |
+
" '187badb4dc064743898f5e5218114250',\n",
|
146 |
+
" '0ecc4e873fe047ce8afc33e19fe40c3f',\n",
|
147 |
+
" 'be7b81185ce140229bee6d1306120528',\n",
|
148 |
+
" 'd8a9a361dc8a4917aefc2e0a17efafad',\n",
|
149 |
+
" '7ca3b3bf947e449e8f58cd4fca12d884',\n",
|
150 |
+
" '07a8b46151e74641b97ad823ef91082f',\n",
|
151 |
+
" '421004f00dbb4a47a81c424ad5f64e39',\n",
|
152 |
+
" '53750f6e3cfa481d971d7e5a6b9f55c5',\n",
|
153 |
+
" 'e9f9524be6884599893590ad5acdb12b',\n",
|
154 |
+
" '446225894d9747e0a888b596875ac83b',\n",
|
155 |
+
" '7677215528c44061bfb018e42a13e528',\n",
|
156 |
+
" '183002bfeeaa4b75968dda61451c2f37',\n",
|
157 |
+
" '818cd888dd6b4735a602949aea2ea900',\n",
|
158 |
+
" 'c803c1690b5549a5a13578b2cd757b17',\n",
|
159 |
+
" '864bc201486e42bdb4caef6a266fa1ee',\n",
|
160 |
+
" '02cae44fcb9e4d2eb60a6da08b01a4e6',\n",
|
161 |
+
" '384e2a4f36d14359b22b0c0cdf463cd9',\n",
|
162 |
+
" '6de4fdb915164aa68e076a2d6e1913ae',\n",
|
163 |
+
" '8da0e4ecc526416ca3be4043879ea17b',\n",
|
164 |
+
" 'e63359693473494d922f996b57c65d3b',\n",
|
165 |
+
" '76313322b20b4e368f2aaefbf911df6b',\n",
|
166 |
+
" '455bced7b2544b69a5c547d998548748',\n",
|
167 |
+
" 'd1abce93130d4eb49cdc1aca8b7c9c60',\n",
|
168 |
+
" '966dde23cfa144b899b60a3659f32eb4',\n",
|
169 |
+
" '4fe01c77f1ae4b70a18c56fc6cb9679d',\n",
|
170 |
+
" '7aa14bc014fa445cbc061c47a3fe1c31',\n",
|
171 |
+
" '0aa076f900614fcfb37883057c67e6c1',\n",
|
172 |
+
" '2bae6b24f8234792b914a75712fb89fc',\n",
|
173 |
+
" '74fb5f5dfdb8413a9afaaf472a009ac2',\n",
|
174 |
+
" 'b1c95e4126e842d598eae6fbc455b82c',\n",
|
175 |
+
" '2e326893157844fe88f762b96aa46b99',\n",
|
176 |
+
" 'd27b5c85573d44658a4d338c39890629',\n",
|
177 |
+
" '1f967a5cae05418d94f9f3d07dcce74e',\n",
|
178 |
+
" '7667f8b8e5914417a68e7d41256cfc98',\n",
|
179 |
+
" '7fcd90b390494d3686e532a6528bd021',\n",
|
180 |
+
" '8c64aecd850b48b3bac216e73ebad1e0',\n",
|
181 |
+
" 'ca845f98d6b44c02b9f9edafcd75aa2e',\n",
|
182 |
+
" '571b12484be44ea6b406e6cdcd0662a5',\n",
|
183 |
+
" 'fa099bf314614075bf8bfc58110f52f1',\n",
|
184 |
+
" 'b98bfe7e8c234ed59b5d893929bac64e',\n",
|
185 |
+
" '3bc58c54665d4a4ca394ebb13debfabc',\n",
|
186 |
+
" 'a276d45eccd54056a8855ccdb5907df3',\n",
|
187 |
+
" '0f9ac79d967942d0b56b9fe200dc7846',\n",
|
188 |
+
" '07c5e3794b244e28b384dad31a2c63a2',\n",
|
189 |
+
" 'deb29326ff1e4fec934c35710d4e0dcf',\n",
|
190 |
+
" '5278dac4122044879b9dd1a7c557b7a8',\n",
|
191 |
+
" '041338e04daa4482a7d65c311ab0f3f5',\n",
|
192 |
+
" '1ed9bbc381b1423a95ef935cc16e277c',\n",
|
193 |
+
" '20bb221a9b654bbc99edc812475adc12',\n",
|
194 |
+
" '188fba73978143e8a22370774e1d31fd',\n",
|
195 |
+
" '3a65a41271a947848aba4939473d0f85',\n",
|
196 |
+
" 'c5cee035027048338a81b9bf0830cf57',\n",
|
197 |
+
" '03325dbfeb164512a565172407ae0ec3',\n",
|
198 |
+
" '4494b4f19cff440281c034ede5e675e1',\n",
|
199 |
+
" '2b55dd78b0f5461d917eeaec2a75dcb7',\n",
|
200 |
+
" 'a2aef7ef0741438fb643a4fb225f1ffd',\n",
|
201 |
+
" '3886d787efdd4d2bb3fc702ffd911db8',\n",
|
202 |
+
" '71455bc57478429e8a1269ce7332302a',\n",
|
203 |
+
" '0c2896f473c749f9888b5723ee834a5d',\n",
|
204 |
+
" '1a8507e2597049dc8287d21172dfe518',\n",
|
205 |
+
" '943e65c033774aed99116750cfbb5f5a',\n",
|
206 |
+
" '78bfa949218f48c7b365569f2c3396ff',\n",
|
207 |
+
" '6812e197478a40bf86cc363d11fc0856',\n",
|
208 |
+
" 'e9cd2bf8ad454aa7af9446e62d9d845d',\n",
|
209 |
+
" 'ca970fdebb1e4d79853090a7c73722f5',\n",
|
210 |
+
" '84400697300c4d468f5c58d09fd63d56',\n",
|
211 |
+
" 'ad1bf5c566a147e6b66f9f3502a227fd',\n",
|
212 |
+
" '0c620c86587a4ae8975a0d066eb80e97',\n",
|
213 |
+
" '1aa1ebd384774410ac011ca3e535808f',\n",
|
214 |
+
" 'f30f7dba0182402c8abce8d9b07df99b',\n",
|
215 |
+
" '73dd6c906bd14d3494dc8def54680e0f',\n",
|
216 |
+
" '632c7eafa1c048979b65c5e8ecacb98e',\n",
|
217 |
+
" 'b3edcc98879747dbbed2e8b6e19e1baf',\n",
|
218 |
+
" '4f4bef9639bb454e87cb61051e2d4c82',\n",
|
219 |
+
" 'd0c059bc4e04474f9355c87964b3b470',\n",
|
220 |
+
" '412fa6deb62546c4a092988f96ddb425',\n",
|
221 |
+
" '2283f3ef3e7141738c8966fa4333ba05',\n",
|
222 |
+
" '7ed4340667d643b193b45f7f21a238ed',\n",
|
223 |
+
" '9209830e86c54dbd9974cc737bbdbe91',\n",
|
224 |
+
" '89e28024e5f14377a6b2efa1997f370a',\n",
|
225 |
+
" '2f4b769a22b24bb49bed8917adae1f9e',\n",
|
226 |
+
" 'fcfc743d434d43b886afb80c5377e1fd',\n",
|
227 |
+
" '300f3cfca0874ee2818241856f2175df',\n",
|
228 |
+
" '55ea80ccf78f4c9cb622c1451951e723',\n",
|
229 |
+
" '4b755a511dbd46f79eb4b3bda119e79c',\n",
|
230 |
+
" '956a7f2f70854e1a82dfc542fd761492',\n",
|
231 |
+
" '190c9cd3f01a4ea894877f4ab35000bf',\n",
|
232 |
+
" '381a0acf443244c78f303b9f6b72535f',\n",
|
233 |
+
" 'd7afcec2075343b19d1320605cc41b46',\n",
|
234 |
+
" '7898f81259be4c42b44cdfd3b41aa25b',\n",
|
235 |
+
" 'd86e65ae1f8140e299ccf27583735b7a',\n",
|
236 |
+
" 'ee553712557545a0b0320adc4d563bca',\n",
|
237 |
+
" '6fb6a0b739a64b909cee096d722a4f6e',\n",
|
238 |
+
" 'be1514e798f04581af33447b86f002c8',\n",
|
239 |
+
" 'c92113c2d4344ce8a10e7a6c1d089f4e',\n",
|
240 |
+
" 'dc47dd1e29bb49768c2b88054f91c69f',\n",
|
241 |
+
" 'd72643a36bf6415795a3694f93e5c376',\n",
|
242 |
+
" '0709e83c2d974cb7aa30b17f8a5e5050',\n",
|
243 |
+
" 'f4bc420a5b4c4cdd91c4441837fbcfae',\n",
|
244 |
+
" '712e9898ff5f44eda54aebcf54931760',\n",
|
245 |
+
" '1916383ddb404a32ab833c2add8e2511',\n",
|
246 |
+
" '4a01c037505943a3be4fa183de7d5c73',\n",
|
247 |
+
" 'cb29c514ccf8416491654441749f9889',\n",
|
248 |
+
" '57cc735133754979862b8dd27ccf45d8',\n",
|
249 |
+
" '6e28a749e0ee49d8b6ba562ab268e474',\n",
|
250 |
+
" 'd915b593c4194759a4ca48304ce54b56',\n",
|
251 |
+
" '83bf4923ef4847b1b3faa0a85ce85d9c',\n",
|
252 |
+
" 'c7165907a5774d7a9cfe034328875f16',\n",
|
253 |
+
" 'fb83c16fe2ac4a0b8a2cdd3a099b0751',\n",
|
254 |
+
" 'd8d56a63f4104e1d9b06c5c8d6246d4d',\n",
|
255 |
+
" 'b92d479d4fea486980ce1133ef0d9049',\n",
|
256 |
+
" 'b264b04721c14738a6e018c3d089e3e2',\n",
|
257 |
+
" 'bf50880e770d4e2a80415e87b8f95788',\n",
|
258 |
+
" '5ca16b29007f4b919ac1f3fdf261aa10',\n",
|
259 |
+
" '30f3d89d1ae042afb3b745451e0a5fdc',\n",
|
260 |
+
" '41dd324a662a4e79935980dc8e53ab8f',\n",
|
261 |
+
" '7590bcc7d6d540b1bd92a7ce69c0e9b2',\n",
|
262 |
+
" '72973a23774d4bebb9c42dcf885ae06a',\n",
|
263 |
+
" 'ce3e692e73084116ae834e72349032a7',\n",
|
264 |
+
" '044469614deb404f8d3b1860907e0f75',\n",
|
265 |
+
" '3805bb3205c5411daf2a64a7742e59fe',\n",
|
266 |
+
" '03aa772e62b44423b75ec05c90e8687e',\n",
|
267 |
+
" '740e824d876d44c7b30599b4dcb8eb44',\n",
|
268 |
+
" 'ba27340cf2144d15bee2a5f5b7e00622',\n",
|
269 |
+
" '4c6a7847bc554fddadd0a884c26612bc',\n",
|
270 |
+
" '8a13a8d664c4453b8f71c01b28ec8dff',\n",
|
271 |
+
" '113a3db0cd0d4e0f8067c5fa074967df',\n",
|
272 |
+
" '28af19cb148f49049336aa1b52c14a98',\n",
|
273 |
+
" '795bc0bc5a9c4ff8b472e2a9c9c59dbd',\n",
|
274 |
+
" 'fb36fe1dad1c4280a7186ea5c20e64dc',\n",
|
275 |
+
" 'a50e1d30a67b4144bd8ce5ab32f1cd3a',\n",
|
276 |
+
" 'f1ccd9cc27b0414f96243f1c63a07fd5',\n",
|
277 |
+
" '3d2b64f6ceb74744b6b8374728142334',\n",
|
278 |
+
" '15f314cbb8a14f9286a814cafef76192',\n",
|
279 |
+
" 'ac540651b7a34d50b70e4c44cf25b3ca',\n",
|
280 |
+
" '6b87356c50d1404abe0a676b7f322a72',\n",
|
281 |
+
" '67902b525d014249900e54257590f7ea',\n",
|
282 |
+
" 'fc1189d79c824a74bd60dd5dc341aa2b',\n",
|
283 |
+
" 'b26845eaa60246399cef48b0a13d11b5',\n",
|
284 |
+
" '7b592acd329743a8a7a3b2569a048416',\n",
|
285 |
+
" '189e134f601441cc9f1514a778e3c820',\n",
|
286 |
+
" 'f7f1425e4c2d4e1fa9040ee85d368bf1',\n",
|
287 |
+
" '0297a5233d6f4275baf0a9957b0dc586',\n",
|
288 |
+
" '5afeb076d11841c890517fc92d0aa6f9',\n",
|
289 |
+
" '79fa6ed91f7a47b6bd764e1c8b412fa5',\n",
|
290 |
+
" 'ff4b75e4daaf4588ae69ba2f83816c15',\n",
|
291 |
+
" '84b7e45334a3477f8d8a64e3504fb620',\n",
|
292 |
+
" 'dbf780a26828491da830425df5a7a03c',\n",
|
293 |
+
" '25a6912b64f442f99f5787bef114ebff',\n",
|
294 |
+
" '00c07386007a4dc18072a431f7cf83b1',\n",
|
295 |
+
" 'f84828d74c0c446389732b8eb4d6570f',\n",
|
296 |
+
" 'a12ba2aaf84640a8816d9ce8e8a417e9',\n",
|
297 |
+
" '02223a887b2c4ec0891d45e75836b00e',\n",
|
298 |
+
" 'a62632e5379a4af5be885b4750d18650',\n",
|
299 |
+
" '5ef2d149eb314d879897648027e7e8aa',\n",
|
300 |
+
" '1a2c86d6906141b18700239300599566',\n",
|
301 |
+
" '584530895cc74af58cdb016c0ed63bc5',\n",
|
302 |
+
" 'e19a1e82a1ec4884a7c72f2996ac927f',\n",
|
303 |
+
" 'dbcd348813cc4365bf65c549333e669d',\n",
|
304 |
+
" '8bac146f886b4272bd40f51adb35c32b',\n",
|
305 |
+
" 'cae2438601594d6fb39d99d617fe6c0e',\n",
|
306 |
+
" 'e8306f6a959b4a219d096b968784c44e',\n",
|
307 |
+
" 'ebbdefa7da15403294655048c6fe3624',\n",
|
308 |
+
" '60579bd40852405b8345114456963981',\n",
|
309 |
+
" '211f66d20b5c451d91f310594b854ea0',\n",
|
310 |
+
" '6d726194ac8641a6a5f6d8ce3f192a7c',\n",
|
311 |
+
" 'c0154e0f56b049048f9bcb7f718173f2',\n",
|
312 |
+
" '081a6179661e41f69ab10b92027d161e',\n",
|
313 |
+
" '2c149c226d504053bfa94532a850efd1',\n",
|
314 |
+
" 'f833250f67bc4329922a5a7f7b7d07f7',\n",
|
315 |
+
" 'a0c1e7c49351406ca3567622b6b1e38e',\n",
|
316 |
+
" '72abd1e5dc824d6c8852f7331990b6f0',\n",
|
317 |
+
" '35c02c07ae484045a325940fcbe098fb',\n",
|
318 |
+
" '63b83f297d1d486e84ddb42c2af32900',\n",
|
319 |
+
" '17fb247ed7bc4599a8de06966e744b2a',\n",
|
320 |
+
" '371ea02f2a7e4e11b82ed0593a26a806',\n",
|
321 |
+
" '6847a89d7a944bf2bd95430c4d63def7',\n",
|
322 |
+
" 'a186a88983e64831bf42523b6522d706',\n",
|
323 |
+
" 'e3538719cffb4cb59efa815b27b4bf81',\n",
|
324 |
+
" '8ae7bdfed4e249dd98727cbb4f34259d',\n",
|
325 |
+
" 'd4f18e23e8444ffba7b13661b22ba1a9',\n",
|
326 |
+
" 'ab0aace578b2457cb10966f9a57dbfce',\n",
|
327 |
+
" '641437d62c1940d7a7d0711391802aa6',\n",
|
328 |
+
" '3f88fe5f258244d581a053a53b844bdc',\n",
|
329 |
+
" '5022a64b46dc4989b2a919193cdec7da',\n",
|
330 |
+
" 'e620e702fb604457b4724ce0f753138d',\n",
|
331 |
+
" 'ff9557d7d3c446a39829de03605a5254',\n",
|
332 |
+
" '5314c1de556d4b96af06fd3ac37cf1f4',\n",
|
333 |
+
" '56d98670e8f74b1881bac44ccfb9267d',\n",
|
334 |
+
" '2d06bf2cf37a48f98708e345e86a6114',\n",
|
335 |
+
" '1d6be38e70b74ce69cf68c32fdef0b9f',\n",
|
336 |
+
" '936184dab9ab4ff9905cbaccc0844e61',\n",
|
337 |
+
" 'ec080c630727417fa858340935e0557d',\n",
|
338 |
+
" 'f6792c2c58774d4e9ba97575ae5a9ddb',\n",
|
339 |
+
" 'c1df1df5060e41899363e2a0649100e4',\n",
|
340 |
+
" 'b2eb147898c64b359c951297318e6831',\n",
|
341 |
+
" '216d56dfdaae4c098b826c2c6dbe8132',\n",
|
342 |
+
" '7921cab4d11646168b5d186794f5db24',\n",
|
343 |
+
" 'b89e6d297f064e708c4fb903c6ebf15a',\n",
|
344 |
+
" '4ee471c58dbe4185b6968113228bb20e',\n",
|
345 |
+
" 'd64a74ed5b57427c9c6ce98a9f945b70',\n",
|
346 |
+
" '58cd9cbd849c456e85fb72a4abc5c69c',\n",
|
347 |
+
" '2e1835610aa749c896c8c165e3d84470',\n",
|
348 |
+
" '8008134cc1c44751bb95a3270cb89a44',\n",
|
349 |
+
" 'e4f098d6b9024392adc396aad0efb94f',\n",
|
350 |
+
" 'e8fcb91812d048efa5ba38a46cf40531',\n",
|
351 |
+
" '987e19cc2d674e2aa0d555af45ef874f',\n",
|
352 |
+
" '2816810a2eea4f0081baae4b28614796',\n",
|
353 |
+
" 'ffd0647c27664a779dabf843fcf83981',\n",
|
354 |
+
" '7a5e718e26b14f4daf674c901b3dde93',\n",
|
355 |
+
" 'a7248347d0b743d7b5db65f3b1b87cac',\n",
|
356 |
+
" '640f3399c6c340f19a11919a6402ba85',\n",
|
357 |
+
" 'fe73657d7e884bd48d93afdc193b73bb',\n",
|
358 |
+
" 'a9c649a44de94b5f82af06c804e3bb08',\n",
|
359 |
+
" '8265df5e7847443a8c91478ae1cabf72',\n",
|
360 |
+
" 'a3bb9ab12f814c4faa382b586fe13680',\n",
|
361 |
+
" '775f072a48674d6b9fdd0671c4064891',\n",
|
362 |
+
" '21048806b4c74f5db5b7f873c45adedb',\n",
|
363 |
+
" '1525157eac174ab684089f50e6c29969',\n",
|
364 |
+
" '136132cedfec4e3bacf2a8adc1fbd50f',\n",
|
365 |
+
" '08099904461749ddba96b17b61226622',\n",
|
366 |
+
" '1da17dd87dd448c4902f8e3a1ad1c51e',\n",
|
367 |
+
" 'fa9c4880b4a34b9aaca3f5363ff1b7f0',\n",
|
368 |
+
" '30eeafb17ae74a42b370e173e22abfea',\n",
|
369 |
+
" '0d3ad50f8c524e90b6c440865aaf63d8',\n",
|
370 |
+
" '51d52addb9df4c04afaf8f008fe89259',\n",
|
371 |
+
" '2aa50c713fe241f1b9b44107c0d47945',\n",
|
372 |
+
" '8615b8c9442c4031aee25316cdfc7cb5',\n",
|
373 |
+
" '8f8fd8a1edc043ee82c77381bf39a83c',\n",
|
374 |
+
" 'f15223b9a98445f2b7613e518e7bca83',\n",
|
375 |
+
" '6a437e8d6655430aaef679f6c6a84831',\n",
|
376 |
+
" 'c0a3331686754cd9929c4abd5d81dc7f',\n",
|
377 |
+
" 'ff36401f33d9424cb7112033ead5f58c',\n",
|
378 |
+
" '841220093a1242f0b04a4ac8d852e280',\n",
|
379 |
+
" '655ec122f3d24c069eecacc8e8bc8f82',\n",
|
380 |
+
" '6da4281d97ad46ed9ef6dd169c640afe',\n",
|
381 |
+
" 'eeef919024d54063ae3cd6c6f8f7a73a',\n",
|
382 |
+
" '9f55e4818e0c4bef82dec178dc64eaa4',\n",
|
383 |
+
" '8a652387c6eb424288a0022be058d00a',\n",
|
384 |
+
" '9a19d1114e674c618d23a1299f14f1ba',\n",
|
385 |
+
" '9e4b5fdbbda24ed5a2fbbc3923847a44',\n",
|
386 |
+
" '8b442e6de7bf401b8300c567a642a759',\n",
|
387 |
+
" '20c8152b25514d018eeb8542b4450ad2',\n",
|
388 |
+
" '5ae8d5d8230f46cab713cecbd97c847e',\n",
|
389 |
+
" 'e159ce134b7b46308fc919b22a9e808f',\n",
|
390 |
+
" '5a0d312175db4d15b85c0255b68bc027',\n",
|
391 |
+
" '6f82139d091145cba88a7f0fc367063d',\n",
|
392 |
+
" 'ee60e7e10d924f01b530b0291d939aae',\n",
|
393 |
+
" '3b73fe9ad953458bbd3d11f44b85fce5',\n",
|
394 |
+
" 'd15e56f06ba24e3cb6a1c4dd0568201d',\n",
|
395 |
+
" '16f3d27489ff423dbf7d027844d957db',\n",
|
396 |
+
" '6091bda6320149a1af5ccfb541e75148',\n",
|
397 |
+
" 'ec527d7203164f07ae7349cfa33829b3',\n",
|
398 |
+
" '26b4090286e346b4b686b13360cceea3',\n",
|
399 |
+
" '856dd023a8cd41108eea38b403eadd09',\n",
|
400 |
+
" '5434f50e81db44e5b80d3bc8816eb5bf',\n",
|
401 |
+
" 'e9022b1d39f24ec09981e8c66478705d',\n",
|
402 |
+
" 'd3f895478ed74239b4bc88e04e215f1a',\n",
|
403 |
+
" 'c1fb1bae1b5e42f8a65f8260c259e133',\n",
|
404 |
+
" '2a86b477a07e48afb2658742c30494e8',\n",
|
405 |
+
" 'da64e968fc3047089de2ffa4b62a8c0a',\n",
|
406 |
+
" '0a42ebaaedae4f73914398ad1486afb4',\n",
|
407 |
+
" 'dc66a40fba5f4e348216910fce0d2428',\n",
|
408 |
+
" 'e125ed2241b24a31ba40ed768a21d4f6',\n",
|
409 |
+
" '850eda324b734ffdad0fe63c92c91038',\n",
|
410 |
+
" 'd9e9ca7b0a634afdac1f4da62f2dcddf',\n",
|
411 |
+
" '7cf441e12164420fa8b58e0aa6d244de',\n",
|
412 |
+
" '0a6c2f48e5094e3399f7e1d0f38d873f',\n",
|
413 |
+
" 'c8c29dbfc7f840d7a7195aa74388b30c',\n",
|
414 |
+
" '07a90e50dcbb4352baa6636e9b687aab',\n",
|
415 |
+
" '85d60bfe6d684c1a8578c1d6710c867b',\n",
|
416 |
+
" 'ceb46b27e8994626a6d6d1c1acabff5d',\n",
|
417 |
+
" '506dd325656145ebb7d976de3b4953c0',\n",
|
418 |
+
" 'a83e954196874363b13c7cb3d7d8e025',\n",
|
419 |
+
" '16fc61ea959d4427b3fa723d7e58f2bf',\n",
|
420 |
+
" '3d4599e2ad2f47deae8c1c25d30dec68',\n",
|
421 |
+
" '260482de224a4ec998459a5d2f9384bd',\n",
|
422 |
+
" 'd480305e9fc34a55b8f146343fe1dd8e',\n",
|
423 |
+
" '9851b805fcf54766bd482d5a0d4a8d0f',\n",
|
424 |
+
" '5147fb1a9a904ff09b7c6885567fa94e',\n",
|
425 |
+
" '509f1c4ef2b348af821461d751850e93',\n",
|
426 |
+
" 'a045c48dd9444211a2f0087229df189d',\n",
|
427 |
+
" '3b90d03add21451aa40990b1f2dad9e1',\n",
|
428 |
+
" 'fa68102a0555422db1cc0f3822496a48',\n",
|
429 |
+
" 'ade010ff9a2644a38c7c3de875a3ac78',\n",
|
430 |
+
" '390f54300e1f41ac9224da683f00d31a',\n",
|
431 |
+
" 'aacaee53fce14e9395259a0609cc1646',\n",
|
432 |
+
" '00ca0c3998b64339874ad036983a0922',\n",
|
433 |
+
" '2b3b851a8bb6422abab843dc2148255f',\n",
|
434 |
+
" '363ed4276aa144b2baeec9dbc1fba38c',\n",
|
435 |
+
" 'e9bf60fe2f184793b37f268ffa486abc',\n",
|
436 |
+
" '98b185639a5d441ca60d7a5fe7620f8c',\n",
|
437 |
+
" '9ae478f07ca3465a9a447b3c7eab4b26',\n",
|
438 |
+
" '01fc2f0676754dc7baae898343e2bebf',\n",
|
439 |
+
" 'cf3dd2f39b1a48919b90571555e4befb',\n",
|
440 |
+
" 'b0ef26aca0404662b5706ccfa737a52b',\n",
|
441 |
+
" '9afcf7171eb74e628f99fa44a753c131',\n",
|
442 |
+
" '26f3917cc2274e998e115212273fe2ba',\n",
|
443 |
+
" 'ec369428b5fe43138d049f293dcd21a4',\n",
|
444 |
+
" '947d581cb3cd4555933504b8c64c54d3',\n",
|
445 |
+
" '16b495459a4e4bfd96a12655defa9551',\n",
|
446 |
+
" 'af15b6c943834e96a24363fbdca209f3',\n",
|
447 |
+
" 'a01a9d5cb1c041889bfd1cf29cd4c08a',\n",
|
448 |
+
" 'd3d0ee8f3c394ed680324fdcdb442241',\n",
|
449 |
+
" 'd56148720f974dffab53a4e8917c3833',\n",
|
450 |
+
" '32593dde550f45af9a36349bcd63192c',\n",
|
451 |
+
" '1d43c4caf83b470897e96410f4dae5ee',\n",
|
452 |
+
" 'ee1cd8b353cf45e3a88ce76faeebc9a1',\n",
|
453 |
+
" '8e7135cd24764e94b8d04e15ec86b9c0',\n",
|
454 |
+
" 'a7a918cf594e4a2992398acc924e6015',\n",
|
455 |
+
" '13837b8571154abe83bb0b8d8e08d406',\n",
|
456 |
+
" '63a4ce49d82d4da6bcb4da66db26bf35',\n",
|
457 |
+
" '2214b1db8432499286a9ad49d8a2391c',\n",
|
458 |
+
" 'af23e4413b7c42cc982b011d6432ec5a',\n",
|
459 |
+
" 'e4b5e669227c4112aec7a7c53f568b75',\n",
|
460 |
+
" '2d5f634bb7414afcac7b78ce7c0a864b',\n",
|
461 |
+
" 'beda5449b3124e379f35601a33ab4651',\n",
|
462 |
+
" '271af180d99846e4a0d8c57f444df81d',\n",
|
463 |
+
" '4af16168d5a1432e8ca9719c9000f58c',\n",
|
464 |
+
" '405113fc9e334cada56589b758cd9fd7',\n",
|
465 |
+
" 'c95f295e46ba4b82b9f92fc0dcc8c1df',\n",
|
466 |
+
" '475897fbe33347cf907f3cc381f40c0e',\n",
|
467 |
+
" 'b6779e2220c444d38741c06cc2bb380c',\n",
|
468 |
+
" '7d56c936c7d84514a67cd75e369449f7',\n",
|
469 |
+
" '272a9892cdb742dcbe5f90e29eefae72',\n",
|
470 |
+
" 'c28ace207c9d437da68cf599ac028bbb',\n",
|
471 |
+
" '6d3c684dd6894bd9bf24486175ed834d',\n",
|
472 |
+
" 'd8766ca5bb7d468399e6b864756a04e6',\n",
|
473 |
+
" 'f5501aae471447fba9a4ac7ccf88c1fe',\n",
|
474 |
+
" 'bb4d8f7876a141e0ba82eaebe7899c5a',\n",
|
475 |
+
" '1c87be78b3fd48a093c23a54904bf8bc',\n",
|
476 |
+
" '7cfe5d24a86645e1928a4700e2175e82',\n",
|
477 |
+
" '79a9e904f5bb48a3830647c6afccbb85',\n",
|
478 |
+
" 'deda349c16f54a9f85cd302269c22456',\n",
|
479 |
+
" '5339992d8dcd464294260f5c0c857fff',\n",
|
480 |
+
" 'd86ca1d2c07f4784956acb34d4d8c48c',\n",
|
481 |
+
" 'dadf0561bb1c4ad9a87cc33a21424d32',\n",
|
482 |
+
" '63c86b1adaed4514a75e0409a66b15c3',\n",
|
483 |
+
" '61425b5443b840f2b7d28347d4002192',\n",
|
484 |
+
" 'e7166ad200694bb7ae645e63495dbfa5',\n",
|
485 |
+
" '9a4e61507dbd4fcd96b9c4b8eb24e74d',\n",
|
486 |
+
" '5cb628ca8b8245e0ae326ccb8ae5635d',\n",
|
487 |
+
" 'cbf3322896f8445ead83a6907a9aae08',\n",
|
488 |
+
" '9156196800e64996891c0703499ffbb5',\n",
|
489 |
+
" '47c88e62be7e4cb88b7d4935ba38cff0',\n",
|
490 |
+
" '0fee655d64c34f84a07e6b889866a486',\n",
|
491 |
+
" 'e4472727736f4fa59d49536d8e331f95',\n",
|
492 |
+
" 'f933c36480f64d8b9600c5075a085e61',\n",
|
493 |
+
" '808c6b3ce87345b391843aaa6b253bfb',\n",
|
494 |
+
" 'f5d6ee781bd742fc88d5ddc2e5f0a7f4',\n",
|
495 |
+
" '281ac17550864cf5827193ddd577aad1',\n",
|
496 |
+
" '3d3aa0ebb1574fa7b498a13abb1b7c40',\n",
|
497 |
+
" 'f43bf31cfe994208b24e363f9459a7a0',\n",
|
498 |
+
" '9931894cfd004a20991a7fef40c23c86',\n",
|
499 |
+
" '1b34b9f61f164993b7387a73e961bf2b',\n",
|
500 |
+
" 'e2ca8df6b02d4240b7f1e4474b4765c7',\n",
|
501 |
+
" '42741aaeaec7422f8ab9c59d18430455',\n",
|
502 |
+
" '6a0a67f326704e11baae384eb567fa09',\n",
|
503 |
+
" 'f7894024f0764978a9eca821c29d3449',\n",
|
504 |
+
" '7f88bf5493764642a14a5bd8bbf04a71',\n",
|
505 |
+
" '93a5412c61204d53b94fda693fb561b8',\n",
|
506 |
+
" '3d265760e45a45d990240628c46fde6f',\n",
|
507 |
+
" '62112a36dcab48379590ca210ca09918',\n",
|
508 |
+
" '1abadc8684e64e6c8cbe1f7427d39678',\n",
|
509 |
+
" 'f1f8c1a6c7534c5bb386425728cfa2b3',\n",
|
510 |
+
" '81b640e8ae0747daadeda29da9f677f6',\n",
|
511 |
+
" '6428b2a89f384a6985d69b0183fc71c3',\n",
|
512 |
+
" '107fb02d9c7e4bffa9669509015e8af1',\n",
|
513 |
+
" '73b8fd9b8aea45a6adfc02d5795bec62',\n",
|
514 |
+
" '931cdf852b634abfb01b656221a8d0ae',\n",
|
515 |
+
" '81e9f8177fe9430a9fb17fd20522c955',\n",
|
516 |
+
" '7dbc18b381454afeb2a6041f60c2b23b',\n",
|
517 |
+
" 'f621fcf8f34f4629909ca455ecaa4f55',\n",
|
518 |
+
" 'c358be2860cf436d8fabb3200888c307',\n",
|
519 |
+
" '81516460f65740e9aef0f4babc29b2f1',\n",
|
520 |
+
" 'ecd2409c27cf47aeadeff569bd25ea85',\n",
|
521 |
+
" 'a4c997a1ad7f4990b2d71cb028463610',\n",
|
522 |
+
" '1ed8f8421791456db0543cd3e1ede40f',\n",
|
523 |
+
" '56f5077192e74e09a58017d0c3368bc4',\n",
|
524 |
+
" 'a639e85d3bee4530a53d132bfa7c58de',\n",
|
525 |
+
" '4c75146c59dd4541a8500f89dd060a2c',\n",
|
526 |
+
" '95c74438067f4bada1fee37942e06ed7',\n",
|
527 |
+
" '0b3f010515574c48b02bebf7a451052e',\n",
|
528 |
+
" '4db576ae022d42beadd921a81e977096',\n",
|
529 |
+
" '3998f2de8bf44929afa7ad0e2e86eccd',\n",
|
530 |
+
" '73cf1599b76d4061874e660228ca5f06',\n",
|
531 |
+
" '962fb1291b984d60adb133201b7eae48',\n",
|
532 |
+
" '365a19df65514c698d826e86fcdc6091',\n",
|
533 |
+
" '5e86e99df25a4cc287d8ea0605f8cb08',\n",
|
534 |
+
" 'f2cccab55efc43d5b098c38c31f687fb',\n",
|
535 |
+
" '3388b8f7db314bf5a60cd10dbbc45f9c',\n",
|
536 |
+
" '2a83e0ed7b4e4d2f906cfbc8dca7c512',\n",
|
537 |
+
" '5b4f99c2acab40248de70a0e92506bc0',\n",
|
538 |
+
" '278560e5a9e244e1a0a2ffa0ef7c261a',\n",
|
539 |
+
" '864b65e24dea4473ad0e4a5bc32f4c69',\n",
|
540 |
+
" '6b089bf4dd004ed78f1b92c50d414e47',\n",
|
541 |
+
" '5bebbff6685649b99fa304d40b9b6362',\n",
|
542 |
+
" 'aecb11bcf1444ad589508ea8bec77bdb']"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
543 |
]
|
544 |
},
|
545 |
+
"execution_count": 13,
|
546 |
"metadata": {},
|
547 |
"output_type": "execute_result"
|
548 |
}
|
|
|
555 |
"\n",
|
556 |
"dimension = 1024\n",
|
557 |
"collection_name = \"ai-safety-sr-arctic-embed-l-semantic\"\n",
|
558 |
+
"qdrant_server = os.environ[\"QDRANT_API_URL\"]\n",
|
559 |
"qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
|
560 |
"qdrant_client.create_collection(\n",
|
561 |
" collection_name=collection_name,\n",
|
|
|
568 |
" embedding=embedding_model,\n",
|
569 |
")\n",
|
570 |
"\n",
|
571 |
+
"vector_store.add_documents(chunked_docs)\n",
|
572 |
+
"\n"
|
573 |
]
|
574 |
},
|
575 |
{
|
576 |
"cell_type": "code",
|
577 |
+
"execution_count": 14,
|
578 |
"metadata": {},
|
579 |
"outputs": [],
|
580 |
+
"source": [
|
581 |
+
"retriever = vector_store.as_retriever(search_type=\"similarity_score_threshold\",\n",
|
582 |
+
" search_kwargs={'k':10,'score_threshold': 0.8})"
|
583 |
+
]
|
584 |
+
},
|
585 |
+
{
|
586 |
+
"cell_type": "code",
|
587 |
+
"execution_count": 15,
|
588 |
+
"metadata": {},
|
589 |
+
"outputs": [
|
590 |
+
{
|
591 |
+
"data": {
|
592 |
+
"text/plain": [
|
593 |
+
"[Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 44, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': 'b6779e22-20c4-44d3-8741-c06cc2bb380c', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Human-AI Configuration \\n'),\n",
|
594 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 33, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '26f3917c-c227-4e99-8e11-5212273fe2ba', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Human-AI Configuration \\n'),\n",
|
595 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 11, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': '07a8b461-51e7-4641-b97a-d823ef91082f', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content=' \\n \\n \\nFROM \\nPRINCIPLES \\nTO PRACTICE \\nA TECHINCAL COMPANION TO\\nTHE Blueprint for an \\nAI BILL OF RIGHTS\\n12\\n'),\n",
|
596 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 37, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '1d43c4ca-f83b-4708-97e9-6410f4dae5ee', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Human-AI Configuration \\nAI Actor Tasks: AI Deployment, AI Impact Assessment, Domain Experts, Operation and Monitoring, TEVV \\n \\n'),\n",
|
597 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 61, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '73cf1599-b76d-4061-874e-660228ca5f06', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='et al. (2023) Whose Opinions Do Language Models Reflect? arXiv.'),\n",
|
598 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 28, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '00ca0c39-98b6-4339-874a-d036983a0922', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Make sure these tests cover various scenarios, such as crisis \\nsituations or ethically sensitive contexts. Human-AI Configuration; \\nInformation Integrity; Harmful Bias \\nand Homogenization; Dangerous, \\nViolent, or Hateful Content \\nAI Actor Tasks: AI Design, AI Development, Domain Experts, End-Users, Human Factors, Operation and Monitoring \\n \\n'),\n",
|
599 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 59, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '81516460-f657-40e9-aef0-f4babc29b2f1', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='https://www.rand.org/pubs/research_reports/RRA2977-2.html. Nicoletti, L. et al. (2023) Humans Are Biased. Generative Ai Is Even Worse. Bloomberg. https://www.bloomberg.com/graphics/2023-generative-ai-bias/. National Institute of Standards and Technology (2024) Adversarial Machine Learning: A Taxonomy and \\nTerminology of Attacks and Mitigations https://csrc.nist.gov/pubs/ai/100/2/e2023/final \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework. https://www.nist.gov/itl/ai-risk-management-framework \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Chapter 3: AI \\nRisks and Trustworthiness. https://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Foundational_Information/3-sec-characteristics \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Chapter 6: AI \\nRMF Profiles. https://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Core_And_Profiles/6-sec-profile \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Appendix A: \\nDescriptions of AI Actor Tasks. https://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Appendices/Appendix_A#:~:text=AI%20actors%\\n20in%20this%20category,data%20providers%2C%20system%20funders%2C%20product \\n'),\n",
|
600 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 57, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '1b34b9f6-1f16-4993-b738-7a73e961bf2b', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='(2020) Overcoming Failures of Imagination in AI Infused System Development and \\nDeployment. arXiv.'),\n",
|
601 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 0, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': '8dd5b1e7-fd46-4e2a-90c2-8a8eea8b0cb9', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content=' \\n \\n \\n \\n \\n \\n \\n \\n \\n \\nBLUEPRINT FOR AN \\nAI BILL OF \\nRIGHTS \\nMAKING AUTOMATED \\nSYSTEMS WORK FOR \\nTHE AMERICAN PEOPLE \\nOCTOBER 2022 \\n'),\n",
|
602 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 23, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': '8c64aecd-850b-48b3-bac2-16e73ebad1e0', '_collection_name': 'ai-safety-sr-arctic-embed-l-semantic'}, page_content='Some companies have instituted bias testing as part of their product \\nquality assessment and launch procedures, and in some cases this testing has led products to be changed or not \\nlaunched, preventing harm to the public. Federal government agencies have been developing standards and guidance \\nfor the use of automated systems in order to help prevent bias.')]"
|
603 |
+
]
|
604 |
+
},
|
605 |
+
"execution_count": 15,
|
606 |
+
"metadata": {},
|
607 |
+
"output_type": "execute_result"
|
608 |
+
}
|
609 |
+
],
|
610 |
+
"source": [
|
611 |
+
"retriever.invoke(\"What steps can organizations take to minimize bias in AI models?\")"
|
612 |
+
]
|
613 |
+
},
|
614 |
+
{
|
615 |
+
"cell_type": "code",
|
616 |
+
"execution_count": 25,
|
617 |
+
"metadata": {},
|
618 |
+
"outputs": [
|
619 |
+
{
|
620 |
+
"data": {
|
621 |
+
"text/plain": [
|
622 |
+
"['dd370438231c41dbb7b1b4f1e7673cf7',\n",
|
623 |
+
" '02ebba25e01941849b9e2c9d5097b55d',\n",
|
624 |
+
" '099f0083356a4914b53fcb30df633b50',\n",
|
625 |
+
" 'f8aefa25a4544c869ca4caaf686b3d47',\n",
|
626 |
+
" '9ec0798fb4554f95ab65bd05315af118',\n",
|
627 |
+
" '33bdad4db0ab4145b85726f77f1789ad',\n",
|
628 |
+
" '98a75a601b114b07953b5aef4e032b4a',\n",
|
629 |
+
" '1e49952c0d6743ba8ad52a049c18daa3',\n",
|
630 |
+
" 'c3babb9205e54ca99ba6e5a03679bdba',\n",
|
631 |
+
" '74cecdae132c4a5e953bd7e72ac6850e',\n",
|
632 |
+
" '29529ea9530541a0bb446a8e82fab913',\n",
|
633 |
+
" '4193dcf34f6249b1a29c49a52239deef',\n",
|
634 |
+
" '84cb5d0f2cee47beabd72baa54161155',\n",
|
635 |
+
" '622f279ac5bd40b082725d90972e9ae3',\n",
|
636 |
+
" '48e366f92aa449e89cf7158584d2cf6a',\n",
|
637 |
+
" 'e2ffb7cb2ac3482fb9290940fabe9582',\n",
|
638 |
+
" 'f52a4c3353544fff93f241cba063028a',\n",
|
639 |
+
" '0c81aa08ddd4496a9aaea4b001f3596c',\n",
|
640 |
+
" '3e9d8d7785b04d5fad063219c94ef0dd',\n",
|
641 |
+
" '76796785c7b64d428e48b7cf699e155a',\n",
|
642 |
+
" '593ab20fc2494634959b0bfd8821ea91',\n",
|
643 |
+
" '654421ae91df4739bfb1ebdfb7c9dda2',\n",
|
644 |
+
" '27ffe059aafd4d5fa795b2f893b1d57e',\n",
|
645 |
+
" 'f1468d8276444858acb33bd6e2d36e73',\n",
|
646 |
+
" '5a6a15255cdd438abd9b2c3358dca939',\n",
|
647 |
+
" 'fbb13ef430ca47d28013dda9feaf4625',\n",
|
648 |
+
" 'fc16826ddd504038bb5f32fd97cdd98e',\n",
|
649 |
+
" '72c878c56d8746dea51fdcf506e48894',\n",
|
650 |
+
" '257ac1e04a4b478ab3b84c81e5dfc3f4',\n",
|
651 |
+
" '68b157c05ced46828ac39894e69b8d08',\n",
|
652 |
+
" '535e59df03184e86b30a09cd2d169dcd',\n",
|
653 |
+
" '1a6d76252d364a758564a41b922d44a4',\n",
|
654 |
+
" '61e497e66868447988198ba831096707',\n",
|
655 |
+
" 'dd9f18bedfed443c8bed0fc4c34c5e23',\n",
|
656 |
+
" '0ca0575097c24b50a613d5a19de61cfc',\n",
|
657 |
+
" '7dfba6cbbfe34756ba3f40b1be282324',\n",
|
658 |
+
" 'e9b68e9579194b04ad65bbf85332d351',\n",
|
659 |
+
" '7545cee6d2e345ba90e95082a15271b8',\n",
|
660 |
+
" 'df1e9db6843a4ddbb788b1e9117db9a1',\n",
|
661 |
+
" 'bb0687d2f3d047138d0414d0b2a22917',\n",
|
662 |
+
" 'b79ed7024a064c1f9360692c93615657',\n",
|
663 |
+
" '70fb8aa096a74d0a975705ac44f08577',\n",
|
664 |
+
" '41bf93d83ebe414e91253e7a96f50ec7',\n",
|
665 |
+
" 'bdab13de5b514bf68921751b3051ce60',\n",
|
666 |
+
" 'ccd47e89a09c4519981dc5d9be7b1ad9',\n",
|
667 |
+
" '334ac2db387848f1829e174c6584288b',\n",
|
668 |
+
" '5484df8c41cb41babd01c3f8d62121a2',\n",
|
669 |
+
" 'dd1f97aef70e439ea02c8f0d0ea397e0',\n",
|
670 |
+
" '99ccff600f8b4470af445f1f060e5518',\n",
|
671 |
+
" 'd4de01e6623741d3b06c8ee973ad6670',\n",
|
672 |
+
" '6217b664cbba4a64bf6e4f2ffde27831',\n",
|
673 |
+
" '3974c50b7e3a4503925f1c397254d259',\n",
|
674 |
+
" '4959c05e7d8049a4b75cff3bdc6fc30d',\n",
|
675 |
+
" '9d3aeacd6513463fbe9d13c1fb2441fc',\n",
|
676 |
+
" '8777904b546e4ef5b2759f0a60fd1fca',\n",
|
677 |
+
" '7a73c81712804111b6145b57888455ae',\n",
|
678 |
+
" '87036e89882546b69613378b17610332',\n",
|
679 |
+
" '5c508cd4449449c486b811d65b9b6db1',\n",
|
680 |
+
" 'fd5a25bb9038481aabaed2b34a7f2cc9',\n",
|
681 |
+
" '80b47526b0224fc0ba54cf4a61da11cc',\n",
|
682 |
+
" 'a2c5d0697278407fb0d89c9c138bfba0',\n",
|
683 |
+
" '49cb7eb52ea043f2baf21b611709d83a',\n",
|
684 |
+
" '96d002f0aa0a4cd7a86b188ef7811e9e',\n",
|
685 |
+
" 'd5a4fd354f904b99a8700363f7bcec7d',\n",
|
686 |
+
" 'ec384dfa0a5d4caeab593a4d013e40de',\n",
|
687 |
+
" 'b613bc8f681141249d11f2eea7691f32',\n",
|
688 |
+
" '7b9e491ad88b48f19ce2698c4d8ef5ec',\n",
|
689 |
+
" '011946a9fed74b14b7d4be2ff4eaadf5',\n",
|
690 |
+
" '2c07a4769e85425a9a32a053f1293ff2',\n",
|
691 |
+
" 'c859a58fd5a54447a22217564e610e77',\n",
|
692 |
+
" '2d6fcf19e009459e82b344a699c6556f',\n",
|
693 |
+
" '22ec0544ffc44be1bd557cd91e96caf4',\n",
|
694 |
+
" '1a9fed777bc8454faed8c60a12dce190',\n",
|
695 |
+
" '898d974afb3f472a96c9cca3c698fee1',\n",
|
696 |
+
" '21cb161a28f34fc89b90b369d1895fd6',\n",
|
697 |
+
" '9409c811bf6542feae01351580bcb32b',\n",
|
698 |
+
" '1219e011429b4e29a84268bdbb66d7a3',\n",
|
699 |
+
" '3293d7aeecf54778a4b1e63f09f3f362',\n",
|
700 |
+
" '543f56abfcda469a826e797aa2a4ae36',\n",
|
701 |
+
" 'b714fd2157d145658860f0db0bd95163',\n",
|
702 |
+
" '8be2abe5fee54507a03f0a5d0ba2f0c7',\n",
|
703 |
+
" '76bf633b2d6d49b98845023de4024f09',\n",
|
704 |
+
" 'eb00c39f80904f79847f0156e1e88ff0',\n",
|
705 |
+
" 'c9695522dbb243cbaaf48b9a5b9f4105',\n",
|
706 |
+
" 'aa3a41bf64fb4136b1fc097ad40378eb',\n",
|
707 |
+
" '4ba6b441be194008aac5fec9aa0eac53',\n",
|
708 |
+
" '7b5e6c78bcc64d4c879cc0817436ab35',\n",
|
709 |
+
" '90229474433449648237b410db3cdfb7',\n",
|
710 |
+
" '1ace5952f4c043e0a0864b9926475add',\n",
|
711 |
+
" '2208ad4c34bc4fdb940e1cec9df0f6bc',\n",
|
712 |
+
" '15e5cc7345b64cc08ad9825085af5486',\n",
|
713 |
+
" 'ccac823288be4ed2a3d5617dda575120',\n",
|
714 |
+
" 'e01e349952e54ef4849519acdaa6725b',\n",
|
715 |
+
" '794cb0ec54724738a48d16e18f6cb3b9',\n",
|
716 |
+
" 'd9ebc60044124f0a890f7836cb58f4a2',\n",
|
717 |
+
" '9fda4771fa994fcaa609088cfc961dd4',\n",
|
718 |
+
" '67fd7a926392436a9b344903bbbc08f7',\n",
|
719 |
+
" '7892533c0b57466e8249f11d5cba07d3',\n",
|
720 |
+
" 'ba1b7ad3addd4557aa6983cc309fdd49',\n",
|
721 |
+
" '3663e44b29e14192abe4f49c98e3db45',\n",
|
722 |
+
" '476728c883404adea36c903128c98139',\n",
|
723 |
+
" 'dbec4491b8b94aa0b9327ead46b4251f',\n",
|
724 |
+
" 'b635ebf0a3644e71ab64bfd43faec517',\n",
|
725 |
+
" '149ee2b23af9448c896c5bf87f1c9257',\n",
|
726 |
+
" 'f6082f115738427db840c3ff58a7c48d',\n",
|
727 |
+
" 'd56f7d290c184a5aabdd80251ee807b5',\n",
|
728 |
+
" '37738bf3df9048c381a26116632aff03',\n",
|
729 |
+
" 'a139a0ccd8834a3293c65b8a9fb0a2ad',\n",
|
730 |
+
" '6cce500b28a944fbb6ad637ae5d3c227',\n",
|
731 |
+
" 'e16444a94afb409b827c3ee3f57237b9',\n",
|
732 |
+
" 'd2d0cd1eac154f17b31a6a97d87bca88',\n",
|
733 |
+
" '75bc3a67451e44a487939e6ca74e39ad',\n",
|
734 |
+
" '9074393c760844908a39e806f7d4714e',\n",
|
735 |
+
" 'fe16063331f848d897ac4466e9237fd7',\n",
|
736 |
+
" 'a9504de4c7cd427cae397ddd551e3bba',\n",
|
737 |
+
" '791979c2b3d747bf857370c4ab7e7757',\n",
|
738 |
+
" 'cc31a5edfbfd486482b202b8b87c8e9f',\n",
|
739 |
+
" '6cf68d7de2334461809f1867dfef1280',\n",
|
740 |
+
" 'e8acd67d2b614ebd8e0786dcd961c05c',\n",
|
741 |
+
" 'e30854b096ab48b3b7f3729bd07914e1',\n",
|
742 |
+
" 'ae46462add43450d899655e2e0819e59',\n",
|
743 |
+
" '1718ec5c859c4b37a8c8ac0bbcffa616',\n",
|
744 |
+
" 'ae4a9b112f364a34a47f0cf255b882a4',\n",
|
745 |
+
" '5ab58a4ae877437898e06968149195b2',\n",
|
746 |
+
" '76401834e26049b096cdbb054cb37c7a',\n",
|
747 |
+
" '52b9f9683ba14fb2a6f84f5b6d619b40',\n",
|
748 |
+
" '96d56773bf2e4d858f62654f20ccd53d',\n",
|
749 |
+
" 'd8a258c7815040808dbab44252e15e77',\n",
|
750 |
+
" '9f4ca5df27674f4cba5628dd98f1ef2b',\n",
|
751 |
+
" '8e0679ac4cbc44839a7f77a12ce52220',\n",
|
752 |
+
" '964aa8b0653c4bdaa236d5bb6f1eff1d',\n",
|
753 |
+
" '219697ddd2d545c484443ca116943b63',\n",
|
754 |
+
" '607bf4aa92c64da9a4873477e2e9b363',\n",
|
755 |
+
" 'efd6114026d2454d983e7b4063656266',\n",
|
756 |
+
" '81fdf32f6fb949508196043e4142261b',\n",
|
757 |
+
" '14f82f6a28a9488a93ce3724bdc2a476',\n",
|
758 |
+
" '3d44caaa301046af9acdd238d1e3cbda',\n",
|
759 |
+
" 'd8d11ff667ed4c0f94958e794e9b2c60',\n",
|
760 |
+
" '90ba7f9ad22d46d99316b36cf213ec01',\n",
|
761 |
+
" '76e251b11fb04240ae381af227c136bd',\n",
|
762 |
+
" '3dec921799b24369a5b9189dd28c0f55',\n",
|
763 |
+
" 'e7d706e31326405aa1a8acaa627ee2f6',\n",
|
764 |
+
" '9b8c00a9f69649a1a66decee2aa77c9c',\n",
|
765 |
+
" 'e2902a1405cf41db9a0428259089cbef',\n",
|
766 |
+
" '5d43e9e802fb49098531b257c5633723',\n",
|
767 |
+
" '413aeaa9a2bc4970b21371940026dba8',\n",
|
768 |
+
" 'bfb4471ff2af4bada2ac8bcd14429d24',\n",
|
769 |
+
" '533ff7a97fca4578999e6e0434df17d5',\n",
|
770 |
+
" '04ba353da230424bbf25ab29a18e20f4',\n",
|
771 |
+
" '1ed6dc9440f14ac9b1deadc10fbb660b',\n",
|
772 |
+
" '734aa1cc14a34825be7dbc947ecdb525',\n",
|
773 |
+
" '482153d0c05f4453a3bc9f57a1804406',\n",
|
774 |
+
" '76d988efad614d3dafc0ecb8fbdb2189',\n",
|
775 |
+
" '6d9ada3651704a0ca2b40664a59c8579',\n",
|
776 |
+
" 'ac9272192bfc416d8386487c9b381ccf',\n",
|
777 |
+
" '0d8c4c52de304f7f9778455f8ad178ec',\n",
|
778 |
+
" '9daf6aed58624240b7fb1fcf79d9dda2',\n",
|
779 |
+
" '7ee8e6bfacca440f9ef345e59eba7401',\n",
|
780 |
+
" 'e80b50ed3a3a42d2a24bfe85cd5d45db',\n",
|
781 |
+
" 'c0a0384aad9345f3b6162bf63c5bc0d3',\n",
|
782 |
+
" 'df7c71db74af415b96cb2cea32d0ba30',\n",
|
783 |
+
" '329b6e0ed9cb43cca48970fcc286e299',\n",
|
784 |
+
" 'fd4c170eb78d401fa3eec3253be26b98',\n",
|
785 |
+
" 'a98ea19a24cb4b04bd943d226dc41bba',\n",
|
786 |
+
" '8e917ca54af2400cbd83e08e28ef0bb0',\n",
|
787 |
+
" '78a3cf3647cc44f9abb083eaa8b79947',\n",
|
788 |
+
" '140a48e4acbd44da9fede75e12fa80f3',\n",
|
789 |
+
" '92d6e0856f5848869f7feb8cd17d7088',\n",
|
790 |
+
" 'bf482ac0daea44a789182b52fbd2f413',\n",
|
791 |
+
" 'f562e2d3c5b9422b845b6f87806e4d6c',\n",
|
792 |
+
" 'e8b0bdec066a4bb0b7a47ec7e10c10e1',\n",
|
793 |
+
" '45a054f0a9824867a52db472b2b65ad4',\n",
|
794 |
+
" '40ff751a666347ed9c2326341587ea51',\n",
|
795 |
+
" '1e35fbf001de48599063d4fe6dae165b',\n",
|
796 |
+
" 'fa3b6f2ebe274d54851e9a31975470a6',\n",
|
797 |
+
" '42242351bcb844c589f44a80cc139fdf',\n",
|
798 |
+
" 'e5eb97e74797481d998677225cbaf365',\n",
|
799 |
+
" '85f4bc0ed08c49feb6fd69f5659eaa36',\n",
|
800 |
+
" '87df9b4b3b8a44eaa8756bbf8c967d8e',\n",
|
801 |
+
" '7c04f74911aa4d629f0a545155e60b8d',\n",
|
802 |
+
" '133597b368564215a8b71b2535a07032',\n",
|
803 |
+
" '97473d57f4bd40af96f63bc06a1c6117',\n",
|
804 |
+
" '3b3f2e2c08774f42bf0c904230b06c4c',\n",
|
805 |
+
" '9661e9d3901e429d9030b04d28d98a19',\n",
|
806 |
+
" '488e7a22bcdd400fb1ea9e52a102cd8f',\n",
|
807 |
+
" '95086a933034484b9eeca08343c0dc21',\n",
|
808 |
+
" 'e5aa0c58bc1448169bdebc7e99fcbd42',\n",
|
809 |
+
" 'e6ba2f1ca8284d11a309757bc42ead7f',\n",
|
810 |
+
" '6510e8c73d10408fa038b242f95cae2d',\n",
|
811 |
+
" '36c2ba7197ce4c238472599a256f60db',\n",
|
812 |
+
" '12ce3404c5024cc5a3ca4d1c0773d759',\n",
|
813 |
+
" 'd0b909588d804b1aa89b496efcb6d16f',\n",
|
814 |
+
" 'e830a0b4f0fa483b9bb9816162bde54f',\n",
|
815 |
+
" 'eea7403faa024fc8a477b4b2e12bfc99',\n",
|
816 |
+
" '1fbbcd091c3948229841d1a1e53cedef',\n",
|
817 |
+
" '0bc14c99eb8141fc9f5ad1a13e8c5f90',\n",
|
818 |
+
" '544d8570f3e847aea814771f3af2397e',\n",
|
819 |
+
" 'd4e66a38f60c47deb7307e0f65829409',\n",
|
820 |
+
" 'b520c61605aa473d89c88e3d277f40c5',\n",
|
821 |
+
" '3f01a965995e495a8b3067fd0fdcc978',\n",
|
822 |
+
" '9a5da84235c14817a3f0bda30a2bbcbb',\n",
|
823 |
+
" '94792a1b19654f45a2fd8cc362dacddf',\n",
|
824 |
+
" 'b46df9b21e764b39a0180bf42f9a835b',\n",
|
825 |
+
" '47a4942336cd45cea7afcc68d99f1cd5',\n",
|
826 |
+
" '2081541601de49199f881ffcb1625d4d',\n",
|
827 |
+
" '972dcffe6c2e4cf2815cd571e9f4021d',\n",
|
828 |
+
" 'e4613bb436fe46aaa3c236a209038124',\n",
|
829 |
+
" '355cc289660d494db52e039127ecde34',\n",
|
830 |
+
" 'b2b84fffde454e2d967ec6330e637b37',\n",
|
831 |
+
" 'ab32b49526ee485d998dca366dace258',\n",
|
832 |
+
" '48e0545b903f49bca771a22861166708',\n",
|
833 |
+
" '47f62df12d494ba48b1bfee4bc1820f0',\n",
|
834 |
+
" 'c131bfc970324e068ff4e04df6191c8d',\n",
|
835 |
+
" '02a961ad52a34d1ead8d5c1a9ee12031',\n",
|
836 |
+
" 'dcc8094b71444056a9c85f1b69b7e6df',\n",
|
837 |
+
" 'd95234dd8571413ab9dc2cf2bd4031ef',\n",
|
838 |
+
" '2b80007625d04c3a8b3c10fd35181861',\n",
|
839 |
+
" 'b2b150d718c64b38925de1ee0abf14ed',\n",
|
840 |
+
" '49f5645c803b4da78f09c7f0d337867a',\n",
|
841 |
+
" 'af890841867746499efe8600704630b4',\n",
|
842 |
+
" '7218a0d2f3e34a729da8a10e41a591fa',\n",
|
843 |
+
" 'bafcc1d1244f40da99adcfb72f87b170',\n",
|
844 |
+
" '14f2b87359524bfbac74a4948fdd135d',\n",
|
845 |
+
" 'b209da5ac4ad4110834f018a3301f5cb',\n",
|
846 |
+
" 'bf06ddade01d466592ea9cadbada320d',\n",
|
847 |
+
" '865d5986afde44f4ab593708125e90ad',\n",
|
848 |
+
" '3be18dd8e0bb4940bc58b257bad9c5b9',\n",
|
849 |
+
" 'c02da0af95774b39af650dc268c8eed0',\n",
|
850 |
+
" '7fae32ee9e934e2b8f164212ad9190b0',\n",
|
851 |
+
" '8d0c1b678ea742cca445577d36a58e26',\n",
|
852 |
+
" '4c1667fd01804d08bca4485a427b7cf3',\n",
|
853 |
+
" '5d3bf9345565447095a9ffd9319997d9',\n",
|
854 |
+
" 'ca5889dd43c5498ca449a733c36631d9',\n",
|
855 |
+
" '5ba45f56b2f0412c835d4328b88037d3',\n",
|
856 |
+
" 'e1d3f4649f234a8395a63ae1de670449',\n",
|
857 |
+
" '17e91fcb2ae14f56a0f60c6acaf4258d',\n",
|
858 |
+
" 'ed1d8496e014462db3aae0a046d4aeed',\n",
|
859 |
+
" '8dd4483ba29448b7a285cefcaeb135fa',\n",
|
860 |
+
" '77db975e07284a7a814ef386664c97aa',\n",
|
861 |
+
" '93c28d0a7eb646969d0511b786fa7a71',\n",
|
862 |
+
" 'c596f3b0927c49c1a4193eb5f0479395',\n",
|
863 |
+
" 'e1648b2975284446bbbaefba431cdd78',\n",
|
864 |
+
" '76bcc756b84a4b169a128973ef7228fe',\n",
|
865 |
+
" 'ba57f715ffaf477fa15a733ddf5339aa',\n",
|
866 |
+
" '644902133d4645bfbd02d9629fb737da',\n",
|
867 |
+
" '124a4a8ec036421486e8501be3af4692',\n",
|
868 |
+
" '0b95233f27f54043aee48dd77096c62c',\n",
|
869 |
+
" '1ef09ede43d546b4a6a73b48c4cb48f1',\n",
|
870 |
+
" '9dbee7e4bb32427f8fd0b0229ca0d2a6',\n",
|
871 |
+
" 'f915f126e0f24f2299e6bfd16a5d3c1d',\n",
|
872 |
+
" '3686b153c85248f6a2fc1fff12eaafe3',\n",
|
873 |
+
" '850e99ca1e58439c8ccf36e2b6a7ecde',\n",
|
874 |
+
" '25c84a37812b47c8adfd41b30af8c0bb',\n",
|
875 |
+
" 'f96eb4a5818e4ebf8ae654d35cdc08a4',\n",
|
876 |
+
" '31aaf38fa0bf49f4964d317f000840fd',\n",
|
877 |
+
" '8c914f8f496741dab0d661f8bf84e061',\n",
|
878 |
+
" '215a8d37eb5249ae97b9471c8ec0f888',\n",
|
879 |
+
" '9f5e41e99d314cba824998d58ca1a611',\n",
|
880 |
+
" '6b01b294c1774e34919059a7388aabd9',\n",
|
881 |
+
" 'c1c6025360a9458085d5342cf8e703e0',\n",
|
882 |
+
" 'b3fdbd3082794bd4b0ab4d4f2c8149f8',\n",
|
883 |
+
" '700188e4d52b44fa9fde7512f54d7b1b',\n",
|
884 |
+
" '00dad32bd08f4b39b153cc96b8497f4b',\n",
|
885 |
+
" '9bb6f94103404153b68855d9993e9493',\n",
|
886 |
+
" '56e07e044da34280830555b42799444d',\n",
|
887 |
+
" '1d57812e47de41bf99efbbfa34865acd',\n",
|
888 |
+
" '39c2def37c7d4b15a4e766632ea9eb98',\n",
|
889 |
+
" '93e308150c2b44688cd13847402815b0',\n",
|
890 |
+
" '1f595875c8ee4ac9a261bfb0a429067c',\n",
|
891 |
+
" 'e7043593429e48a6bdcd8095f3ee2993',\n",
|
892 |
+
" '8827c0bc83eb4dbeb48befa28e6ded29',\n",
|
893 |
+
" 'acb099517d0449239adf6c9dde626772',\n",
|
894 |
+
" 'f94e571e12af4903bfe866f6e028124e',\n",
|
895 |
+
" 'd027a4d37b3640ad894587dab59a7494',\n",
|
896 |
+
" 'f35c824977cc4d46962b01ad10f5ceb4',\n",
|
897 |
+
" '836279f1552c4417a642da79743aeb33',\n",
|
898 |
+
" '002978f8f7ab4cada169ea0d054499a5',\n",
|
899 |
+
" 'e7a1c0978a2a4cd0b6b317ceda9874fc',\n",
|
900 |
+
" '0f977a8cf0514392882756b1f7c6fa26',\n",
|
901 |
+
" '7682cce87cfd470d95274b61e4eef8b8',\n",
|
902 |
+
" '3947f0e87d00475387486b47326ed258',\n",
|
903 |
+
" '6ddc01005056438cb611cd958b7a2d1d',\n",
|
904 |
+
" 'bc81516df0f440d8a1faa7363f011b75',\n",
|
905 |
+
" 'd676819f49004a56b7dc89cc5d5343ec',\n",
|
906 |
+
" 'b8fd60c1b629499dac6ea2cdeb837502',\n",
|
907 |
+
" '4e33d391a5634f0d82dd84dda6957811',\n",
|
908 |
+
" 'b007e62d2fe749a6ab713c005211a73f',\n",
|
909 |
+
" '18de6dda198e4f36befe7c81f88a7f42',\n",
|
910 |
+
" '029857b9fae04c498b62b46c40267afa',\n",
|
911 |
+
" '3be29a9773f24c079f24d9db9c662801',\n",
|
912 |
+
" 'a095542002ce46ca95e59094332f0228',\n",
|
913 |
+
" '40d61bdffba64369af605108a12a2999',\n",
|
914 |
+
" '7edeaefb56544debb539a1bafb766796',\n",
|
915 |
+
" '2f5335dc55594a04b67b07d86d937139',\n",
|
916 |
+
" 'f89a5a9d2d5047d7a74dd991ae0e8102',\n",
|
917 |
+
" 'fd4b6403ba4249b19c692a9dbdcdba01',\n",
|
918 |
+
" '3e02d74c82764b6997c4f965cfd6c233',\n",
|
919 |
+
" 'dfbe54ff42b4457db6cf921dc4ca0753',\n",
|
920 |
+
" '7abb841a671b40649ad478bc45c75b47',\n",
|
921 |
+
" 'a687e1b6e9dc40099a7d7d4ecd021a46',\n",
|
922 |
+
" 'ffdea99f5cec44e4abc9f4b8c6949fc1',\n",
|
923 |
+
" 'cb103ab7aa1a46f09d858dcf6880c862',\n",
|
924 |
+
" '4709ecfdce6c4392a245fad38093d1c4',\n",
|
925 |
+
" 'dbceabef7601444ba3a76c6bed960802',\n",
|
926 |
+
" 'a7f1acab40b145cbb2d8d84fc72733af',\n",
|
927 |
+
" '4df54db71ffa4dc4b1f10b022e3e6ef8',\n",
|
928 |
+
" 'a7fc14d6dbb14af6ae3ef0a3f68f1d07',\n",
|
929 |
+
" 'd5048b397eb04c6fb97a083b66aa6ac2',\n",
|
930 |
+
" 'b6420e8387b94f85aa36b5bfe589463f',\n",
|
931 |
+
" '767857ebb1fe41269ed4d82d967956d9',\n",
|
932 |
+
" '400246f61fa94320a267b7ab3f2e8cc7',\n",
|
933 |
+
" '8b763f3947974bd192d2d884c05c6428',\n",
|
934 |
+
" 'ea8f1f4852d64e09be5ac90b04404dad',\n",
|
935 |
+
" 'f6866c404c2c40b3a4314563846a911f',\n",
|
936 |
+
" '837631a58af84142b5042772b24da3bf',\n",
|
937 |
+
" '9d1586da31e44e6f9fb64ce9ff157673',\n",
|
938 |
+
" '098b85fd43c24f1aa3fd7c93a48fb98b',\n",
|
939 |
+
" 'f324b2ea7aef49979b23bb34f78846e8',\n",
|
940 |
+
" '6c35050aaecf43e7b7fcf40e9edfaa2f',\n",
|
941 |
+
" '910792f650bb463493a1b85488133ab5',\n",
|
942 |
+
" '2d428a2e50194db792e5a02146be9364',\n",
|
943 |
+
" '90ba4b5f05124c26aaec626e14ff2138',\n",
|
944 |
+
" '5c4b11be82e54ebab89237c6a4928284',\n",
|
945 |
+
" '11c9ce7a16094f788485a13218479435',\n",
|
946 |
+
" '27ef6c0ee62d4f4d9767d255f5b0bda6',\n",
|
947 |
+
" '603495b3760e453bbc7264b287754bd6',\n",
|
948 |
+
" '2afe6de14bc34f6381f30124a069d391',\n",
|
949 |
+
" '8666745692804c2bbd16b997d75f9426',\n",
|
950 |
+
" 'f9789e9010d44d2fa2eeadd121f9186b',\n",
|
951 |
+
" '3fdd94668daa4bd6a96e125c2725d9b4',\n",
|
952 |
+
" '02cf5c44de76414bb431468c721ea6ad',\n",
|
953 |
+
" 'ea3dc743311b46c6b7c6117c57de0333',\n",
|
954 |
+
" 'cac324fb19374fc892f4db768850823d',\n",
|
955 |
+
" 'ab4126594df44beb906274bbb1c0f40b',\n",
|
956 |
+
" '4cb686862a9a46ecaaa414860edba1cd',\n",
|
957 |
+
" '0e6481c6b52e43e5856a779b814b509c',\n",
|
958 |
+
" '73c0feaefcd44cffaa712880076005c7',\n",
|
959 |
+
" '147e1975a5b545c39eedde3c9e112d3f',\n",
|
960 |
+
" 'b52a309d67e44730a3e13f395aec79d4',\n",
|
961 |
+
" '6068165a21b64de191a203024d30275b',\n",
|
962 |
+
" '939af554456e4f9cb33268bd36d792c5',\n",
|
963 |
+
" '3dffe1eb87754b6ab3c932d8d77cfa00',\n",
|
964 |
+
" 'bf7419dc8ad84fc9b15e09b9125fe6b8',\n",
|
965 |
+
" 'f76718fe634243029f02130498d5afcd',\n",
|
966 |
+
" 'ef23c33828ff408abab8607b82eeb016',\n",
|
967 |
+
" 'da5f19bce12048a2aa11b06b85072f9c',\n",
|
968 |
+
" '32f57a9a758a4e41849fd85cedef76f0',\n",
|
969 |
+
" 'fba4c4a802904152bbeb6edb051e2607',\n",
|
970 |
+
" '728acf0b65ad4108a1e7a72b146e338e',\n",
|
971 |
+
" '0ff6d22870074917a6c014c33f4b7cf9',\n",
|
972 |
+
" '29d7bb91ce6b4a74893c614f725c5178',\n",
|
973 |
+
" 'b690e48b7dff464cb73dbdf1e6149309',\n",
|
974 |
+
" 'd92a821e7fdc4276b4fc8201dadfef62',\n",
|
975 |
+
" '7dcaeef50440471f83e8febf47b1049c',\n",
|
976 |
+
" 'd06797d35176423f97a118fb2921bf35',\n",
|
977 |
+
" '8601101c7fa9428c9624bce8ea4cee15',\n",
|
978 |
+
" 'f45441e8e3264add88478323c93e2d38',\n",
|
979 |
+
" '985d73938f0744fd9ca3e05d8ed4d99e',\n",
|
980 |
+
" 'aec96ecb915d47eea4ae0dae9dc95446',\n",
|
981 |
+
" '8120edfc5cf247babcaaf6e7bf59ebe1',\n",
|
982 |
+
" '821cb61115ef45f3b24ecf7e7ffe5b27',\n",
|
983 |
+
" 'e4dc1ed27a224c9294f648336d261c53',\n",
|
984 |
+
" 'b90241dc87844d628f45401349ab9887',\n",
|
985 |
+
" 'a255e1c4b31241058595c134f5de807a',\n",
|
986 |
+
" '648c5ad4530442e5b50f28f40c386bb6',\n",
|
987 |
+
" 'ffa4b3e9e4ae4289b0c404da4abadcbc',\n",
|
988 |
+
" 'fa5af409bfe441d1bcee2d0b5e377678',\n",
|
989 |
+
" '033a5c2bd8374ca6b8cb9b4d965954ab',\n",
|
990 |
+
" '357babd6dd4947749142359d0fc0cdd3',\n",
|
991 |
+
" 'e4853735544842de9790530cb56a3eba',\n",
|
992 |
+
" '3698a84c5a8644d99072de0b3a6aa9f5',\n",
|
993 |
+
" 'd9c6bac4f1dd41dcb3318843c7f79489',\n",
|
994 |
+
" '3dd799500efb450f8004d4240e037b20',\n",
|
995 |
+
" '686426621d894b2781e6fb48d4b16c8e',\n",
|
996 |
+
" '85b807e001ff46fe985770fa6af9a534',\n",
|
997 |
+
" '097971e5d47043a2b4d569d56634bb2e',\n",
|
998 |
+
" '46bb945a79c94a7da13c0c1506e1c457',\n",
|
999 |
+
" '5ef5cbe182d94b5d899d2dbc9595b3a8',\n",
|
1000 |
+
" '466985e3a3ce424ab38284db938f8d40',\n",
|
1001 |
+
" '3a87f5d24ee448649a1fb37e1572f0e4',\n",
|
1002 |
+
" 'e17801c99d6944b899da6568724826a9',\n",
|
1003 |
+
" '24d0a98cbe0d4450a1e3994c6dab3a15',\n",
|
1004 |
+
" '3fa0add1d0b642f296690f408b0372c8',\n",
|
1005 |
+
" '1d657774ca004dc79b7fcb36ac85e26e',\n",
|
1006 |
+
" '7e69a8dc03104c72bdfc8cb6c2fcf9ee',\n",
|
1007 |
+
" '40c85cbead07447a8ba67f4c279ffd8b',\n",
|
1008 |
+
" '7b595edf61784a549e64edfc1e18a497',\n",
|
1009 |
+
" 'a4d50cd2c1534c02b0a34458d25920d0',\n",
|
1010 |
+
" '318e51c677b949d09d9a61fb7a069082',\n",
|
1011 |
+
" 'a45a8c5d6a6a47ccbafb57a1bb45c4b0',\n",
|
1012 |
+
" 'a0c089e55b15476e8a89292b31b310fc',\n",
|
1013 |
+
" '41fcfb134a8a44c9931edccd36627ca2',\n",
|
1014 |
+
" '7e7687c8087a4174850cd19935c845bd',\n",
|
1015 |
+
" 'a812d2adb03546538480ad44b33fd2bb',\n",
|
1016 |
+
" '5c5779f29b93468ca603bf37687d068d',\n",
|
1017 |
+
" '50783f0c8c944ff19aa86f2e5ac781ac',\n",
|
1018 |
+
" '4a21f30d19f24fd5a331537371b46dce',\n",
|
1019 |
+
" 'adda51e0076048ca98142173498f3af7',\n",
|
1020 |
+
" '07fa53932ec041d5bcd71d77b273d8d2',\n",
|
1021 |
+
" '18768f62094043548f4d280627a9d3a9',\n",
|
1022 |
+
" 'd5fcc7eeeb154b179028b03beaf8f3f8',\n",
|
1023 |
+
" 'a6859c0e9eb74acfbfde7fefbc76d9b6',\n",
|
1024 |
+
" 'c2e90d49e7f14233acebd2ac10622efc',\n",
|
1025 |
+
" 'e0997f7ee41742ed8a0179f9805bf12e',\n",
|
1026 |
+
" '9b8dc4963513406d90a71935b05a7601',\n",
|
1027 |
+
" '1cfc3fe1c73f43f69d776c880641baa9',\n",
|
1028 |
+
" '34fc9ec282fe475d8314ea0a3a44b881',\n",
|
1029 |
+
" '0943841a14ef474b8844520d91bfee9c',\n",
|
1030 |
+
" 'ef27e818cad1462aa3bf7d9aaa19700f',\n",
|
1031 |
+
" '6e2fdf8601904078986ebb1c71bc8168',\n",
|
1032 |
+
" 'c18bf0d5ee5e45c8806cc1ea7d486bba',\n",
|
1033 |
+
" 'a87b5e1ac2dd42d488929580625996de',\n",
|
1034 |
+
" '8d384236c7e448439e9230a000d6aaf7',\n",
|
1035 |
+
" '6dabbe7ff4814ab4bdcb84715ab20af4',\n",
|
1036 |
+
" '943a13d4ef0d4bc2a81b16078b580e78',\n",
|
1037 |
+
" 'baa6fd13ff6f445a8adc6482e59eb411',\n",
|
1038 |
+
" '57b39b4eee4d4a0aac587d53ef68ff8d',\n",
|
1039 |
+
" 'fd07301c3d554f02a0793d5f7bb63f35',\n",
|
1040 |
+
" 'e99969118642471f887206e1c6a507e7',\n",
|
1041 |
+
" '584dc05d60844536bdaac7bb3c1b7cd4',\n",
|
1042 |
+
" 'f1ba0bf7f2b54612a3ffa4a66dd0989c',\n",
|
1043 |
+
" 'e254c679f27c4143bcdda32f15e846e1',\n",
|
1044 |
+
" '2d51c45ed9ca4a3ab95590cff7047d37',\n",
|
1045 |
+
" '3bc9228d85364bd1b1f5bb7d13af5a8c',\n",
|
1046 |
+
" 'b2de27d51d964d8c9e190f42d7ad9768',\n",
|
1047 |
+
" 'e08419ed7e154a7da62745b3bd5ebd78',\n",
|
1048 |
+
" '70a099122b2d43dbbe23d375432beaa1',\n",
|
1049 |
+
" '01c551bc80134225ad0391bb295b365c',\n",
|
1050 |
+
" 'c2388131208c4d8c868af7d7e6405cca',\n",
|
1051 |
+
" 'bb0e3a6a10cc4ac29a168fcee5042c17',\n",
|
1052 |
+
" '4b20b4b550da419d88ee62758c495138',\n",
|
1053 |
+
" '5ea09fb9fb074218b814dde11c1aed3f',\n",
|
1054 |
+
" 'ee62a59cd0784f4cbabb349725d7fe78',\n",
|
1055 |
+
" 'a58457312c084595ab30bd5c59d0b3cb',\n",
|
1056 |
+
" 'af5f60ae6a3a48129b15c89bebe493cf',\n",
|
1057 |
+
" 'e5fddd76079f492f83ab1582f8d46893',\n",
|
1058 |
+
" 'fd4e785b8d9c4d7bb8af493c54ee6870',\n",
|
1059 |
+
" 'd91c68bc847c4c5993e6fa53b657b504',\n",
|
1060 |
+
" '1bfd2c9fa301401a91ff49843b1c842c',\n",
|
1061 |
+
" '9684447b6a9044339ff355877d86f7d3',\n",
|
1062 |
+
" '6fcdfdbf98c64c8b89a3730f72f8268f',\n",
|
1063 |
+
" '6ccdf85fa1ce4bd19775b7fcf5a12ee7',\n",
|
1064 |
+
" 'e67bb1231dce4f35b3914e3a40bb9c12',\n",
|
1065 |
+
" '7a55571c1f654084844ec308bba0ba42',\n",
|
1066 |
+
" '38d650edd70742898f976233a7dfb85a',\n",
|
1067 |
+
" '496b340000dc4eb09780ce18b3ba5392',\n",
|
1068 |
+
" 'a79b3b9c6ea74599885fdb8d28d12cfd',\n",
|
1069 |
+
" 'f4de9dfe9b9c47f7aee86802c145d2d4',\n",
|
1070 |
+
" 'c5b4d1fed5874094acaadc869998174f',\n",
|
1071 |
+
" 'd4ca3a34bfeb41bc8065e682213eaaab',\n",
|
1072 |
+
" '40786361d73e4a58a173a099821b3020',\n",
|
1073 |
+
" 'c267c513aa0049168f2ab2e2444029c9',\n",
|
1074 |
+
" 'de47fda5d45340b58a4febf243c18c90',\n",
|
1075 |
+
" 'b8f6f44e9ca64d28956c159f9aa284bb',\n",
|
1076 |
+
" 'cb6cd0c5ffb743ce8d07d0c02ed2cbe3',\n",
|
1077 |
+
" 'f42720e5bcd94c4ab0f3880cf75dbb50',\n",
|
1078 |
+
" '3bd78527eff54c5db6ead2f0471d1b55',\n",
|
1079 |
+
" 'c5257e382fbc4f69a16aa0bc047dfee2',\n",
|
1080 |
+
" '22df791dc8fa45c9867e2cd4de171bd9',\n",
|
1081 |
+
" 'b3a0c0feba764bd0abfb446204a8239f',\n",
|
1082 |
+
" 'ba39d1b49cf840289c2d2d04e88948cd',\n",
|
1083 |
+
" '1a956280d4db49aea6007c9c1d0f698a',\n",
|
1084 |
+
" 'e7062f6c5dba476facf895b6faee99cd',\n",
|
1085 |
+
" '95f2febaef5a433f89c11d3e9741347f',\n",
|
1086 |
+
" 'fded3452cdbf42fa90f7fadfacd5dd63',\n",
|
1087 |
+
" '0a89bd45fc9d4148828cddb02a0921e7']"
|
1088 |
+
]
|
1089 |
+
},
|
1090 |
+
"execution_count": 25,
|
1091 |
+
"metadata": {},
|
1092 |
+
"output_type": "execute_result"
|
1093 |
+
}
|
1094 |
+
],
|
1095 |
+
"source": [
|
1096 |
+
"# Vector Store with recursive chunked documents\n",
|
1097 |
+
"\n",
|
1098 |
+
"recursive_collection_name = \"ai-safety-sr-arctic-embed-l-recursive\"\n",
|
1099 |
+
"\n",
|
1100 |
+
"recursive_qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
|
1101 |
+
"# recursive_qdrant_client.create_collection(\n",
|
1102 |
+
"# collection_name=recursive_collection_name,\n",
|
1103 |
+
"# vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),\n",
|
1104 |
+
"# )\n",
|
1105 |
+
"\n",
|
1106 |
+
"recursive_vector_store = QdrantVectorStore(\n",
|
1107 |
+
" client=recursive_qdrant_client,\n",
|
1108 |
+
" collection_name=recursive_collection_name,\n",
|
1109 |
+
" embedding=embedding_model,\n",
|
1110 |
+
")\n",
|
1111 |
+
"\n",
|
1112 |
+
"recursive_vector_store.add_documents(recursive_chunked_docs)"
|
1113 |
+
]
|
1114 |
+
},
|
1115 |
+
{
|
1116 |
+
"cell_type": "code",
|
1117 |
+
"execution_count": 26,
|
1118 |
+
"metadata": {},
|
1119 |
+
"outputs": [],
|
1120 |
+
"source": [
|
1121 |
+
"recursive_retriever = recursive_vector_store.as_retriever(search_type=\"similarity_score_threshold\",\n",
|
1122 |
+
" search_kwargs={'k':10,'score_threshold': 0.8})"
|
1123 |
+
]
|
1124 |
+
},
|
1125 |
+
{
|
1126 |
+
"cell_type": "code",
|
1127 |
+
"execution_count": 28,
|
1128 |
+
"metadata": {},
|
1129 |
+
"outputs": [
|
1130 |
+
{
|
1131 |
+
"data": {
|
1132 |
+
"text/plain": [
|
1133 |
+
"[Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 11, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': '70fb8aa0-96a7-4d0a-9757-05ac44f08577', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='FROM \\nPRINCIPLES \\nTO PRACTICE \\nA TECHINCAL COMPANION TO\\nTHE Blueprint for an \\nAI BILL OF RIGHTS\\n12'),\n",
|
1134 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 50, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': 'e254c679-f27c-4143-bcdd-a32f15e846e1', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='• Accessibility and reasonable \\naccommodations \\n• AI actor credentials and qualifications \\n• Alignment to organizational values \\n• Auditing and assessment \\n• Change-management controls \\n• Commercial use \\n• Data provenance'),\n",
|
1135 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 19, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': '2c07a476-9e85-425a-9a32-a053f1293ff2', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='organization’s business processes or other activities, system goals, any human-run procedures that form a \\npart of the system, and specific performance expectations; a description of any data used to train machine \\nlearning models or for other purposes, including how data sources were processed and interpreted, a \\nsummary of what data might be missing, incomplete, or erroneous, and data relevancy justifications; the \\nresults of public consultation such as concerns raised and any decisions made due to these concerns; risk \\nidentification and management assessments and any steps taken to mitigate potential harms; the results of \\nperformance testing including, but not limited to, accuracy, differential demographic impact, resulting \\nerror rates (overall and per demographic group), and comparisons to previously deployed systems; \\nongoing monitoring procedures and regular performance testing reports, including monitoring frequency,'),\n",
|
1136 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 51, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': 'e08419ed-7e15-4a7d-a627-45b3bd5ebd78', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='lifecycle and informed by representative AI Actors (see Figure 3 of the AI RMF). Until new and rigorous'),\n",
|
1137 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 25, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': 'd9ebc600-4412-4f0a-890f-7836cb58f4a2', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='for any resulting algorithmic discrimination. \\n26\\nAlgorithmic \\nDiscrimination \\nProtections'),\n",
|
1138 |
+
" Document(metadata={'source': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'file_path': 'https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf', 'page': 0, 'total_pages': 73, 'format': 'PDF 1.6', 'title': 'Blueprint for an AI Bill of Rights', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe Illustrator 26.3 (Macintosh)', 'producer': 'iLovePDF', 'creationDate': \"D:20220920133035-04'00'\", 'modDate': \"D:20221003104118-04'00'\", 'trapped': '', '_id': 'dd370438-231c-41db-b7b1-b4f1e7673cf7', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='BLUEPRINT FOR AN \\nAI BILL OF \\nRIGHTS \\nMAKING AUTOMATED \\nSYSTEMS WORK FOR \\nTHE AMERICAN PEOPLE \\nOCTOBER 2022'),\n",
|
1139 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 38, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '7b595edf-6178-4a54-9e64-edfc1e18a497', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='guide the design of provenance data-tracking techniques. \\nHuman-AI Configuration; \\nInformation Integrity \\nMS-2.10-003 Verify deduplication of GAI training data samples, particularly regarding synthetic \\ndata. \\nHarmful Bias and Homogenization \\nAI Actor Tasks: AI Deployment, AI Impact Assessment, Domain Experts, End-Users, Operation and Monitoring, TEVV'),\n",
|
1140 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 59, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': 'b8f6f44e-9ca6-4d28-956c-159f9aa284bb', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='https://www.bloomberg.com/graphics/2023-generative-ai-bias/. \\nNational Institute of Standards and Technology (2024) Adversarial Machine Learning: A Taxonomy and \\nTerminology of Attacks and Mitigations https://csrc.nist.gov/pubs/ai/100/2/e2023/final \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework. \\nhttps://www.nist.gov/itl/ai-risk-management-framework \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Chapter 3: AI \\nRisks and Trustworthiness. \\nhttps://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Foundational_Information/3-sec-characteristics \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Chapter 6: AI \\nRMF Profiles. https://airc.nist.gov/AI_RMF_Knowledge_Base/AI_RMF/Core_And_Profiles/6-sec-profile \\nNational Institute of Standards and Technology (2023) AI Risk Management Framework, Appendix A: \\nDescriptions of AI Actor Tasks.'),\n",
|
1141 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 57, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': '38d650ed-d707-4289-8f97-6233a7dfb85a', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='54 \\nAppendix B. References \\nAcemoglu, D. (2024) The Simple Macroeconomics of AI https://www.nber.org/papers/w32487 \\nAI Incident Database. https://incidentdatabase.ai/ \\nAtherton, D. (2024) Deepfakes and Child Safety: A Survey and Analysis of 2023 Incidents and Responses. \\nAI Incident Database. https://incidentdatabase.ai/blog/deepfakes-and-child-safety/ \\nBadyal, N. et al. (2023) Intentional Biases in LLM Responses. arXiv. https://arxiv.org/pdf/2311.07611 \\nBing Chat: Data Exfiltration Exploit Explained. Embrace The Red. \\nhttps://embracethered.com/blog/posts/2023/bing-chat-data-exfiltration-poc-and-fix/ \\nBommasani, R. et al. (2022) Picking on the Same Person: Does Algorithmic Monoculture lead to Outcome \\nHomogenization? arXiv. https://arxiv.org/pdf/2211.13972 \\nBoyarskaya, M. et al. (2020) Overcoming Failures of Imagination in AI Infused System Development and \\nDeployment. arXiv. https://arxiv.org/pdf/2011.13416 \\nBrowne, D. et al. (2023) Securing the AI Pipeline. Mandiant.'),\n",
|
1142 |
+
" Document(metadata={'source': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'file_path': 'https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf', 'page': 12, 'total_pages': 64, 'format': 'PDF 1.6', 'title': 'Artificial Intelligence Risk Management Framework: Generative Artificial Intelligence Profile', 'author': 'National Institute of Standards and Technology', 'subject': '', 'keywords': '', 'creator': 'Acrobat PDFMaker 24 for Word', 'producer': 'Adobe PDF Library 24.2.159', 'creationDate': \"D:20240805141702-04'00'\", 'modDate': \"D:20240805143048-04'00'\", 'trapped': '', '_id': 'd5048b39-7eb0-4c6f-b97a-083b66aa6ac2', '_collection_name': 'ai-safety-sr-arctic-embed-l-recursive'}, page_content='Priorities Related to Information Integrity Research and Development.')]"
|
1143 |
+
]
|
1144 |
+
},
|
1145 |
+
"execution_count": 28,
|
1146 |
+
"metadata": {},
|
1147 |
+
"output_type": "execute_result"
|
1148 |
+
}
|
1149 |
+
],
|
1150 |
+
"source": [
|
1151 |
+
"recursive_retriever.invoke(\"What steps can organizations take to minimize bias in AI models?\")"
|
1152 |
+
]
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"cell_type": "code",
|
1156 |
+
"execution_count": 37,
|
1157 |
+
"metadata": {},
|
1158 |
+
"outputs": [],
|
1159 |
+
"source": [
|
1160 |
+
"# Trying Compression retriever\n",
|
1161 |
+
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
1162 |
+
"from langchain.retrievers.document_compressors import LLMChainExtractor\n",
|
1163 |
+
"from langchain_openai import ChatOpenAI\n",
|
1164 |
+
"\n",
|
1165 |
+
"base_retriever = recursive_retriever\n",
|
1166 |
+
"\n",
|
1167 |
+
"#Create a contextual compressor\n",
|
1168 |
+
"compressor_llm = ChatOpenAI(temperature=0, model_name=\"gpt-4o\", max_tokens=4000)\n",
|
1169 |
+
"compressor = LLMChainExtractor.from_llm(compressor_llm)\n",
|
1170 |
+
"\n",
|
1171 |
+
"#Combine the retriever with the compressor\n",
|
1172 |
+
"compression_retriever = ContextualCompressionRetriever(\n",
|
1173 |
+
" base_compressor=compressor,\n",
|
1174 |
+
" base_retriever=base_retriever\n",
|
1175 |
+
")\n",
|
1176 |
+
"\n"
|
1177 |
+
]
|
1178 |
+
},
|
1179 |
+
{
|
1180 |
+
"cell_type": "code",
|
1181 |
+
"execution_count": 18,
|
1182 |
+
"metadata": {},
|
1183 |
+
"outputs": [],
|
1184 |
+
"source": [
|
1185 |
+
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter Your OpenAI API Key: \")"
|
1186 |
+
]
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"cell_type": "code",
|
1190 |
+
"execution_count": 16,
|
1191 |
+
"metadata": {},
|
1192 |
+
"outputs": [],
|
1193 |
+
"source": [
|
1194 |
+
"from langchain.prompts import ChatPromptTemplate\n",
|
1195 |
+
"\n",
|
1196 |
+
"RAG_PROMPT = \"\"\"\\\n",
|
1197 |
+
"Given a provided context and question, you must answer the question based only on context.\n",
|
1198 |
+
"\n",
|
1199 |
+
"If you cannot answer the question based on the context - you must say \"I don't know\".\n",
|
1200 |
+
"\n",
|
1201 |
+
"Context: {context}\n",
|
1202 |
+
"Question: {question}\n",
|
1203 |
+
"\"\"\"\n",
|
1204 |
+
"\n",
|
1205 |
+
"rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)"
|
1206 |
+
]
|
1207 |
+
},
|
1208 |
+
{
|
1209 |
+
"cell_type": "code",
|
1210 |
+
"execution_count": 19,
|
1211 |
+
"metadata": {},
|
1212 |
+
"outputs": [],
|
1213 |
+
"source": [
|
1214 |
+
"from langchain_openai import ChatOpenAI\n",
|
1215 |
+
"\n",
|
1216 |
+
"# Using the same model used in the app.\n",
|
1217 |
+
"chat_model_name = \"gpt-4o\"\n",
|
1218 |
+
"llm = ChatOpenAI(model=chat_model_name)"
|
1219 |
+
]
|
1220 |
+
},
|
1221 |
+
{
|
1222 |
+
"cell_type": "code",
|
1223 |
+
"execution_count": 38,
|
1224 |
+
"metadata": {},
|
1225 |
+
"outputs": [],
|
1226 |
+
"source": [
|
1227 |
+
"from operator import itemgetter\n",
|
1228 |
+
"from langchain_core.runnables import RunnablePassthrough, RunnableParallel\n",
|
1229 |
+
"from langchain.schema import StrOutputParser\n",
|
1230 |
+
"\n",
|
1231 |
+
"ai_safety_rag_chain = (\n",
|
1232 |
+
" {\"context\": itemgetter(\"question\") | compression_retriever, \"question\": itemgetter(\"question\")}\n",
|
1233 |
+
" | rag_prompt | llm | StrOutputParser()\n",
|
1234 |
+
")"
|
1235 |
+
]
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"cell_type": "code",
|
1239 |
+
"execution_count": 39,
|
1240 |
+
"metadata": {},
|
1241 |
+
"outputs": [
|
1242 |
+
{
|
1243 |
+
"data": {
|
1244 |
+
"text/plain": [
|
1245 |
+
"'Companies can ensure AI does not violate data privacy laws by incorporating built-in protections and ensuring that data collection conforms to reasonable expectations. They should collect only the data strictly necessary for the specific context and seek user permission, respecting their decisions regarding the collection, use, access, transfer, and deletion of data. If obtaining user permission is not possible, alternative privacy by design safeguards should be used. Additionally, systems should avoid user experience and design decisions that obfuscate user choice or burden users with privacy-invasive defaults.'"
|
1246 |
+
]
|
1247 |
+
},
|
1248 |
+
"execution_count": 39,
|
1249 |
+
"metadata": {},
|
1250 |
+
"output_type": "execute_result"
|
1251 |
+
}
|
1252 |
+
],
|
1253 |
+
"source": [
|
1254 |
+
"ai_safety_rag_chain.invoke({\"question\" : \"How can companies ensure AI does not violate data privacy laws?\"})"
|
1255 |
+
]
|
1256 |
+
},
|
1257 |
+
{
|
1258 |
+
"cell_type": "code",
|
1259 |
+
"execution_count": 40,
|
1260 |
+
"metadata": {},
|
1261 |
+
"outputs": [
|
1262 |
+
{
|
1263 |
+
"data": {
|
1264 |
+
"text/plain": [
|
1265 |
+
"\"I don't know.\""
|
1266 |
+
]
|
1267 |
+
},
|
1268 |
+
"execution_count": 40,
|
1269 |
+
"metadata": {},
|
1270 |
+
"output_type": "execute_result"
|
1271 |
+
}
|
1272 |
+
],
|
1273 |
+
"source": [
|
1274 |
+
"ai_safety_rag_chain.invoke({\"question\" : \"What are the implications of using GAI systems for organizations in terms of risk management and compliance?\"})"
|
1275 |
+
]
|
1276 |
}
|
1277 |
],
|
1278 |
"metadata": {
|
requirements.txt
CHANGED
@@ -3,8 +3,8 @@ chainlit==0.7.700 # 1.1.402
|
|
3 |
openai==1.44.1
|
4 |
qdrant-client==1.11.2
|
5 |
langchain==0.3.0
|
6 |
-
langchain-text-splitters==0.3.0
|
7 |
langchain-community==0.3.0
|
|
|
8 |
langchain_experimental
|
9 |
langchain_qdrant
|
10 |
langchain_openai
|
@@ -12,4 +12,6 @@ pypdf==4.3.1
|
|
12 |
PyMuPDF==1.24.10
|
13 |
pymupdf4llm
|
14 |
sentence_transformers
|
15 |
-
langchain_huggingface
|
|
|
|
|
|
3 |
openai==1.44.1
|
4 |
qdrant-client==1.11.2
|
5 |
langchain==0.3.0
|
|
|
6 |
langchain-community==0.3.0
|
7 |
+
langchain-text-splitters==0.3.0
|
8 |
langchain_experimental
|
9 |
langchain_qdrant
|
10 |
langchain_openai
|
|
|
12 |
PyMuPDF==1.24.10
|
13 |
pymupdf4llm
|
14 |
sentence_transformers
|
15 |
+
langchain_huggingface
|
16 |
+
ragas
|
17 |
+
langsmith
|
task3-generate-dataset-ragas-eval.ipynb
ADDED
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# Synthetic data generation using Ragas framework"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "markdown",
|
12 |
+
"metadata": {},
|
13 |
+
"source": [
|
14 |
+
"> Python packages are installed from the `requirements.txt` file into a virtual environment"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"cell_type": "code",
|
19 |
+
"execution_count": null,
|
20 |
+
"metadata": {},
|
21 |
+
"outputs": [],
|
22 |
+
"source": [
|
23 |
+
"!pip install -qU langsmith langchain-core langchain-community langchain-openai langchain-qdrant langchain_experimental pymupdf ragas"
|
24 |
+
]
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"cell_type": "code",
|
28 |
+
"execution_count": 2,
|
29 |
+
"metadata": {},
|
30 |
+
"outputs": [
|
31 |
+
{
|
32 |
+
"name": "stdout",
|
33 |
+
"output_type": "stream",
|
34 |
+
"text": [
|
35 |
+
"aiofiles==23.2.1\n",
|
36 |
+
"aiohappyeyeballs==2.4.0\n",
|
37 |
+
"aiohttp==3.10.5\n",
|
38 |
+
"aiosignal==1.3.1\n",
|
39 |
+
"annotated-types==0.7.0\n",
|
40 |
+
"anyio==3.7.1\n",
|
41 |
+
"appdirs==1.4.4\n",
|
42 |
+
"appnope==0.1.4\n",
|
43 |
+
"asttokens==2.4.1\n",
|
44 |
+
"asyncer==0.0.2\n",
|
45 |
+
"attrs==24.2.0\n",
|
46 |
+
"bidict==0.23.1\n",
|
47 |
+
"certifi==2024.8.30\n",
|
48 |
+
"chainlit==0.7.700\n",
|
49 |
+
"charset-normalizer==3.3.2\n",
|
50 |
+
"click==8.1.7\n",
|
51 |
+
"comm==0.2.2\n",
|
52 |
+
"dataclasses-json==0.5.14\n",
|
53 |
+
"datasets==3.0.0\n",
|
54 |
+
"debugpy==1.8.5\n",
|
55 |
+
"decorator==5.1.1\n",
|
56 |
+
"Deprecated==1.2.14\n",
|
57 |
+
"dill==0.3.8\n",
|
58 |
+
"distro==1.9.0\n",
|
59 |
+
"executing==2.1.0\n",
|
60 |
+
"fastapi==0.100.1\n",
|
61 |
+
"fastapi-socketio==0.0.10\n",
|
62 |
+
"filelock==3.16.1\n",
|
63 |
+
"filetype==1.2.0\n",
|
64 |
+
"frozenlist==1.4.1\n",
|
65 |
+
"fsspec==2024.6.1\n",
|
66 |
+
"googleapis-common-protos==1.65.0\n",
|
67 |
+
"grpcio==1.66.1\n",
|
68 |
+
"grpcio-tools==1.62.3\n",
|
69 |
+
"h11==0.14.0\n",
|
70 |
+
"h2==4.1.0\n",
|
71 |
+
"hpack==4.0.0\n",
|
72 |
+
"httpcore==0.17.3\n",
|
73 |
+
"httpx==0.24.1\n",
|
74 |
+
"huggingface-hub==0.25.0\n",
|
75 |
+
"hyperframe==6.0.1\n",
|
76 |
+
"idna==3.10\n",
|
77 |
+
"importlib_metadata==8.4.0\n",
|
78 |
+
"ipykernel==6.29.5\n",
|
79 |
+
"ipython==8.27.0\n",
|
80 |
+
"jedi==0.19.1\n",
|
81 |
+
"Jinja2==3.1.4\n",
|
82 |
+
"jiter==0.5.0\n",
|
83 |
+
"joblib==1.4.2\n",
|
84 |
+
"jsonpatch==1.33\n",
|
85 |
+
"jsonpointer==3.0.0\n",
|
86 |
+
"jupyter_client==8.6.3\n",
|
87 |
+
"jupyter_core==5.7.2\n",
|
88 |
+
"langchain==0.3.0\n",
|
89 |
+
"langchain-community==0.3.0\n",
|
90 |
+
"langchain-core==0.3.5\n",
|
91 |
+
"langchain-experimental==0.3.0\n",
|
92 |
+
"langchain-huggingface==0.1.0\n",
|
93 |
+
"langchain-openai==0.2.0\n",
|
94 |
+
"langchain-qdrant==0.1.4\n",
|
95 |
+
"langchain-text-splitters==0.3.0\n",
|
96 |
+
"langsmith==0.1.125\n",
|
97 |
+
"Lazify==0.4.0\n",
|
98 |
+
"MarkupSafe==2.1.5\n",
|
99 |
+
"marshmallow==3.22.0\n",
|
100 |
+
"matplotlib-inline==0.1.7\n",
|
101 |
+
"mpmath==1.3.0\n",
|
102 |
+
"multidict==6.1.0\n",
|
103 |
+
"multiprocess==0.70.16\n",
|
104 |
+
"mypy-extensions==1.0.0\n",
|
105 |
+
"nest-asyncio==1.6.0\n",
|
106 |
+
"networkx==3.3\n",
|
107 |
+
"numpy==1.26.4\n",
|
108 |
+
"openai==1.44.1\n",
|
109 |
+
"opentelemetry-api==1.27.0\n",
|
110 |
+
"opentelemetry-exporter-otlp==1.27.0\n",
|
111 |
+
"opentelemetry-exporter-otlp-proto-common==1.27.0\n",
|
112 |
+
"opentelemetry-exporter-otlp-proto-grpc==1.27.0\n",
|
113 |
+
"opentelemetry-exporter-otlp-proto-http==1.27.0\n",
|
114 |
+
"opentelemetry-instrumentation==0.48b0\n",
|
115 |
+
"opentelemetry-proto==1.27.0\n",
|
116 |
+
"opentelemetry-sdk==1.27.0\n",
|
117 |
+
"opentelemetry-semantic-conventions==0.48b0\n",
|
118 |
+
"orjson==3.10.7\n",
|
119 |
+
"packaging==23.2\n",
|
120 |
+
"pandas==2.2.3\n",
|
121 |
+
"parso==0.8.4\n",
|
122 |
+
"pexpect==4.9.0\n",
|
123 |
+
"pillow==10.4.0\n",
|
124 |
+
"platformdirs==4.3.6\n",
|
125 |
+
"portalocker==2.10.1\n",
|
126 |
+
"prompt_toolkit==3.0.47\n",
|
127 |
+
"protobuf==4.25.5\n",
|
128 |
+
"psutil==6.0.0\n",
|
129 |
+
"ptyprocess==0.7.0\n",
|
130 |
+
"pure_eval==0.2.3\n",
|
131 |
+
"pyarrow==17.0.0\n",
|
132 |
+
"pydantic==2.9.2\n",
|
133 |
+
"pydantic-settings==2.5.2\n",
|
134 |
+
"pydantic_core==2.23.4\n",
|
135 |
+
"Pygments==2.18.0\n",
|
136 |
+
"PyJWT==2.9.0\n",
|
137 |
+
"PyMuPDF==1.24.10\n",
|
138 |
+
"pymupdf4llm==0.0.17\n",
|
139 |
+
"PyMuPDFb==1.24.10\n",
|
140 |
+
"pypdf==4.3.1\n",
|
141 |
+
"pysbd==0.3.4\n",
|
142 |
+
"python-dateutil==2.9.0.post0\n",
|
143 |
+
"python-dotenv==1.0.1\n",
|
144 |
+
"python-engineio==4.9.1\n",
|
145 |
+
"python-graphql-client==0.4.3\n",
|
146 |
+
"python-multipart==0.0.6\n",
|
147 |
+
"python-socketio==5.11.4\n",
|
148 |
+
"pytz==2024.2\n",
|
149 |
+
"PyYAML==6.0.2\n",
|
150 |
+
"pyzmq==26.2.0\n",
|
151 |
+
"qdrant-client==1.11.2\n",
|
152 |
+
"ragas==0.1.19\n",
|
153 |
+
"regex==2024.9.11\n",
|
154 |
+
"requests==2.32.3\n",
|
155 |
+
"safetensors==0.4.5\n",
|
156 |
+
"scikit-learn==1.5.2\n",
|
157 |
+
"scipy==1.14.1\n",
|
158 |
+
"sentence-transformers==3.1.1\n",
|
159 |
+
"simple-websocket==1.0.0\n",
|
160 |
+
"six==1.16.0\n",
|
161 |
+
"sniffio==1.3.1\n",
|
162 |
+
"SQLAlchemy==2.0.35\n",
|
163 |
+
"stack-data==0.6.3\n",
|
164 |
+
"starlette==0.27.0\n",
|
165 |
+
"sympy==1.13.3\n",
|
166 |
+
"syncer==2.0.3\n",
|
167 |
+
"tenacity==8.5.0\n",
|
168 |
+
"threadpoolctl==3.5.0\n",
|
169 |
+
"tiktoken==0.7.0\n",
|
170 |
+
"tokenizers==0.19.1\n",
|
171 |
+
"tomli==2.0.1\n",
|
172 |
+
"torch==2.4.1\n",
|
173 |
+
"tornado==6.4.1\n",
|
174 |
+
"tqdm==4.66.5\n",
|
175 |
+
"traitlets==5.14.3\n",
|
176 |
+
"transformers==4.44.2\n",
|
177 |
+
"typing-inspect==0.9.0\n",
|
178 |
+
"typing_extensions==4.12.2\n",
|
179 |
+
"tzdata==2024.1\n",
|
180 |
+
"uptrace==1.26.0\n",
|
181 |
+
"urllib3==2.2.3\n",
|
182 |
+
"uvicorn==0.23.2\n",
|
183 |
+
"watchfiles==0.20.0\n",
|
184 |
+
"wcwidth==0.2.13\n",
|
185 |
+
"websockets==13.1\n",
|
186 |
+
"wrapt==1.16.0\n",
|
187 |
+
"wsproto==1.2.0\n",
|
188 |
+
"xxhash==3.5.0\n",
|
189 |
+
"yarl==1.11.1\n",
|
190 |
+
"zipp==3.20.2\n"
|
191 |
+
]
|
192 |
+
}
|
193 |
+
],
|
194 |
+
"source": [
|
195 |
+
"!pip freeze\n"
|
196 |
+
]
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"cell_type": "code",
|
200 |
+
"execution_count": null,
|
201 |
+
"metadata": {},
|
202 |
+
"outputs": [],
|
203 |
+
"source": [
|
204 |
+
"import os\n",
|
205 |
+
"import getpass\n",
|
206 |
+
"from uuid import uuid4\n",
|
207 |
+
"\n",
|
208 |
+
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
209 |
+
"os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"LangChain API Key:\")\n",
|
210 |
+
"\n",
|
211 |
+
"os.environ[\"LANGCHAIN_PROJECT\"] = \"AIM-SDG-MidTerm - AI Safety\"\n",
|
212 |
+
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
|
213 |
+
"\n",
|
214 |
+
"os.environ[\"QDRANT_API_KEY\"] = getpass.getpass(\"Enter Your Qdrant API Key: \")"
|
215 |
+
]
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"cell_type": "code",
|
219 |
+
"execution_count": null,
|
220 |
+
"metadata": {},
|
221 |
+
"outputs": [],
|
222 |
+
"source": [
|
223 |
+
"from pdfloader import PDFLoaderWrapper\n",
|
224 |
+
"from langchain_experimental.text_splitter import SemanticChunker\n",
|
225 |
+
"\n",
|
226 |
+
"BOR_FILE_PATH = \"https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf\"\n",
|
227 |
+
"NIST_FILE_PATH = \"https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf\"\n",
|
228 |
+
"SMALL_DOC = \"https://arxiv.org/pdf/1908.10084\" \n",
|
229 |
+
"documents_to_preload = [\n",
|
230 |
+
" BOR_FILE_PATH,\n",
|
231 |
+
" NIST_FILE_PATH\n",
|
232 |
+
" # SMALL_DOC\n",
|
233 |
+
"]\n",
|
234 |
+
"\n",
|
235 |
+
"pdf_loader = PDFLoaderWrapper(\n",
|
236 |
+
" documents_to_preload, PDFLoaderWrapper.LoaderType.PYMUPDF\n",
|
237 |
+
")\n",
|
238 |
+
"documents = await pdf_loader.aload()\n",
|
239 |
+
"\n"
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": null,
|
245 |
+
"metadata": {},
|
246 |
+
"outputs": [],
|
247 |
+
"source": [
|
248 |
+
"print (\"Importing packages\")\n",
|
249 |
+
"from ragas.testset.generator import TestsetGenerator\n",
|
250 |
+
"from ragas.testset.evolutions import simple, reasoning, multi_context\n",
|
251 |
+
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
252 |
+
"from ragas.testset.docstore import Document, DocumentStore,InMemoryDocumentStore\n",
|
253 |
+
"from langchain_experimental.text_splitter import SemanticChunker\n",
|
254 |
+
"from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline\n",
|
255 |
+
"from ragas.testset.extractor import KeyphraseExtractor\n",
|
256 |
+
"\n",
|
257 |
+
"print (\"Packages import complete\")\n",
|
258 |
+
"print (\"Getting the Embedding model from Huggingface\")\n",
|
259 |
+
"# Using the best-performing embedding model from Hugging Face to generate a quality dataset.\n",
|
260 |
+
"# Need GPU\n",
|
261 |
+
"model_name = \"Snowflake/snowflake-arctic-embed-l\"\n",
|
262 |
+
"embedding_model = HuggingFaceEmbeddings(model_name=model_name)\n",
|
263 |
+
"print (\"Embedding model loaded\")\n",
|
264 |
+
"\n",
|
265 |
+
"print (\"Splitting the documents into semantic chunks\")\n",
|
266 |
+
"text_splitter = SemanticChunker(embedding_model, breakpoint_threshold_type=\"percentile\",breakpoint_threshold_amount=90)\n",
|
267 |
+
"chunked_docs = text_splitter.split_documents(documents)\n",
|
268 |
+
"\n",
|
269 |
+
"print (\"Creating the document store for ragas and loading LLM models\")\n",
|
270 |
+
"generator_llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
271 |
+
"critic_llm = ChatOpenAI(model=\"gpt-4o\")\n",
|
272 |
+
"\n",
|
273 |
+
"keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)\n",
|
274 |
+
"docstore = InMemoryDocumentStore(splitter=text_splitter,extractor=keyphrase_extractor, embeddings=embedding_model)\n",
|
275 |
+
"\n",
|
276 |
+
"\n",
|
277 |
+
"print (\"Creating the testset generator\")\n",
|
278 |
+
"generator = TestsetGenerator.from_langchain( # Default uses TokenTextSplitter\n",
|
279 |
+
" generator_llm=generator_llm,\n",
|
280 |
+
" critic_llm=critic_llm,\n",
|
281 |
+
" embeddings=embedding_model,\n",
|
282 |
+
" docstore=docstore # Document store uses SemanticChunker\n",
|
283 |
+
")\n",
|
284 |
+
"\n",
|
285 |
+
"distributions = {\n",
|
286 |
+
" simple: 0.5,\n",
|
287 |
+
" multi_context: 0.3,\n",
|
288 |
+
" reasoning: 0.2\n",
|
289 |
+
"}"
|
290 |
+
]
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"cell_type": "code",
|
294 |
+
"execution_count": null,
|
295 |
+
"metadata": {},
|
296 |
+
"outputs": [],
|
297 |
+
"source": [
|
298 |
+
"tests_per_doc = 2 \n",
|
299 |
+
"test_size = tests_per_doc * len(documents)\n",
|
300 |
+
"\n",
|
301 |
+
"testset = generator.generate_with_langchain_docs(\n",
|
302 |
+
" documents, \n",
|
303 |
+
" test_size, \n",
|
304 |
+
" distributions, \n",
|
305 |
+
" with_debugging_logs=True\n",
|
306 |
+
") # Default RunConfig(max_retries=15, max_wait=90)"
|
307 |
+
]
|
308 |
+
},
|
309 |
+
{
|
310 |
+
"cell_type": "code",
|
311 |
+
"execution_count": null,
|
312 |
+
"metadata": {},
|
313 |
+
"outputs": [],
|
314 |
+
"source": [
|
315 |
+
"testset.to_pandas()"
|
316 |
+
]
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"cell_type": "code",
|
320 |
+
"execution_count": null,
|
321 |
+
"metadata": {},
|
322 |
+
"outputs": [],
|
323 |
+
"source": [
|
324 |
+
"from langsmith import Client\n",
|
325 |
+
"\n",
|
326 |
+
"client = Client()\n",
|
327 |
+
"\n",
|
328 |
+
"dataset_name = \"AI Safety\"\n",
|
329 |
+
"\n",
|
330 |
+
"dataset = client.create_dataset(\n",
|
331 |
+
" dataset_name=dataset_name,\n",
|
332 |
+
" description=\"Questions about AI Safety\"\n",
|
333 |
+
")\n",
|
334 |
+
"\n",
|
335 |
+
"for test in testset.to_pandas().iterrows():\n",
|
336 |
+
" client.create_example(\n",
|
337 |
+
" inputs={\n",
|
338 |
+
" \"question\": test[1][\"question\"]\n",
|
339 |
+
" },\n",
|
340 |
+
" outputs={\n",
|
341 |
+
" \"answer\": test[1][\"ground_truth\"]\n",
|
342 |
+
" },\n",
|
343 |
+
" metadata={\n",
|
344 |
+
" \"context\": test[0]\n",
|
345 |
+
" },\n",
|
346 |
+
" dataset_id=dataset.id\n",
|
347 |
+
" )"
|
348 |
+
]
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"cell_type": "markdown",
|
352 |
+
"metadata": {},
|
353 |
+
"source": [
|
354 |
+
"# Create RAG chain to generate answers for the above questions in the dataset"
|
355 |
+
]
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"cell_type": "markdown",
|
359 |
+
"metadata": {},
|
360 |
+
"source": [
|
361 |
+
"> Note that we are using Qdrant cloud where the PDF document is processed and saved for us to consume. For the RAG pipeline we use the same embedding model originally used to populate the Qdrant vectorstore."
|
362 |
+
]
|
363 |
+
},
|
364 |
+
{
|
365 |
+
"cell_type": "code",
|
366 |
+
"execution_count": null,
|
367 |
+
"metadata": {},
|
368 |
+
"outputs": [],
|
369 |
+
"source": [
|
370 |
+
"from langchain_qdrant import QdrantVectorStore\n",
|
371 |
+
"from langchain_core.documents import Document\n",
|
372 |
+
"from qdrant_client import QdrantClient\n",
|
373 |
+
"from qdrant_client.http.models import Distance, VectorParams\n",
|
374 |
+
"\n",
|
375 |
+
"dimension = 1024\n",
|
376 |
+
"collection_name = \"ai-safety-sr-arctic-embed-l-semantic\"\n",
|
377 |
+
"qdrant_server = \"https://500cb0e8-ea08-4662-b4f2-3eca11e635da.europe-west3-0.gcp.cloud.qdrant.io:6333\"\n",
|
378 |
+
"qdrant_client = QdrantClient(url=qdrant_server,api_key=os.environ[\"QDRANT_API_KEY\"])\n",
|
379 |
+
"qdrant_client.create_collection(\n",
|
380 |
+
" collection_name=collection_name,\n",
|
381 |
+
" vectors_config=VectorParams(size=dimension, distance=Distance.COSINE),\n",
|
382 |
+
")\n",
|
383 |
+
"\n",
|
384 |
+
"vector_store = QdrantVectorStore(\n",
|
385 |
+
" client=qdrant_client,\n",
|
386 |
+
" collection_name=collection_name,\n",
|
387 |
+
" embedding=embedding_model,\n",
|
388 |
+
")\n",
|
389 |
+
"\n",
|
390 |
+
"retriever = vector_store.as_retriever()"
|
391 |
+
]
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"cell_type": "code",
|
395 |
+
"execution_count": null,
|
396 |
+
"metadata": {},
|
397 |
+
"outputs": [],
|
398 |
+
"source": [
|
399 |
+
"from langchain.prompts import ChatPromptTemplate\n",
|
400 |
+
"\n",
|
401 |
+
"RAG_PROMPT = \"\"\"\\\n",
|
402 |
+
"Given a provided context and question, you must answer the question based only on context.\n",
|
403 |
+
"\n",
|
404 |
+
"If you cannot answer the question based on the context - you must say \"I don't know\".\n",
|
405 |
+
"\n",
|
406 |
+
"Context: {context}\n",
|
407 |
+
"Question: {question}\n",
|
408 |
+
"\"\"\"\n",
|
409 |
+
"\n",
|
410 |
+
"rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)"
|
411 |
+
]
|
412 |
+
},
|
413 |
+
{
|
414 |
+
"cell_type": "code",
|
415 |
+
"execution_count": null,
|
416 |
+
"metadata": {},
|
417 |
+
"outputs": [],
|
418 |
+
"source": [
|
419 |
+
"from langchain_openai import ChatOpenAI\n",
|
420 |
+
"\n",
|
421 |
+
"# Using the same model used in the app.\n",
|
422 |
+
"chat_model_name = \"gpt-4o\"\n",
|
423 |
+
"llm = ChatOpenAI(model=chat_model_name)"
|
424 |
+
]
|
425 |
+
},
|
426 |
+
{
|
427 |
+
"cell_type": "code",
|
428 |
+
"execution_count": null,
|
429 |
+
"metadata": {},
|
430 |
+
"outputs": [],
|
431 |
+
"source": [
|
432 |
+
"from operator import itemgetter\n",
|
433 |
+
"from langchain_core.runnables import RunnablePassthrough, RunnableParallel\n",
|
434 |
+
"from langchain.schema import StrOutputParser\n",
|
435 |
+
"\n",
|
436 |
+
"ai_safety_rag_chain = (\n",
|
437 |
+
" {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
|
438 |
+
" | rag_prompt | llm | StrOutputParser()\n",
|
439 |
+
")"
|
440 |
+
]
|
441 |
+
},
|
442 |
+
{
|
443 |
+
"cell_type": "code",
|
444 |
+
"execution_count": null,
|
445 |
+
"metadata": {},
|
446 |
+
"outputs": [],
|
447 |
+
"source": [
|
448 |
+
"ai_safety_rag_chain.invoke({\"question\" : \"What steps can organizations take to minimize bias in AI models?\"})"
|
449 |
+
]
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"cell_type": "markdown",
|
453 |
+
"metadata": {},
|
454 |
+
"source": [
|
455 |
+
"# LangSmith Evaluation setup"
|
456 |
+
]
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"cell_type": "code",
|
460 |
+
"execution_count": null,
|
461 |
+
"metadata": {},
|
462 |
+
"outputs": [],
|
463 |
+
"source": [
|
464 |
+
"from langsmith.evaluation import LangChainStringEvaluator, evaluate\n",
|
465 |
+
"\n",
|
466 |
+
"eval_llm = ChatOpenAI(model=\"gpt-4o\")\n",
|
467 |
+
"\n",
|
468 |
+
"# Evaluators\n",
|
469 |
+
"qa_evaluator = LangChainStringEvaluator(\"qa\", config={\"llm\" : eval_llm})\n",
|
470 |
+
"\n",
|
471 |
+
"# Faithfulness Evaluator\n",
|
472 |
+
"# Checks whether the generated answer is faithful to the provided source material or context.\n",
|
473 |
+
"faithfulness_evaluator = LangChainStringEvaluator(\n",
|
474 |
+
" \"criteria\",\n",
|
475 |
+
" config={\n",
|
476 |
+
" \"criteria\": {\n",
|
477 |
+
" \"faithfulness\": (\n",
|
478 |
+
" \"Is the answer faithful to the given context?\"\n",
|
479 |
+
" )\n",
|
480 |
+
" },\n",
|
481 |
+
" \"llm\": eval_llm\n",
|
482 |
+
" },\n",
|
483 |
+
" prepare_data=lambda run, example: {\n",
|
484 |
+
" \"prediction\": run.outputs[\"output\"],\n",
|
485 |
+
" \"reference\": example.outputs[\"answer\"],\n",
|
486 |
+
" \"input\": example.inputs[\"question\"],\n",
|
487 |
+
" }\n",
|
488 |
+
")\n",
|
489 |
+
"\n",
|
490 |
+
"# Answer Relevancy Evaluator\n",
|
491 |
+
"# Determines whether the answer is relevant to the user's question.\n",
|
492 |
+
"answer_relevancy_evaluator = LangChainStringEvaluator(\n",
|
493 |
+
" \"criteria\",\n",
|
494 |
+
" config={\n",
|
495 |
+
" \"criteria\": {\n",
|
496 |
+
" \"relevancy\": (\n",
|
497 |
+
" \"Does the answer address the question and provide relevant information?\"\n",
|
498 |
+
" )\n",
|
499 |
+
" },\n",
|
500 |
+
" \"llm\": eval_llm\n",
|
501 |
+
" },\n",
|
502 |
+
" prepare_data=lambda run, example: {\n",
|
503 |
+
" \"prediction\": run.outputs[\"output\"],\n",
|
504 |
+
" \"reference\": example.outputs[\"answer\"],\n",
|
505 |
+
" \"input\": example.inputs[\"question\"],\n",
|
506 |
+
" }\n",
|
507 |
+
")\n",
|
508 |
+
"\n",
|
509 |
+
"# Context Precision Evaluator\n",
|
510 |
+
"# Evaluates how precisely the answer uses information from the given context.\n",
|
511 |
+
"context_precision_evaluator = LangChainStringEvaluator(\n",
|
512 |
+
" \"criteria\",\n",
|
513 |
+
" config={\n",
|
514 |
+
" \"criteria\": {\n",
|
515 |
+
" \"context_precision\": (\n",
|
516 |
+
" \"Does the answer precisely use information from the provided context?\"\n",
|
517 |
+
" )\n",
|
518 |
+
" },\n",
|
519 |
+
" \"llm\": eval_llm\n",
|
520 |
+
" },\n",
|
521 |
+
" prepare_data=lambda run, example: {\n",
|
522 |
+
" \"prediction\": run.outputs[\"output\"],\n",
|
523 |
+
" \"reference\": example.outputs[\"answer\"],\n",
|
524 |
+
" \"input\": example.inputs[\"question\"],\n",
|
525 |
+
" }\n",
|
526 |
+
")\n",
|
527 |
+
"\n",
|
528 |
+
"# Context Recall Evaluator\n",
|
529 |
+
"# Determines if the answer recalls all the necessary and relevant information from the context.\n",
|
530 |
+
"context_recall_evaluator = LangChainStringEvaluator(\n",
|
531 |
+
" \"criteria\",\n",
|
532 |
+
" config={\n",
|
533 |
+
" \"criteria\": {\n",
|
534 |
+
" \"context_recall\": (\n",
|
535 |
+
" \"Does the answer recall all relevant information from the provided context?\"\n",
|
536 |
+
" )\n",
|
537 |
+
" },\n",
|
538 |
+
" \"llm\": eval_llm\n",
|
539 |
+
" },\n",
|
540 |
+
" prepare_data=lambda run, example: {\n",
|
541 |
+
" \"prediction\": run.outputs[\"output\"],\n",
|
542 |
+
" \"reference\": example.outputs[\"answer\"],\n",
|
543 |
+
" \"input\": example.inputs[\"question\"],\n",
|
544 |
+
" }\n",
|
545 |
+
")"
|
546 |
+
]
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"cell_type": "code",
|
550 |
+
"execution_count": null,
|
551 |
+
"metadata": {},
|
552 |
+
"outputs": [],
|
553 |
+
"source": [
|
554 |
+
"evaluate(\n",
|
555 |
+
" ai_safety_rag_chain.invoke,\n",
|
556 |
+
" data=dataset_name,\n",
|
557 |
+
" evaluators=[\n",
|
558 |
+
" qa_evaluator,\n",
|
559 |
+
" faithfulness_evaluator,\n",
|
560 |
+
" answer_relevancy_evaluator,\n",
|
561 |
+
" context_precision_evaluator,\n",
|
562 |
+
" context_recall_evaluator\n",
|
563 |
+
" ],\n",
|
564 |
+
" metadata={\"revision_id\": \"ai_safety_rag_chain\"},\n",
|
565 |
+
")"
|
566 |
+
]
|
567 |
+
}
|
568 |
+
],
|
569 |
+
"metadata": {
|
570 |
+
"kernelspec": {
|
571 |
+
"display_name": ".venv",
|
572 |
+
"language": "python",
|
573 |
+
"name": "python3"
|
574 |
+
},
|
575 |
+
"language_info": {
|
576 |
+
"codemirror_mode": {
|
577 |
+
"name": "ipython",
|
578 |
+
"version": 3
|
579 |
+
},
|
580 |
+
"file_extension": ".py",
|
581 |
+
"mimetype": "text/x-python",
|
582 |
+
"name": "python",
|
583 |
+
"nbconvert_exporter": "python",
|
584 |
+
"pygments_lexer": "ipython3",
|
585 |
+
"version": "3.11.9"
|
586 |
+
}
|
587 |
+
},
|
588 |
+
"nbformat": 4,
|
589 |
+
"nbformat_minor": 2
|
590 |
+
}
|