Commit
6cafc91
1 Parent(s): 0b48057

delete rag.py

Browse files
Files changed (1) hide show
  1. rag.py +0 -69
rag.py DELETED
@@ -1,69 +0,0 @@
1
- from sentence_transformers import SentenceTransformer
2
- from wikipediaapi import Wikipedia
3
- import textwrap
4
- import numpy as np
5
- import openai
6
- from openai import OpenAI
7
-
# Retrieval-augmented generation (RAG) demo.
#
# Pipeline: fetch an Arabic Wikipedia article, split it into paragraph chunks,
# embed the chunks and the question with a SentenceTransformer model, pick the
# chunks most similar to the question, and ask an OpenAI chat model to answer
# using only those chunks as context.

# --- Configuration ----------------------------------------------------------
matryoshka_dim = 128  # passed to SentenceTransformer as truncate_dim
EMBEDDING_MODEL_NAME = "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka"
WIKI_USER_AGENT = "RAGBot/0.0"
WIKI_LANGUAGE = "ar"
WIKI_PAGE_TITLE = "جابر بن حيان"
query = "من هو جابر بن حيان؟"
CHAT_MODEL_NAME = "gpt-4o"
TOP_K = 3          # number of chunks handed to the chat model as context
WRAP_WIDTH = 100   # column width used when printing / assembling chunks
SEPARATOR = "-----------------------------------------------------------------"


def chunk_paragraphs(text):
    """Split *text* into paragraph chunks on blank lines.

    Chunks that are empty or whitespace-only are dropped so they are neither
    embedded nor retrievable as (useless) context.

    Args:
        text: The full document text.

    Returns:
        A list of non-blank paragraph strings, in document order.
    """
    return [chunk for chunk in text.split("\n\n") if chunk.strip()]


def top_k_indices(similarities, k=TOP_K):
    """Return the indices of the *k* highest scores, best match first.

    Args:
        similarities: Array-like of similarity scores, one per chunk.
        k: Maximum number of indices to return. Fewer are returned when there
            are fewer than *k* scores; ``k <= 0`` yields an empty list.

    Returns:
        A list of plain ``int`` indices ordered from highest to lowest score.
    """
    if k <= 0:
        # A slice of [-0:] would return every index, so handle this explicitly.
        return []
    scores = np.asarray(similarities).ravel()
    return np.argsort(scores)[-k:][::-1].tolist()


def show_wrapped(paragraph, width=WRAP_WIDTH):
    """Print *paragraph* wrapped to *width* between separator rules.

    Returns:
        The wrapped text, so callers can reuse exactly what was displayed.
    """
    wrapped = textwrap.fill(paragraph, width=width)
    print(SEPARATOR)
    print(wrapped)
    print(SEPARATOR)
    return wrapped


def build_context(wrapped_paragraphs):
    """Join wrapped paragraphs into one context string.

    Every paragraph is followed by a blank line, including the last one.
    """
    return "".join(paragraph + "\n\n" for paragraph in wrapped_paragraphs)


def build_prompt(context, question):
    """Build the chat prompt that asks *question* against *context*."""
    return (
        "\n"
        "use the following CONTEXT to answer the QUESTION at the end.\n"
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "\n"
        f"CONTEXT: {context}\n"
        f"QUESTION: {question}\n"
    )


def main():
    """Run the full pipeline and print the chat model's answer.

    Raises:
        SystemExit: If the Wikipedia page yields no non-blank paragraphs.
    """
    # NOTE(review): trust_remote_code=True permits code from the model
    # repository to run locally; confirm that repository is trusted.
    model = SentenceTransformer(
        EMBEDDING_MODEL_NAME,
        trust_remote_code=True,
        truncate_dim=matryoshka_dim,
    )

    wiki = Wikipedia(WIKI_USER_AGENT, WIKI_LANGUAGE)
    doc = wiki.page(WIKI_PAGE_TITLE).text
    paragraphs = chunk_paragraphs(doc)  # chunking
    if not paragraphs:
        raise SystemExit(
            f"No text retrieved for Wikipedia page {WIKI_PAGE_TITLE!r}; "
            "nothing to index."
        )

    for paragraph in paragraphs:
        show_wrapped(paragraph)

    # Embeddings are requested normalized, so the dot product below is used
    # directly as the similarity score between each chunk and the question.
    docs_embed = model.encode(paragraphs, normalize_embeddings=True)
    query_embed = model.encode(query, normalize_embeddings=True)
    similarities = np.dot(docs_embed, query_embed.T)

    most_similar_documents = [
        paragraphs[idx] for idx in top_k_indices(similarities, TOP_K)
    ]
    context = build_context(
        show_wrapped(paragraph) for paragraph in most_similar_documents
    )
    prompt = build_prompt(context, query)

    # The original script passed `userdata.get('OPENAI_API_KEY')`, but
    # `userdata` is never imported in this file. With no api_key argument the
    # client takes the key from the OPENAI_API_KEY environment variable.
    client = OpenAI()

    response = client.chat.completions.create(
        model=CHAT_MODEL_NAME,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )

    print(response.choices[0].message.content)


if __name__ == "__main__":
    main()