AlanaziX committed on
Commit
cdbb5c6
1 Parent(s): ab9fa83

Create app.py

Files changed (1)
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
+ # Dependencies (install from a shell before running; a bare pip command is not valid Python):
+ # pip install -qU langchain-community faiss-cpu faiss-gpu langchain-openai sentence_transformers gradio
+
+ import os
+ from uuid import uuid4
+
+ import faiss
+ import pandas as pd
+ import gradio as gr
+
+ from langchain_community.docstore.in_memory import InMemoryDocstore
+ from langchain_community.vectorstores import FAISS
+ from langchain_openai import OpenAI, OpenAIEmbeddings
+ from langchain_core.documents import Document
+ from langchain_core.prompts import PromptTemplate
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains import RetrievalQA
+
+ # Load the cleaned news dataset (CSV expected alongside app.py).
+ df = pd.read_csv('news_paper-Cleaned.csv', encoding='utf-8', on_bad_lines='skip')
+
+ # The OpenAI key is read from the OPENAI_API_KEY environment variable
+ # (e.g. a Hugging Face Space secret); never hardcode a real key in source.
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
+
+ index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))
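+ # The probe embedding above fixes the index dimensionality;
+ # IndexFlatL2 performs exact (brute-force) L2 search.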
+
+ vector_store = FAISS(
+     embedding_function=embeddings,
+     index=index,
+     docstore=InMemoryDocstore(),
+     index_to_docstore_id={},
+ )
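+ # The store starts empty; chunks are added below, with FAISS positions
+ # mapped to docstore ids.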
+
+ documents = [
+     {
+         'title': row['title'],
+         'author': row['author'],
+         'description': row['description'],
+         'full_text': row['full_text'],
+     }
+     for _, row in df.iterrows()
+ ]
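+ # Each CSV row becomes one dict so title, author, description and full text
+ # stay together in a single record.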
+
+ full_text = [
+     Document(
+         page_content=str(doc),
+         metadata={"source": "news"},
+     )
+     for doc in documents
+ ]
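+ # str(doc) serialises the whole record into page_content; only a "source"
+ # tag is kept as metadata.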
+
+ text_splitter = RecursiveCharacterTextSplitter(
+     # Split into 1000-character chunks with 100 characters of overlap.
+     chunk_size=1000,
+     chunk_overlap=100,
+     length_function=len,
+     is_separator_regex=False,
+ )
+
+ text_split = text_splitter.split_documents(full_text)
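+ # Long articles are broken up before embedding so each chunk fits comfortably
+ # in the retrieval context.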
+
+ uuids = [str(uuid4()) for _ in range(len(text_split))]
+
+ vector_store.add_documents(documents=text_split, ids=uuids)
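+ # add_documents embeds every chunk and stores it in FAISS under a fresh UUID.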
+
+ retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})
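+ # MMR (maximal marginal relevance) retrieval returns up to k=10 chunks that
+ # balance relevance to the query with diversity among the results.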
+
+ def questions(query):
+
+     template = """
+     You are a helpful assistant that can answer questions about specific data.
+     You must answer only from this context.
+     You will receive 10 retrieved passages; use all of them and separate the answers with new lines.
+
+     Question: {question}
+     Context: {context}
+     Answer:
+     """
+
+     PROMPT = PromptTemplate(template=template, input_variables=['question', 'context'])
+
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=OpenAI(),
+         retriever=retriever,
+         chain_type_kwargs={"prompt": PROMPT},
+     )
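+     # The default "stuff" chain places the retrieved chunks into {context}
+     # and asks the model to answer strictly from them.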
+
+     return qa_chain.invoke({"query": query})['result']
+
+
+ demo = gr.Interface(fn=questions, inputs="text", outputs="text")
+ demo.launch()
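+ # Usage note: demo.launch() serves a simple text-in/text-out UI; questions()
+ # can also be called directly, e.g. questions("What does the coverage say about <topic>?"),
+ # where <topic> is a placeholder for any query over the indexed news articles.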