File size: 3,899 Bytes
a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d a5686cb 6a89c2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import gradio as gr
from transformers import pipeline
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever
import numpy as np
import openai
document_store = FAISSDocumentStore.load(
index_path=f"./documents/climate_gpt.faiss",
config_path=f"./documents/climate_gpt.json",
)
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
system_template = {
"role": "system",
"content": "You have been a climate change expert for 30 years. You answer questions about climate change in an educationnal and concise manner. Whenever possible your answers are backed up by facts and numbers from scientific reports.",
}
dense = EmbeddingRetriever(
document_store=document_store,
embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
model_format="sentence_transformers",
)
def is_climate_change_related(sentence: str) -> bool:
results = classifier(
sequences=sentence,
candidate_labels=["climate change related", "non climate change related"],
)
return results["labels"][np.argmax(results["scores"])] == "climate change related"
def make_pairs(lst):
"""from a list of even lenght, make tupple pairs"""
return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
def gen_conv(query: str, history=[system_template], ipcc=True):
"""return (answer:str, history:list[dict], sources:str)"""
retrieve = ipcc and is_climate_change_related(query)
sources = ""
messages = history + [
{"role": "user", "content": query},
]
if retrieve:
docs = dense.retrieve(query=query, top_k=5)
sources = "\n\n".join(
[
"If relevant, use those extracts in your answer and give the reference of the information you used."
]
+ [
f"{d.meta['file_name']} Page {d.meta['page_number']}\n{d.content}"
for d in docs
]
)
messages.append({"role": "system", "content": sources})
answer = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.2,
# max_tokens=200,
)["choices"][0]["message"]["content"]
if retrieve:
messages.pop()
# answer = "(top 5 documents retrieved) " + answer
sources = "\n\n".join(
f"{d.meta['file_name']} Page {d.meta['page_number']}:\n{d.content}"
for d in docs
)
messages.append({"role": "assistant", "content": answer})
gradio_format = make_pairs([a["content"] for a in messages[1:]])
return gradio_format, messages, sources
# Gradio
def connect(text):
openai.api_key = text
return "You're all set"
with gr.Blocks(title="Eki IPCC Explorer") as demo:
with gr.Row():
with gr.Column():
api_key = gr.Textbox(label="Open AI api key")
connect_btn = gr.Button(value="Connect")
with gr.Column():
result = gr.Textbox(label="Connection")
connect_btn.click(connect, inputs=api_key, outputs=result, api_name="Connection")
gr.Markdown(
"""
# Ask me anything, I'm a climate expert
"""
)
with gr.Row():
with gr.Column(scale=2):
chatbot = gr.Chatbot()
state = gr.State([system_template])
with gr.Row():
ask = gr.Textbox(
show_label=False, placeholder="Enter text and press enter"
).style(container=False)
with gr.Column(scale=1, variant="panel"):
gr.Markdown("### Sources")
sources_textbox = gr.Textbox(
interactive=False, show_label=False, max_lines=50
)
ask.submit(
fn=gen_conv, inputs=[ask, state], outputs=[chatbot, state, sources_textbox]
)
demo.launch(share=True)
|