import json
import os

import requests
from elasticsearch import Elasticsearch

# Elasticsearch connection settings. All four variables are required: a
# missing one raises KeyError as soon as this module is imported.
ES_URL = os.environ["ES_URL"]
ES_USER = os.environ["ES_USER"]
ES_PASS = os.environ["ES_PASS"]
ES_CA_CERT = os.environ["ES_CA_CERT"]


class ESGPT:
    """Search an Elasticsearch index and summarize the hits with OpenAI.

    The Elasticsearch connection is configured from the module-level
    ``ES_*`` constants; the OpenAI model name and API key are read from the
    ``OPENAI_GPT_ENGINE`` and ``OPENAI_API_KEY`` environment variables.
    """

    def __init__(self, index_name):
        """Create the Elasticsearch client and load the OpenAI settings.

        Args:
            index_name: Name of the index used by ``index`` and ``search``.

        Raises:
            KeyError: If ``OPENAI_GPT_ENGINE`` or ``OPENAI_API_KEY`` is not
                set in the environment.
        """
        self.es = Elasticsearch(
            ES_URL,
            http_auth=(ES_USER, ES_PASS),
            ca_certs=ES_CA_CERT,
            verify_certs=True,
        )
        self.index_name = index_name
        self.model_engine = os.environ["OPENAI_GPT_ENGINE"]
        self.api_key = os.environ["OPENAI_API_KEY"]

    def index(self, doc_id, doc):
        """Store ``doc`` in the configured index under the id ``doc_id``."""
        self.es.index(index=self.index_name, id=doc_id, document=doc)

    def search(self, query):
        """Run a ``query_string`` search against the configured index.

        Args:
            query: Query text passed as the ``query_string`` query.

        Returns:
            The ``hits.hits`` entry of the Elasticsearch response.
        """
        body = {
            "query": {
                "query_string": {"query": query},
            },
        }
        results = self.es.search(index=self.index_name, body=body)
        return results["hits"]["hits"]

    def _paper_results_to_text(self, results):
        """Render search hits as plain text, one ``title:\\nabstract`` entry each.

        Args:
            results: Iterable of hits, each carrying its document under
                ``"_source"``.

        Returns:
            The concatenated entries, or ``""`` when ``results`` is empty.
            Each abstract is cut to its first 100 characters; a missing or
            null title/abstract is rendered as an empty string.
        """
        entries = []
        for paper in results:
            source = paper["_source"]
            # `or ""` also covers an explicit null value, which would
            # otherwise break the slice below.
            title = source.get("title") or ""
            # The original code tested for the misspelled key "abctract",
            # so abstracts never reached the summary prompt.
            abstract = source.get("abstract") or ""
            entries.append(f"{title}:\n{abstract[:100]}\n\n")
        return "".join(entries)

    def summarize(self, query, results):
        """Ask the OpenAI completions endpoint to summarize search results.

        Prints the first 500 characters of the rendered results, then posts
        a prompt containing at most their first 1000 characters.

        Args:
            query: The search query the results belong to; quoted in the
                prompt.
            results: Hits as returned by ``search``.

        Returns:
            The ``requests.Response`` of the POST. The request is made with
            ``stream=True``, so the body has not been read yet and the HTTP
            status has not been checked; both are left to the caller.
        """
        result_json_str = self._paper_results_to_text(results)
        if not result_json_str:
            result_json_str = "No results found"
        print(result_json_str[:500])

        body = {
            "model": self.model_engine,
            "prompt": f"Please summarize the following search results for query: {query}:\n{result_json_str[:1000]}",
            "max_tokens": 1000,
            "n": 1,
            "stop": None,
            "temperature": 0.5,
            "stream": True,
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely; consider adding one once callers handle
        # requests.Timeout.
        resp = requests.post(
            "https://api.openai.com/v1/completions",
            headers=headers,
            data=json.dumps(body),
            stream=True,
        )
        return resp