File size: 3,225 Bytes
6855b1e
8922c23
6855b1e
 
 
 
 
 
8922c23
428be9a
6855b1e
 
 
 
 
 
 
 
 
 
 
 
a8c00ab
6855b1e
 
 
 
 
 
 
a8c00ab
6855b1e
428be9a
 
 
 
 
 
6855b1e
428be9a
 
 
 
6855b1e
428be9a
6855b1e
 
 
 
 
3152165
6855b1e
de33d74
6855b1e
 
 
 
 
 
 
 
 
 
8922c23
6855b1e
 
 
 
 
39a05d7
 
f58ccf2
6855b1e
 
 
d29bff5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import logging
import os
from time import asctime

import gradio as gr
from llama_index.core import Document, VectorStoreIndex

from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
    generate_chat_response_with_history_rag_return_response
from read_write_index import read_write_index
from web_search import search

# Named logger for the agent; the script entry point at the bottom of this
# file attaches a per-run file handler to it.
logger = logging.getLogger("agent_logger")

# Module-level flags, all initially off. Nothing in the visible part of this
# file reads them -- presumably leftovers or consumed elsewhere; TODO confirm.
sourced = query = rag_similarity = False


def google_search_chat(message, history):
    """Stream a chat reply, grounding it in web search results when applicable.

    The message and history are first condensed into a single question via
    ``condense_question``. If ``is_search_query`` accepts that question, a web
    search is run, the text of all results is concatenated and wrapped in a
    ``VectorStoreIndex``, and the RAG responder answers from that index. When
    the search yields no text, the index returned by
    ``read_write_index(path='storage_search/')`` is used instead. Any other
    message is delegated to ``generate_chat_response_with_history``.

    Args:
        message: Latest user message, as passed by the Gradio chat interface.
        history: Earlier conversation turns, as passed by the Gradio chat
            interface.

    Yields:
        On the search path, the reply text accumulated so far, once per
        streamed chunk (so the last value is the full reply). On the
        non-search path, whatever ``generate_chat_response_with_history``
        yields.
    """
    condensed_question = condense_question(message, history)

    # Guard clause: non-search messages go straight to the plain chat responder.
    if not is_search_query(condensed_question):
        yield from generate_chat_response_with_history(message, history)
        return

    # `search`'s contract is not visible from this file; treat a None return as
    # "no results" so we fall through to the offline index instead of crashing
    # on len(None).
    search_results = search(message, condensed_question) or []
    print(f'Search results returned: {len(search_results)}')

    # Each result contributes a newline followed by its text. The inner join
    # keeps the original handling of `result['text']` (works for a str or a
    # sequence of str fragments).
    relevant_content = "".join(
        "\n" + "".join(result['text']) for result in search_results
    )

    if relevant_content:
        index = VectorStoreIndex.from_documents([Document(text=relevant_content)])
        print('Search results vectorized...')
    else:
        print('Assistant Response: Sorry, no search results found, trying with offline resources.')
        index = read_write_index(path='storage_search/')

    response = generate_chat_response_with_history_rag_return_response(index, message, history)

    # Emit the growing reply after every chunk so the UI can render it
    # incrementally.
    string_output = ""
    for text in response.response_gen:
        string_output += text
        yield string_output

    print(f'Assistant Response: {string_output}')


if __name__ == '__main__':
    # ---- Logging setup -------------------------------------------------
    # Root logger at INFO; the agent logger additionally writes to a log file
    # whose name embeds the start time (lower-cased, spaces and colons removed).
    logging.getLogger().setLevel(logging.INFO)

    run_stamp = asctime().lower().replace(":", "").replace(" ", "")
    log_file_handler = logging.FileHandler(f'agent_log_{run_stamp}.log', mode='a')
    log_file_handler.setFormatter(logging.Formatter(
        '%(asctime)-15s::%(levelname)s::%(filename)s::%(funcName)s::%(lineno)d::%(message)s'))

    logger = logging.getLogger("agent_logger")
    # Drop any file handlers already attached so only this run's file is used.
    stale_file_handlers = [h for h in logger.handlers if isinstance(h, logging.FileHandler)]
    for stale in stale_file_handlers:
        logger.removeHandler(stale)
    logger.addHandler(log_file_handler)
    logger.setLevel(logging.INFO)

    # ---- LLM configuration ---------------------------------------------
    # The API key comes from the `gpt_api_key` environment variable
    # (os.getenv yields None when it is unset).
    # Model: GPT-4 Turbo preview, intended to reduce "laziness" where the
    # model does not complete a task; returns at most 4,096 output tokens.
    # See https://openai.com/blog/new-embedding-models-and-api-updates
    set_llm(key=os.getenv('gpt_api_key'), model="gpt-4-0125-preview", temperature=0)

    # ---- UI ------------------------------------------------------------
    print("Launching Gradio ChatInterface for searchbot...")

    chat_ui_options = dict(
        title="Search Assistant",
        theme="soft",
        retry_btn=None,
        undo_btn=None,
        clear_btn=None,
    )
    demo = gr.ChatInterface(fn=google_search_chat, **chat_ui_options)
    # To gate access, pass auth=(username, password) to launch(); load the
    # credentials from the environment rather than hard-coding them here.
    demo.launch()