import os
import datetime

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain

from functions.context_engineering import get_context_data

def load_model(model_id: str = "teknium/OpenHermes-2.5-Mistral-7B") -> tuple:
    """
    Load the LLM and its corresponding tokenizer.

    Args:
        model_id (str, optional): Identifier for the pre-trained model. Defaults to "teknium/OpenHermes-2.5-Mistral-7B".

    Returns:
        tuple: A tuple containing the loaded model and tokenizer.
    """

    # Load the tokenizer for the Mistral-7B-Instruct model, caching it locally
    tokenizer_path = "./mistral/tokenizer"
    if not os.path.isdir(tokenizer_path):
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tokenizer_path)
    else:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # Set the pad token to the unknown token to handle padding
    tokenizer.pad_token = tokenizer.unk_token

    # Set the padding side to "right" to prevent warnings during tokenization
    tokenizer.padding_side = "right"

    # BitsAndBytesConfig int-4 config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model_path = "/tmp/mistral/model"
    if os.path.exists(model_path):
        print("Loading model from disk")
        # Re-load the locally cached model; device_map="auto" places the
        # quantized weights on the available GPU(s)
        model_llm = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto",
        )
    else:
        # Load the Mistral-7B-Instruct model with the quantization configuration
        model_llm = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            quantization_config=bnb_config,
        )
        # Cache the quantized model locally so subsequent runs skip the download
        model_llm.save_pretrained(model_path)

    # Configure the pad token ID in the model to match the tokenizer's pad token ID
    model_llm.config.pad_token_id = tokenizer.pad_token_id

    return model_llm, tokenizer
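

# Example usage of `load_model` (illustrative sketch): the first call downloads
# the weights, caches the tokenizer under ./mistral/tokenizer and the model
# under /tmp/mistral/model, and assumes a CUDA-capable GPU is available for the
# 4-bit quantized load.
#
#     model_llm, tokenizer = load_model()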


def get_prompt_template():
    """
    Retrieve a template for generating prompts in a conversational AI system.

    Returns:
        str: A string representing the template for generating prompts.
            This template includes placeholders for system information,
            instructions, previous conversation, context, date and user query.
    """
    prompt_template = """<|im_start|>system
You are one of the best air quality experts in the world.

###INSTRUCTIONS:
- If you don't know the answer, you will respond politely that you cannot help.
- Use the context table with air quality indicators for the city provided by the user to generate your answer.
- Your answer should be at least one sentence.
- Do not show any calculations to the user.
- Make sure that you use correct air quality indicators for the corresponding date.
- Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc.
- Do not mention in your answer that you are using a context table.
<|im_end|>

### CONTEXT:
{context}

IMPORTANT: Today is {date_today}.

<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""
    return prompt_template


def get_llm_chain(model_llm, tokenizer):
    """
    Create and configure a language model chain.

    Args:
        model_llm: The pre-trained language model for text generation.
        tokenizer: The tokenizer corresponding to the language model.

    Returns:
        LLMChain: The configured language model chain.
    """
    # Create a text generation pipeline using the loaded model and tokenizer
    text_generation_pipeline = transformers.pipeline(
        model=model_llm,                      # The pre-trained language model for text generation
        tokenizer=tokenizer,                  # The tokenizer corresponding to the language model
        task="text-generation",               # Specify the task as text generation
        use_cache=True,
        do_sample=True,
        temperature=0.4,
        top_p=1.0,
        top_k=0,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Create a Hugging Face pipeline for Mistral LLM using the text generation pipeline
    mistral_llm = HuggingFacePipeline(
        pipeline=text_generation_pipeline,
    )

    # Create prompt from prompt template
    prompt = PromptTemplate(
        input_variables=["context", "question", "date_today"],
        template=get_prompt_template(),
    )

    # Create LLM chain
    llm_chain = LLMChain(
        llm=mistral_llm,
        prompt=prompt,
        verbose=False,
    )

    return llm_chain
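

# Example usage of `get_llm_chain` (illustrative sketch): the context string
# below is a hand-written stand-in for the table normally produced by
# `get_context_data`, and the question and date values are made up for the
# example.
#
#     model_llm, tokenizer = load_model()
#     llm_chain = get_llm_chain(model_llm, tokenizer)
#     output = llm_chain.invoke({
#         "context": "2024-01-01: PM2.5 = 12 (London)",
#         "date_today": "Monday, 2024-01-01",
#         "question": "Is the air in London safe for a walk today?",
#     })
#     print(output["text"].split("<|im_start|>assistant")[-1])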


def generate_response(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    model_llm,
    tokenizer,
    llm_chain=None,
    verbose: bool = False,
) -> str:
    """
    Generate response to user query using LLM chain and context data.

    Args:
        user_query (str): The user's query.
        feature_view: Feature view for data retrieval.
        weather_fg: Weather feature group used for context retrieval.
        model_air_quality: Model for predicting air quality.
        model_llm: Language model for text generation.
        tokenizer: Tokenizer for processing text.
        llm_chain: Configured LLM chain used to generate the response.
        verbose (bool): Whether to print verbose information. Defaults to False.

    Returns:
        str: Generated response to the user query.
    """
    # Get context data based on user query
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        model_llm=model_llm,
        tokenizer=tokenizer,
    )

    # Get today's date in a readable format
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    # Print today's date and context information if verbose mode is enabled
    if verbose:
        print(f"πŸ—“οΈ Today's date: {date_today}")
        print(f'πŸ“– {context}')

    # Build a chain on the fly if one was not supplied
    if llm_chain is None:
        llm_chain = get_llm_chain(model_llm, tokenizer)

    # Invoke the language model chain with the relevant context
    model_output = llm_chain.invoke({
        "context": context,
        "date_today": date_today,
        "question": user_query,
    })

    # Return the generated text from the model output
    return model_output['text'].split('<|im_start|>assistant')[-1]
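

# Example usage of `generate_response` (illustrative sketch): `feature_view`,
# `weather_fg` and `model_air_quality` are assumed to come from the project's
# feature-store and model-registry setup and are not created here.
#
#     response = generate_response(
#         "What is the air quality in London today?",
#         feature_view,
#         weather_fg,
#         model_air_quality,
#         model_llm,
#         tokenizer,
#         llm_chain=llm_chain,
#         verbose=True,
#     )
#     print(response)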


def generate_response_openai(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    client,
    verbose: bool = True,
) -> str:
    """
    Generate a response to the user query using the OpenAI chat completions API.

    Mirrors `generate_response`, but delegates text generation to the provided
    OpenAI `client` instead of the local Mistral pipeline.
    """
    # Get context data based on the user query
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        client=client,
    )

    # Get today's date in a readable format
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    # Print today's date and context information if verbose mode is enabled
    if verbose:
        print(f"πŸ—“οΈ Today's date: {date_today}")
        print(f'πŸ“– {context}')

    instructions = get_prompt_template().split('<|im_start|>user')[0]

    instructions_filled = instructions.format(
        context=context,
        date_today=date_today
    )

    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[
            {"role": "system", "content": instructions_filled},
            {"role": "user", "content": user_query},
        ]
    )

    # Extract and return the assistant's reply from the response
    if completion and completion.choices:
        last_choice = completion.choices[0]
        if last_choice.message:
            return last_choice.message.content.strip()
    return ""