import os
import datetime
from typing import Any, Dict, Union

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig, AutoModel
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.memory import ConversationBufferWindowMemory

from functions.context_engineering import get_context_data


def load_model(model_id: str = "teknium/OpenHermes-2.5-Mistral-7B") -> tuple:
    """
    Load the LLM and its corresponding tokenizer.

    Args:
        model_id (str, optional): Identifier for the pre-trained model.
            Defaults to "teknium/OpenHermes-2.5-Mistral-7B".

    Returns:
        tuple: A tuple containing the loaded model and tokenizer.
    """
    # Load the tokenizer, caching it on disk after the first download
    tokenizer_path = "./mistral/tokenizer"
    if not os.path.isdir(tokenizer_path):
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tokenizer_path)
    else:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    # Set the pad token to the unknown token to handle padding
    tokenizer.pad_token = tokenizer.unk_token

    # Set the padding side to "right" to prevent warnings during tokenization
    tokenizer.padding_side = "right"

    # BitsAndBytesConfig int-4 config
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model_path = "/tmp/mistral/model"
    if os.path.exists(model_path):
        print("Loading model from disk")
        model_llm = AutoModelForCausalLM.from_pretrained(model_path)
    else:
        # Load the model with the 4-bit quantization configuration and cache it on disk
        model_llm = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            quantization_config=bnb_config,
        )
        model_llm.save_pretrained(model_path)

    # Configure the pad token ID in the model to match the tokenizer's pad token ID
    model_llm.config.pad_token_id = tokenizer.pad_token_id

    return model_llm, tokenizer


def get_prompt_template():
    """
    Retrieve a template for generating prompts in a conversational AI system.

    Returns:
        str: The prompt template. It contains the system instructions as literal
            text and placeholders for the context table, today's date, and the
            user query.
    """
    prompt_template = """<|im_start|>system
You are one of the best air quality experts in the world.

###INSTRUCTIONS:
- If you don't know the answer, you will respond politely that you cannot help.
- Use the context table with air quality indicators for the city provided by the user to generate your answer.
- Your answer should be at least one sentence.
- Do not show any calculations to the user.
- Make sure that you use the correct air quality indicators for the corresponding date.
- Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc.
- Do not mention in your answer that you are using a context table.
<|im_end|>

### CONTEXT:
{context}

IMPORTANT: Today is {date_today}.

<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""

    return prompt_template
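
# Note (a sketch of how the template is consumed): the local LLM path fills all
# three placeholders through LangChain's PromptTemplate in get_llm_chain below,
# while the OpenAI path keeps only the part before '<|im_start|>user' as the
# system message and fills {context} and {date_today} itself (see
# generate_response_openai), roughly:
#
#     system_part = get_prompt_template().split('<|im_start|>user')[0]
#     system_filled = system_part.format(
#         context="<context table>",        # illustrative placeholder values only
#         date_today="Monday, 2024-03-04",
#     )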


def get_llm_chain(model_llm, tokenizer):
    """
    Create and configure a language model chain.

    Args:
        model_llm: The pre-trained language model for text generation.
        tokenizer: The tokenizer corresponding to the language model.

    Returns:
        LLMChain: The configured language model chain.
    """
    # Create a text generation pipeline using the loaded model and tokenizer
    text_generation_pipeline = transformers.pipeline(
        model=model_llm,          # The pre-trained language model for text generation
        tokenizer=tokenizer,      # The tokenizer corresponding to the language model
        task="text-generation",   # Specify the task as text generation
        use_cache=True,
        do_sample=True,
        temperature=0.4,
        top_p=1.0,
        top_k=0,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Wrap the text generation pipeline in a LangChain-compatible LLM
    mistral_llm = HuggingFacePipeline(
        pipeline=text_generation_pipeline,
    )

    # Create the prompt from the prompt template
    prompt = PromptTemplate(
        input_variables=["context", "question", "date_today"],
        template=get_prompt_template(),
    )

    # Create the LLM chain
    llm_chain = LLMChain(
        llm=mistral_llm,
        prompt=prompt,
        verbose=False,
    )

    return llm_chain
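
# Example (a minimal sketch, assuming a CUDA GPU able to host the 4-bit model
# and that bitsandbytes is installed): the chain is typically built once at
# start-up and reused for every request:
#
#     model_llm, tokenizer = load_model()
#     llm_chain = get_llm_chain(model_llm, tokenizer)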


def generate_response(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    model_llm,
    tokenizer,
    llm_chain=None,
    verbose: bool = False,
) -> str:
    """
    Generate a response to the user query using the LLM chain and context data.

    Args:
        user_query (str): The user's query.
        feature_view: Feature view for data retrieval.
        weather_fg: Weather feature group used for context retrieval.
        model_air_quality: Model for predicting air quality.
        model_llm: Language model for text generation.
        tokenizer: Tokenizer for processing text.
        llm_chain: LLM chain.
        verbose (bool): Whether to print verbose information. Defaults to False.

    Returns:
        str: Generated response to the user query.
    """
    # Get context data based on the user query
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        model_llm=model_llm,
        tokenizer=tokenizer,
    )

    # Get today's date in a readable format
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    # Print today's date and context information if verbose mode is enabled
    if verbose:
        print(f"Today's date: {date_today}")
        print(f"Context: {context}")

    # Invoke the language model chain with the relevant context
    model_output = llm_chain.invoke({
        "context": context,
        "date_today": date_today,
        "question": user_query,
    })

    # Return only the assistant part of the generated text
    return model_output['text'].split('<|im_start|>assistant')[-1]
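
# Example call (a sketch; `feature_view`, `weather_fg` and `model_air_quality`
# are assumed to come from the surrounding project, for example a feature store
# and a trained forecasting model, and are not defined in this module; the
# remaining arguments come from load_model and get_llm_chain above):
#
#     answer = generate_response(
#         "Will the air quality be safe for a walk tomorrow?",
#         feature_view,
#         weather_fg,
#         model_air_quality,
#         model_llm,
#         tokenizer,
#         llm_chain=llm_chain,
#         verbose=True,
#     )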


def generate_response_openai(
    user_query: str,
    feature_view,
    weather_fg,
    model_air_quality,
    client,
    verbose=True,
):
    """
    Generate a response to the user query using an OpenAI chat model instead of
    the local LLM chain.

    Args:
        user_query (str): The user's query.
        feature_view: Feature view for data retrieval.
        weather_fg: Weather feature group used for context retrieval.
        model_air_quality: Model for predicting air quality.
        client: OpenAI client used for chat completions.
        verbose (bool): Whether to print verbose information. Defaults to True.

    Returns:
        str: Generated response to the user query.
    """
    # Get context data based on the user query
    context = get_context_data(
        user_query,
        feature_view,
        weather_fg,
        model_air_quality,
        client=client,
    )

    # Get today's date in a readable format
    date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}'

    # Print today's date and context information if verbose mode is enabled
    if verbose:
        print(f"Today's date: {date_today}")
        print(f"Context: {context}")

    # Keep only the system part of the prompt template and fill in its placeholders
    instructions = get_prompt_template().split('<|im_start|>user')[0]
    instructions_filled = instructions.format(
        context=context,
        date_today=date_today,
    )

    completion = client.chat.completions.create(
        model="gpt-4-0125-preview",
        messages=[
            {"role": "system", "content": instructions_filled},
            {"role": "user", "content": user_query},
        ]
    )

    # Extract and return the assistant's reply from the response
    if completion and completion.choices:
        last_choice = completion.choices[0]
        if last_choice.message:
            return last_choice.message.content.strip()
    return ""