adupav committed
Commit
bb1b69d
1 Parent(s): e315566

Upload app.py

Files changed (1)
  1. app.py +9 -4
app.py CHANGED
@@ -66,6 +66,7 @@ def respond(
     temperature,
     top_p,
     embeddings_data,
+    tokenizer,
     model
 ):
     logging.info(f"New user query: {message}")
@@ -73,12 +74,12 @@ def respond(
     start_time = time.time()
 
     # Search for relevant documents based on user input
-    relevant_docs = get_relevant_documents(message, embeddings_data, model)
+    relevant_docs = get_relevant_documents(message, embeddings_data, tokenizer, model)
     retrieved_context = format_documents(relevant_docs)
 
     # Log the statistics about the retrieved documents
     logging.info(f"Total documents retrieved: {len(relevant_docs)}")
-    logging.info(f"Documents: " + {[doc['name'] for doc in relevant_docs]})
+    logging.info(f"Documents: " + str([doc['name'] for doc in relevant_docs]))
 
     # Add the retrieved context as part of the system message
     system_message_with_context = system_message + "\n\n" + "Relevant documents:\n" + retrieved_context
@@ -95,10 +96,12 @@
     messages.append({"role": "user", "content": message})
     logging.info("Messages prepared for InferenceClient")
 
-    response = ""
     client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
     logging.info("Sending request to InferenceClient")
+    response = ""
+
+    # Collect the full response instead of yielding each token
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -108,11 +111,13 @@
     ):
         token = message.choices[0].delta.content
         response += token
-        yield response
 
     end_time = time.time()
     total_duration = end_time - start_time
     logging.info(f"Response generated in {total_duration:.2f} seconds")
+
+    return response  # Return the complete response as a string
+
 
 # Load embeddings and model once at startup
 embeddings_file = 'Code Civil vectorised.json'
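The first change threads a `tokenizer` through to `get_relevant_documents`, whose body is not part of this commit. A minimal sketch of what such a retrieval function might look like, assuming each entry in `embeddings_data` carries `name`, `text`, and a precomputed `embedding` vector (only `name` is confirmed by the logging call above) and that `tokenizer`/`model` are a Hugging Face encoder pair:

```python
import numpy as np
import torch

def get_relevant_documents(message, embeddings_data, tokenizer, model, top_k=5):
    # Embed the query with the same encoder that vectorised the corpus
    inputs = tokenizer(message, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool token embeddings into a single query vector
    query_vec = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

    def cosine(a, b):
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    # Rank documents by cosine similarity against the precomputed embeddings
    scored = sorted(
        embeddings_data,
        key=lambda doc: cosine(query_vec, np.array(doc["embedding"])),
        reverse=True,
    )
    return scored[:top_k]
```

Mean pooling and cosine similarity are common defaults for this kind of lookup; the actual app may pool or rank differently.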
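The second change removes `yield response` and adds `return response`, turning `respond` from a generator into a plain function: the token stream from `chat_completion` is still consumed in full, but the caller now sees only the finished reply instead of incremental partials. A self-contained toy illustrating the two calling conventions (names here are illustrative, not from app.py):

```python
def respond_streaming():
    response = ""
    for token in ["Bon", "jour", "!"]:  # stand-in for the model's token stream
        response += token
        yield response                   # caller sees each partial reply

def respond_full():
    response = ""
    for token in ["Bon", "jour", "!"]:
        response += token
    return response                      # caller sees only the final reply

for partial in respond_streaming():
    print(partial)        # Bon / Bonjour / Bonjour!
print(respond_full())     # Bonjour!
```

Assuming the surrounding app is a Gradio chat app, a generator fn streams text into the chat UI as it arrives, while a string-returning fn renders the reply only once complete, which matches the commit's own comment "Collect the full response instead of yielding each token".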