adupav committed on
Commit cd3d014 · verified · 1 Parent(s): d8c9651

Upload app.py

Files changed (1)
  1. app.py +42 -4
app.py CHANGED
@@ -4,6 +4,11 @@ import json
 import numpy as np
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
+import logging
+import time
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
@@ -11,27 +16,42 @@ For more information on `huggingface_hub` Inference API support, please check th
 
 # Load embeddings from a JSON file
 def load_embeddings(file_path):
+    logging.info(f"Loading embeddings from {file_path}")
     with open(file_path, 'r', encoding='utf-8') as file:
-        return json.load(file)
+        embeddings = json.load(file)
+        logging.info(f"Loaded {len(embeddings)} embeddings")
+        return embeddings
 
 # Function to get relevant articles based on user query
 def get_relevant_documents(query, embeddings_data, model, top_k=3):
+    logging.info(f"Received query: {query}")
+    start_time = time.time()
+
     query_embedding = model.encode(query)
     similarities = []
 
-    for entry in embeddings_data:
+    for i, entry in enumerate(embeddings_data):
         embedding = np.array(entry['embedding'])
         similarity = cosine_similarity([query_embedding], [embedding])[0][0]
         similarities.append((entry, similarity))
+        if i % 100 == 0:  # Log every 100 iterations
+            logging.debug(f"Processed {i} embeddings")
 
-    # Sort by similarity and return top_k relevant entries
+    logging.info("Sorting similarities")
     similarities.sort(key=lambda x: x[1], reverse=True)
     top_entries = [entry for entry, _ in similarities[:top_k]]
 
+    end_time = time.time()
+    duration = end_time - start_time
+
+    logging.info(f"Query processed in {duration:.2f} seconds")
+    logging.info(f"Top {top_k} documents returned with similarities: {[sim[1] for sim in similarities[:top_k]]}")
+
     return top_entries
 
 # Function to format relevant documents into a string
 def format_documents(documents):
+    logging.info(f"Formatting {len(documents)} documents")
     formatted = ""
     for doc in documents:
         formatted += f"Relevant article: {doc['name']}\n{doc['content']}\n\n"
@@ -48,12 +68,21 @@ def respond(
     embeddings_data,
     model
 ):
+    logging.info(f"New user query: {message}")
+
+    start_time = time.time()
+
     # Search for relevant documents based on user input
     relevant_docs = get_relevant_documents(message, embeddings_data, model)
     retrieved_context = format_documents(relevant_docs)
 
+    # Log the statistics about the retrieved documents
+    logging.info(f"Total documents retrieved: {len(relevant_docs)}")
+    logging.info(f"Documents: {[doc['name'] for doc in relevant_docs]}")
+
     # Add the retrieved context as part of the system message
     system_message_with_context = system_message + "\n\n" + "Relevant documents:\n" + retrieved_context
+    logging.info("System message updated with retrieved context")
 
     messages = [{"role": "system", "content": system_message_with_context}]
 
@@ -64,10 +93,12 @@
             messages.append({"role": "assistant", "content": val[1]})
 
     messages.append({"role": "user", "content": message})
+    logging.info("Messages prepared for InferenceClient")
 
     response = ""
    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+    logging.info("Sending request to InferenceClient")
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -78,11 +109,17 @@
         token = message.choices[0].delta.content
         response += token
         yield response
+
+    end_time = time.time()
+    total_duration = end_time - start_time
+    logging.info(f"Response generated in {total_duration:.2f} seconds")
 
 # Load embeddings and model once at startup
 embeddings_file = 'Code Civil vectorised.json'
+logging.info("Starting application, loading embeddings and model")
 embeddings_data = load_embeddings(embeddings_file)
-embedding_model = SentenceTransformer('Lajavaness/bilingual-embedding-small', trust_remote_code=True)
+embedding_model = SentenceTransformer('Lajavaness/bilingual-embedding-small')
+logging.info("Model and embeddings loaded successfully")
 
 # Gradio interface
 demo = gr.ChatInterface(
@@ -104,4 +141,5 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
+    logging.info("Launching Gradio app")
     demo.launch()
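For reference, the three helpers touched above jointly assume that each entry in `Code Civil vectorised.json` is an object with `name`, `content`, and `embedding` keys: `load_embeddings` reads the list, `get_relevant_documents` scores `entry['embedding']`, and `format_documents` renders `doc['name']` and `doc['content']`. A minimal sketch of that assumed shape (the values below are placeholders, not taken from the real file):

```python
# Hypothetical single entry from 'Code Civil vectorised.json'; the real file
# holds one such object per article, with a full-length embedding vector
# produced by the same sentence-transformers model used at query time.
example_entry = {
    "name": "Article 1",                      # article identifier shown to the user
    "content": "<full text of the article>",  # raw text appended to the system prompt
    "embedding": [0.021, -0.044, 0.009],      # truncated vector, for illustration only
}
```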
 
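Separately from the logging added in this commit, `get_relevant_documents` invokes `cosine_similarity` once per stored entry, i.e. one scikit-learn call per document per query. If that ever becomes the bottleneck the timing logs are meant to reveal, the scores can be computed in a single matrix operation. A minimal sketch of that alternative, assuming all stored embeddings share one dimensionality (not part of this commit):

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_relevant_documents_vectorized(query, embeddings_data, model, top_k=3):
    # Stack every stored vector into one (n_docs, dim) matrix up front.
    matrix = np.array([entry['embedding'] for entry in embeddings_data])
    query_embedding = model.encode(query).reshape(1, -1)
    # A single call scores all documents at once instead of n separate calls.
    scores = cosine_similarity(query_embedding, matrix)[0]
    # argsort is ascending, so take the last top_k indices and reverse them.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [embeddings_data[i] for i in top_indices]
```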
 
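Two small caveats in the committed code: with `logging.basicConfig(level=logging.INFO)`, the `logging.debug(f"Processed {i} embeddings")` progress lines are filtered out and never appear, and the streaming loop reuses `message` as its loop variable, shadowing the user `message` argument of `respond`. A possible follow-up sketch for both, assuming the elided `chat_completion` arguments include `stream=True`, as the use of `delta.content` suggests (hypothetical, not part of this commit):

```python
# Use DEBUG so the per-100-entries progress lines are actually emitted.
logging.basicConfig(level=logging.DEBUG)

# Rename the loop variable so it no longer shadows respond()'s `message` argument.
for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
    token = chunk.choices[0].delta.content
    response += token
    yield response
```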