aidevhund committed on
Commit
6804742
·
1 Parent(s): 6850f57

Copied files from source repository

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. app.py +97 -40
  3. markdowm.py +53 -28
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: QA Bot
3
  emoji: 📚
4
  colorFrom: pink
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Document QA Bot
3
  emoji: 📚
4
  colorFrom: pink
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -12,12 +12,30 @@ import base64
12
  # Load environment variables
13
  load_dotenv()
14
 
15
- llm_models = {
16
- "tiiuae/falcon-7b-instruct": "HundAI-7B-S",
17
- "mistralai/Mixtral-8x7B-Instruct-v0.1": "Mixtral-8x7B",
18
- "meta-llama/Meta-Llama-3-8B-Instruct": "Meta-Llama-8B",
19
- "mistralai/Mistral-7B-Instruct-v0.2": "Mistral-7B",
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  embed_models = [
23
  "BAAI/bge-small-en-v1.5", # 33.4M
@@ -27,26 +45,41 @@ embed_models = [
27
  ]
28
 
29
  # Global variable for selected model
30
- selected_llm_model_name = list(llm_models.keys())[0] # Default to the first model in the dictionary
 
31
  vector_index = None
32
 
33
  # Initialize the parser
34
  parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
 
35
  file_extractor = {
36
- '.pdf': parser,
37
- '.docx': parser,
38
- '.txt': parser,
39
- '.csv': parser,
40
- '.xlsx': parser,
41
- '.pptx': parser,
42
- '.html': parser,
43
- '.jpg': parser,
44
- '.jpeg': parser,
45
- '.png': parser,
46
- '.webp': parser,
47
- '.svg': parser,
 
 
 
 
 
 
 
 
 
 
 
 
48
  }
49
 
 
50
  # File processing function
51
  def load_files(file_path: str, embed_model_name: str):
52
  try:
@@ -54,67 +87,91 @@ def load_files(file_path: str, embed_model_name: str):
54
  document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
55
  embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
56
  vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
 
57
  filename = os.path.basename(file_path)
58
  return f"Ready to give response on {filename}"
59
  except Exception as e:
60
  return f"An error occurred: {e}"
61
 
 
62
  # Function to handle the selected model from dropdown
63
  def set_llm_model(selected_model):
64
  global selected_llm_model_name
65
- selected_llm_model_name = next(key for key, value in llm_models.items() if value == selected_model)
 
 
66
 
67
- # Respond function
 
68
  def respond(message, history):
69
  try:
 
70
  llm = HuggingFaceInferenceAPI(
71
  model_name=selected_llm_model_name,
72
- contextWindow=8192,
73
- maxTokens=1024,
74
- temperature=0.3,
75
- topP=0.9,
76
- frequencyPenalty=0.5,
77
- presencePenalty=0.5,
78
  token=os.getenv("TOKEN")
79
  )
 
 
80
  query_engine = vector_index.as_query_engine(llm=llm)
81
  bot_message = query_engine.query(message)
82
- return f"{llm_models[selected_llm_model_name]}:\n{str(bot_message)}"
 
 
83
  except Exception as e:
84
  if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
85
  return "Please upload a file."
86
  return f"An error occurred: {e}"
87
 
 
 
 
 
 
 
 
 
 
88
  # UI Setup
89
- with gr.Blocks(theme='Hev832/Applio', css='footer {visibility: hidden}') as demo:
90
- gr.Markdown("")
91
  with gr.Tabs():
92
- with gr.TabItem("Introduction"):
93
  gr.Markdown(md.description)
94
 
95
- with gr.TabItem("Chatbot"):
96
- with gr.Accordion("IMPORTANT: READ ME FIRST", open=False):
97
  guid = gr.Markdown(md.guide)
98
  with gr.Row():
99
  with gr.Column(scale=1):
100
- file_input = gr.File(file_count="single", type='filepath', label="Upload document")
101
- embed_model_dropdown = gr.Dropdown(embed_models, label="Select Embedding", interactive=True)
 
102
  with gr.Row():
103
  btn = gr.Button("Submit", variant='primary')
104
  clear = gr.ClearButton()
105
  output = gr.Text(label='Vector Index')
106
- llm_model_dropdown = gr.Dropdown(list(llm_models.values()), label="Select LLM", interactive=True)
107
  with gr.Column(scale=3):
108
  gr.ChatInterface(
109
  fn=respond,
110
  chatbot=gr.Chatbot(height=500),
111
- theme="soft",
112
- textbox=gr.Textbox(placeholder="Ask me any questions on the uploaded document!", container=False)
 
 
113
  )
114
-
 
115
  llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
116
  btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
117
  clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
118
 
 
119
  if __name__ == "__main__":
120
- demo.launch()
 
12
  # Load environment variables
13
  load_dotenv()
14
 
15
+ llm_models = [
16
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
17
+ "meta-llama/Meta-Llama-3-8B-Instruct",
18
+ "mistralai/Mistral-7B-Instruct-v0.2",
19
+ "tiiuae/falcon-7b-instruct",
20
+ # "mistralai/Mixtral-8x22B-Instruct-v0.1", ## 281GB>10GB
21
+ # "NousResearch/Yarn-Mistral-7b-64k", ## 14GB>10GB
22
+ # "impira/layoutlm-document-qa", ## ERR
23
+ # "Qwen/Qwen1.5-7B", ## 15GB
24
+ # "Qwen/Qwen2.5-3B", ## high response time
25
+ # "google/gemma-2-2b-jpn-it", ## high response time
26
+ # "impira/layoutlm-invoices", ## bad req
27
+ # "google/pix2struct-docvqa-large", ## bad req
28
+ # "google/gemma-7b-it", ## 17GB > 10GB
29
+ # "google/gemma-2b-it", ## high response time
30
+ # "HuggingFaceH4/zephyr-7b-beta", ## high response time
31
+ # "HuggingFaceH4/zephyr-7b-gemma-v0.1", ## bad req
32
+ # "microsoft/phi-2", ## high response time
33
+ # "TinyLlama/TinyLlama-1.1B-Chat-v1.0", ## high response time
34
+ # "mosaicml/mpt-7b-instruct", ## 13GB>10GB
35
+ # "google/flan-t5-xxl" ## high response time
36
+ # "NousResearch/Yarn-Mistral-7b-128k", ## 14GB>10GB
37
+ # "Qwen/Qwen2.5-7B-Instruct", ## 15GB>10GB
38
+ ]
39
 
40
  embed_models = [
41
  "BAAI/bge-small-en-v1.5", # 33.4M
 
45
  ]
46
 
47
  # Global variable for selected model
48
+ selected_llm_model_name = llm_models[0] # Default to the first model in the list
49
+ selected_embed_model_name = embed_models[0] # Default to the first model in the list
50
  vector_index = None
51
 
52
  # Initialize the parser
53
  parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
54
+ # Define file extractor with various common extensions
55
  file_extractor = {
56
+ '.pdf': parser, # PDF documents
57
+ '.docx': parser, # Microsoft Word documents
58
+ '.doc': parser, # Older Microsoft Word documents
59
+ '.txt': parser, # Plain text files
60
+ '.csv': parser, # Comma-separated values files
61
+ '.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
62
+ '.pptx': parser, # Microsoft PowerPoint files (for slides)
63
+ '.html': parser, # HTML files (web pages)
64
+ # '.rtf': parser, # Rich Text Format files
65
+ # '.odt': parser, # OpenDocument Text files
66
+ # '.epub': parser, # ePub files (e-books)
67
+
68
+ # Image files for OCR processing
69
+ '.jpg': parser, # JPEG images
70
+ '.jpeg': parser, # JPEG images
71
+ '.png': parser, # PNG images
72
+ # '.bmp': parser, # Bitmap images
73
+ # '.tiff': parser, # TIFF images
74
+ # '.tif': parser, # TIFF images (alternative extension)
75
+ # '.gif': parser, # GIF images (can contain text)
76
+
77
+ # Scanned documents in image formats
78
+ '.webp': parser, # WebP images
79
+ '.svg': parser, # SVG files (vector format, may contain embedded text)
80
  }
81
 
82
+
83
  # File processing function
84
  def load_files(file_path: str, embed_model_name: str):
85
  try:
 
87
  document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
88
  embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
89
  vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
90
+ print(f"Parsing done for {file_path}")
91
  filename = os.path.basename(file_path)
92
  return f"Ready to give response on {filename}"
93
  except Exception as e:
94
  return f"An error occurred: {e}"
95
 
96
+
97
  # Function to handle the selected model from dropdown
98
  def set_llm_model(selected_model):
99
  global selected_llm_model_name
100
+ selected_llm_model_name = selected_model # Update the global variable
101
+ # print(f"Model selected: {selected_model_name}")
102
+ # return f"Model set to: {selected_model_name}"
103
 
104
+
105
+ # Respond function that uses the globally set selected model
106
  def respond(message, history):
107
  try:
108
+ # Initialize the LLM with the selected model
109
  llm = HuggingFaceInferenceAPI(
110
  model_name=selected_llm_model_name,
111
+ contextWindow=8192, # Context window size (typically max length of the model)
112
+ maxTokens=1024, # Tokens per response generation (512-1024 works well for detailed answers)
113
+ temperature=0.3, # Lower temperature for more focused answers (0.2-0.4 for factual info)
114
+ topP=0.9, # Top-p sampling to control diversity while retaining quality
115
+ frequencyPenalty=0.5, # Slight penalty to avoid repetition
116
+ presencePenalty=0.5, # Encourages exploration without digressing too much
117
  token=os.getenv("TOKEN")
118
  )
119
+
120
+ # Set up the query engine with the selected LLM
121
  query_engine = vector_index.as_query_engine(llm=llm)
122
  bot_message = query_engine.query(message)
123
+
124
+ print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
125
+ return f"{selected_llm_model_name}:\n{str(bot_message)}"
126
  except Exception as e:
127
  if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
128
  return "Please upload a file."
129
  return f"An error occurred: {e}"
130
 
131
+ def encode_image(image_path):
132
+ with open(image_path, "rb") as image_file:
133
+ return base64.b64encode(image_file.read()).decode('utf-8')
134
+
135
+ # Encode the images
136
+ github_logo_encoded = encode_image("Images/github-logo.png")
137
+ linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
138
+ website_logo_encoded = encode_image("Images/ai-logo.png")
139
+
140
  # UI Setup
141
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
142
+ gr.Markdown("# DocBotπŸ“„πŸ€–")
143
  with gr.Tabs():
144
+ with gr.TabItem("Intro"):
145
  gr.Markdown(md.description)
146
 
147
+ with gr.TabItem("DocBot"):
148
+ with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
149
  guid = gr.Markdown(md.guide)
150
  with gr.Row():
151
  with gr.Column(scale=1):
152
+ file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
153
+ # gr.Markdown("Dont know what to select check out in Intro tab")
154
+ embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
155
  with gr.Row():
156
  btn = gr.Button("Submit", variant='primary')
157
  clear = gr.ClearButton()
158
  output = gr.Text(label='Vector Index')
159
+ llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
160
  with gr.Column(scale=3):
161
  gr.ChatInterface(
162
  fn=respond,
163
  chatbot=gr.Chatbot(height=500),
164
+ theme = "soft",
165
+ show_progress='full',
166
+ # cache_mode='lazy',
167
+ textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
168
  )
169
+ gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
170
+ # Set up Gradio interactions
171
  llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
172
  btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
173
  clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
174
 
175
+ # Launch the demo when this file is run as a script
176
  if __name__ == "__main__":
177
+ demo.launch()
markdowm.py CHANGED
@@ -1,10 +1,10 @@
1
  description = '''
2
- # 📄 **QueryVault Chatbots: A RAG-Based chatbots for Interactive Document Querying**
3
 
4
- Welcome to the HundAI QueryVault Chatbot, a sophisticated Retrieval-Augmented Generation (RAG) application that utilizes Large Language Models to answer questions based on documents you upload. This bot is designed to empower you with rapid, insightful responses, providing a choice of language models (LLMs) and embedding models that cater to various requirements, including performance, accuracy, and response time.
5
 
6
  ## ✨ **Application Overview**
7
- With QueryVault Chatbot, you can interactively query your document, receive contextual answers, and dynamically switch between LLMs as needed for optimal results. The bot supports various file formats, allowing you to upload and analyze different types of documents and even some image formats.
8
 
9
  ### **Key Features**
10
  - **Choice of Models:** Access a list of powerful LLMs and embedding models for optimal results.
@@ -14,10 +14,10 @@ With QueryVault Chatbot, you can interactively query your document, receive cont
14
 
15
  ---
16
 
17
- ## 🚀 **Steps to Use the HundAI QueryVault Chatbot**
18
 
19
  1. **Upload Your File**
20
- Begin by uploading a document. Supported formats include .pdf, .docx, .txt, .csv, .xlsx, .pptx, .html, .jpg, .png, and more.
21
 
22
  2. **Select Embedding Model**
23
  Choose an embedding model to parse and index the document’s contents, then submit. Wait for the confirmation message that the document has been successfully indexed.
@@ -39,32 +39,32 @@ Upon uploading a document, the bot utilizes **LlamaParse** to parse its content.
39
  ## 🔍 **Available LLMs and Embedding Models**
40
 
41
  ### **Embedding Models** (For indexing document content)
42
- 1. **BAAI/bge-large-en**
43
  - **Size**: 335M parameters
44
  - **Best For**: Complex, detailed embeddings; slower but yields high accuracy.
45
- 2. **BAAI/bge-small-en-v1.5**
46
  - **Size**: 33.4M parameters
47
  - **Best For**: Faster embeddings, ideal for lighter workloads and quick responses.
48
- 3. **NeuML/pubmedbert-base-embeddings**
49
  - **Size**: 768-dimensional dense vector space
50
  - **Best For**: Biomedical or medical-related text; highly specialized.
51
- 4. **BAAI/llm-embedder**
52
  - **Size**: 109M parameters
53
  - **Best For**: Basic embeddings for straightforward use cases.
54
 
55
  ### **LLMs** (For generating answers)
56
- 1. **Mixtral-8x7B-Instruct**
57
  - **Size**: 46.7B parameters
58
  - **Purpose**: Demonstrates compelling performance with minimal fine-tuning. Suited for unmoderated or exploratory use.
59
- 2. **Meta-Llama-3-8B-Instruct**
60
  - **Size**: 8.03B parameters
61
  - **Purpose**: Optimized for dialogue, emphasizing safety and helpfulness. Excellent for structured, instructive responses.
62
- 3. **Mistral-7B**
63
  - **Size**: 7.24B parameters
64
  - **Purpose**: Fine-tuned for effectiveness; lacks moderation, useful for quick demonstration purposes.
65
- 4. **HundAI-7B-S**
66
  - **Size**: 7.22B parameters
67
- - **Purpose**: Robust fine-tuned model for inference, leveraging large-scale data for highly contextual responses.
68
 
69
  ---
70
 
@@ -74,18 +74,18 @@ The choice of embedding models plays a crucial role in determining the speed and
74
 
75
  | **Scenario** | **Embedding Model** | **Strengths** | **Trade-Offs** |
76
  |:-----------------------------:|:------------------------------------:|:--------------------------------------------------:|:------------------------------------:|
77
- | **Fastest Response** | BAAI/bge-small-en-v1.5 | Speed-oriented, ideal for high-frequency querying | May miss nuanced details |
78
- | **High Accuracy for Large Texts** | BAAI/bge-large-en | High accuracy, captures complex document structure | Slower response time |
79
- | **Balanced General Purpose** | BAAI/llm-embedder | Reliable, quick response, adaptable across topics | Moderate accuracy, general use case |
80
- | **Biomedical & Specialized Text** | NeuML/pubmedbert-base-embeddings | Optimized for medical and scientific text | Specialized, slightly slower |
81
 
82
  ---
83
 
84
  ## 📂 **Supported File Formats**
85
 
86
  The bot supports a range of document formats, making it versatile for various data sources. Below are the currently supported formats:
87
- - **Documents**: .pdf, .docx, .doc, .txt, .csv, .xlsx, .pptx, .html
88
- - **Images**: .jpg, .jpeg, .png, .webp, .svg
89
 
90
  ---
91
 
@@ -106,7 +106,7 @@ The bot supports a range of document formats, making it versatile for various da
106
  ---
107
 
108
  ### 🌟 **Get Started Today and Experience Document-Centric Question Answering**
109
- Whether you're a student, researcher, or professional, HundAI QueryVault Chatbot is your go-to tool for interactive, accurate document analysis. Upload your file, select your model, and dive into a seamless question-answering experience tailored to your document's unique content.
110
  '''
111
 
112
  guide = '''
@@ -114,17 +114,42 @@ guide = '''
114
 
115
  | **Embedding Model** | **Speed (Vector Index)** | **Advantages** | **Trade-Offs** |
116
  |-----------------------------|-------------------|-------------------------------------|---------------------------------|
117
- | BAAI/bge-small-en-v1.5 | **Fastest** | Ideal for quick indexing | May miss nuanced details |
118
- | BAAI/llm-embedder | **Fast** | Balanced performance and detail | Slightly less precise than large models |
119
- | BAAI/bge-large-en | **Slow** | Best overall precision and detail | Slower due to complexity |
120
 
121
 
122
  ### Language Models (LLMs) and Use Cases
123
 
124
  | **LLM** | **Best Use Case** |
125
  |------------------------------------|-----------------------------------------|
126
- | Mixtral-8x7B-Instruct-v0.1 | Works well for **both short and long answers** |
127
- | Meta-Llama-3-8B-Instruct | Ideal for **long-length answers** |
128
- | HundAI-7B-S | Best suited for **short-length answers** |
129
 
130
- '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  description = '''
2
+ # 📄 **Document QA Bot: A RAG-Based Application for Interactive Document Querying**
3
 
4
+ Welcome to the Document QA Bot, a sophisticated Retrieval-Augmented Generation (RAG) application that utilizes **LlamaIndex** and **Hugging Face** models to answer questions based on documents you upload. This bot is designed to empower you with rapid, insightful responses, providing a choice of language models (LLMs) and embedding models that cater to various requirements, including performance, accuracy, and response time.
5
 
6
  ## ✨ **Application Overview**
7
+ With Document QA Bot, you can interactively query your document, receive contextual answers, and dynamically switch between LLMs as needed for optimal results. The bot supports various file formats, allowing you to upload and analyze different types of documents and even some image formats.
8
 
9
  ### **Key Features**
10
  - **Choice of Models:** Access a list of powerful LLMs and embedding models for optimal results.
 
14
 
15
  ---
16
 
17
+ ## 🚀 **Steps to Use the Document QA Bot**
18
 
19
  1. **Upload Your File**
20
+ Begin by uploading a document. Supported formats include `.pdf`, `.docx`, `.txt`, `.csv`, `.xlsx`, `.pptx`, `.html`, `.jpg`, `.png`, and more.
21
 
22
  2. **Select Embedding Model**
23
  Choose an embedding model to parse and index the document’s contents, then submit. Wait for the confirmation message that the document has been successfully indexed.
 
39
  ## 🔍 **Available LLMs and Embedding Models**
40
 
41
  ### **Embedding Models** (For indexing document content)
42
+ 1. **`BAAI/bge-large-en`**
43
  - **Size**: 335M parameters
44
  - **Best For**: Complex, detailed embeddings; slower but yields high accuracy.
45
+ 2. **`BAAI/bge-small-en-v1.5`**
46
  - **Size**: 33.4M parameters
47
  - **Best For**: Faster embeddings, ideal for lighter workloads and quick responses.
48
+ 3. **`NeuML/pubmedbert-base-embeddings`**
49
  - **Size**: 768-dimensional dense vector space
50
  - **Best For**: Biomedical or medical-related text; highly specialized.
51
+ 4. **`BAAI/llm-embedder`**
52
  - **Size**: 109M parameters
53
  - **Best For**: Basic embeddings for straightforward use cases.
54
 
55
  ### **LLMs** (For generating answers)
56
+ 1. **`mistralai/Mixtral-8x7B-Instruct-v0.1`**
57
  - **Size**: 46.7B parameters
58
  - **Purpose**: Demonstrates compelling performance with minimal fine-tuning. Suited for unmoderated or exploratory use.
59
+ 2. **`meta-llama/Meta-Llama-3-8B-Instruct`**
60
  - **Size**: 8.03B parameters
61
  - **Purpose**: Optimized for dialogue, emphasizing safety and helpfulness. Excellent for structured, instructive responses.
62
+ 3. **`mistralai/Mistral-7B-Instruct-v0.2`**
63
  - **Size**: 7.24B parameters
64
  - **Purpose**: Fine-tuned for effectiveness; lacks moderation, useful for quick demonstration purposes.
65
+ 4. **`tiiuae/falcon-7b-instruct`**
66
  - **Size**: 7.22B parameters
67
+ - **Purpose**: Robust open-source model for inference, leveraging large-scale data for highly contextual responses.
68
 
69
  ---
70
 
 
74
 
75
  | **Scenario** | **Embedding Model** | **Strengths** | **Trade-Offs** |
76
  |:-----------------------------:|:------------------------------------:|:--------------------------------------------------:|:------------------------------------:|
77
+ | **Fastest Response** | `BAAI/bge-small-en-v1.5` | Speed-oriented, ideal for high-frequency querying | May miss nuanced details |
78
+ | **High Accuracy for Large Texts** | `BAAI/bge-large-en` | High accuracy, captures complex document structure | Slower response time |
79
+ | **Balanced General Purpose** | `BAAI/llm-embedder` | Reliable, quick response, adaptable across topics | Moderate accuracy, general use case |
80
+ | **Biomedical & Specialized Text** | `NeuML/pubmedbert-base-embeddings` | Optimized for medical and scientific text | Specialized, slightly slower |
81
 
82
  ---
83
 
84
  ## 📂 **Supported File Formats**
85
 
86
  The bot supports a range of document formats, making it versatile for various data sources. Below are the currently supported formats:
87
+ - **Documents**: `.pdf`, `.docx`, `.doc`, `.txt`, `.csv`, `.xlsx`, `.pptx`, `.html`
88
+ - **Images**: `.jpg`, `.jpeg`, `.png`, `.webp`, `.svg`
89
 
90
  ---
91
 
 
106
  ---
107
 
108
  ### 🌟 **Get Started Today and Experience Document-Centric Question Answering**
109
+ Whether you're a student, researcher, or professional, the Document QA Bot is your go-to tool for interactive, accurate document analysis. Upload your file, select your model, and dive into a seamless question-answering experience tailored to your document's unique content.
110
  '''
111
 
112
  guide = '''
 
114
 
115
  | **Embedding Model** | **Speed (Vector Index)** | **Advantages** | **Trade-Offs** |
116
  |-----------------------------|-------------------|-------------------------------------|---------------------------------|
117
+ | `BAAI/bge-small-en-v1.5` | **Fastest** | Ideal for quick indexing | May miss nuanced details |
118
+ | `BAAI/llm-embedder` | **Fast** | Balanced performance and detail | Slightly less precise than large models |
119
+ | `BAAI/bge-large-en` | **Slow** | Best overall precision and detail | Slower due to complexity |
120
 
121
 
122
  ### Language Models (LLMs) and Use Cases
123
 
124
  | **LLM** | **Best Use Case** |
125
  |------------------------------------|-----------------------------------------|
126
+ | `mistralai/Mixtral-8x7B-Instruct-v0.1` | Works well for **both short and long answers** |
127
+ | `meta-llama/Meta-Llama-3-8B-Instruct` | Ideal for **long-length answers** |
128
+ | `tiiuae/falcon-7b-instruct` | Best suited for **short-length answers** |
129
 
130
+ '''
131
+
132
+ footer = """
133
+ <div style="background-color: #1d2938; color: white; padding: 10px; width: 100%; bottom: 0; left: 0; display: flex; justify-content: space-between; align-items: center; padding: .2rem 35px; box-sizing: border-box; font-size: 16px;">
134
+ <div style="text-align: left;">
135
+ <p style="margin: 0;">&copy; 2024 </p>
136
+ </div>
137
+ <div style="text-align: center; flex-grow: 1;">
138
+ <p style="margin: 0;"> This website is made with ❤ by SARATH CHANDRA</p>
139
+ </div>
140
+ <div class="social-links" style="display: flex; gap: 20px; justify-content: flex-end; align-items: center;">
141
+ <a href="https://github.com/21bq1a4210" target="_blank" style="text-align: center;">
142
+ <img src="data:image/png;base64,{}" alt="GitHub" width="40" height="40" style="display: block; margin: 0 auto;">
143
+ <span style="font-size: 14px;">GitHub</span>
144
+ </a>
145
+ <a href="https://www.linkedin.com/in/sarath-chandra-bandreddi-07393b1aa/" target="_blank" style="text-align: center;">
146
+ <img src="data:image/png;base64,{}" alt="LinkedIn" width="40" height="40" style="display: block; margin: 0 auto;">
147
+ <span style="font-size: 14px;">LinkedIn</span>
148
+ </a>
149
+ <a href="https://21bq1a4210.github.io/MyPortfolio-/" target="_blank" style="text-align: center;">
150
+ <img src="data:image/png;base64,{}" alt="Portfolio" width="40" height="40" style="display: block; margin-right: 40px;">
151
+ <span style="font-size: 14px;">Portfolio</span>
152
+ </a>
153
+ </div>
154
+ </div>
155
+ """