Upload app.py
app.py CHANGED
@@ -1,4 +1,4 @@
-import time, aiohttp, asyncio, json, os, multiprocessing
+import time, aiohttp, asyncio, json, os, multiprocessing, torch
 from minivectordb.embedding_model import EmbeddingModel
 from minivectordb.vector_database import VectorDatabase
 from text_util_en_pt.cleaner import structurize_text, detect_language, Language
@@ -6,15 +6,17 @@ from webtextcrawler.webtextcrawler import extract_text_from_url
 from duckduckgo_search import DDGS
 import gradio as gr
 
+torch.set_num_threads(2)
+
 openrouter_key = os.environ.get("OPENROUTER_KEY")
-model = EmbeddingModel(use_quantized_onnx_model=
+model = EmbeddingModel(use_quantized_onnx_model=True)
 
-def fetch_links(query, max_results=
+def fetch_links(query, max_results=5):
     with DDGS() as ddgs:
         return [r['href'] for r in ddgs.text(query, max_results=max_results)]
 
 def fetch_texts(links):
-    with multiprocessing.Pool() as pool:
+    with multiprocessing.Pool(5) as pool:
         texts = pool.map(extract_text_from_url, links)
     return '\n'.join([t for t in texts if t])
 
@@ -34,7 +36,7 @@ def index_and_search(query, text):
 
     # Retrieval
     start = time.time()
-    search_results = vector_db.find_most_similar(query_embedding, k =
+    search_results = vector_db.find_most_similar(query_embedding, k = 12)
     retrieval_time = time.time() - start
     return '\n'.join([s['sentence'] for s in search_results[2]]), embedding_time, retrieval_time
 
@@ -117,13 +119,13 @@ async def predict(message, history):
 # Setting up the Gradio chat interface.
 gr.ChatInterface(
     predict,
-    title="
+    title="Web Search with LLM !",
     description="Ask any question, and I will try to answer it using web search !",
     retry_btn=None,
     undo_btn=None,
     examples=[
         'When did the first human land on the moon?',
-        'Liquid vs solid vs gas
+        'Liquid vs solid vs gas?',
         'What is the capital of France?',
         'Why does Brazil has a high tax rate?'
     ]
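For context, a minimal usage sketch of the functions touched by this commit, assuming it runs inside the same app.py module (fetch_links, fetch_texts, and index_and_search are the names visible in the hunks above; the query string is only an example):

# Usage sketch, not part of the commit: exercises the changed functions,
# assuming app.py defines fetch_links, fetch_texts and index_and_search
# as shown in the hunk headers above.
query = "When did the first human land on the moon?"

links = fetch_links(query, max_results=5)   # DuckDuckGo search, up to 5 links
text = fetch_texts(links)                   # crawl the pages with a pool of 5 workers
context, embedding_time, retrieval_time = index_and_search(query, text)

print(f"Embedding: {embedding_time:.2f}s, retrieval: {retrieval_time:.2f}s")
print(context)                              # the 12 most similar sentences (k = 12)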