Spaces:
Runtime error
Runtime error
πwπ
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import torch
|
|
11 |
from threading import Thread
|
12 |
|
13 |
token = os.environ["HF_TOKEN"]
|
14 |
-
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
|
15 |
# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
16 |
torch_dtype=torch.float16,
|
17 |
token=token)
|
@@ -25,12 +25,9 @@ title_text_dataset = load_dataset(
|
|
25 |
).select_columns(["title", "text"])
|
26 |
|
27 |
# Load the int8 and binary indices. Int8 is loaded as a view to save memory, as we never actually perform search with it.
|
28 |
-
int8_view = Index.restore("
|
29 |
binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(
|
30 |
-
"
|
31 |
-
)
|
32 |
-
binary_ivf: faiss.IndexBinaryIVF = faiss.read_index_binary(
|
33 |
-
"wikipedia_ubinary_ivf_faiss_50m.index"
|
34 |
)
|
35 |
|
36 |
# Load the SentenceTransformer model for embedding the queries
|
@@ -55,7 +52,7 @@ def search(
|
|
55 |
)
|
56 |
|
57 |
# 3. Search the binary index (either exact or approximate)
|
58 |
-
index =
|
59 |
_scores, binary_ids = index.search(
|
60 |
query_embedding_ubinary, top_k * rescore_multiplier
|
61 |
)
|
@@ -156,6 +153,6 @@ the models used in this space are :
|
|
156 |
demo = gr.ChatInterface(fn=talk,
|
157 |
chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
|
158 |
theme="Soft",
|
159 |
-
examples=[["
|
160 |
title="Text Streaming")
|
161 |
demo.launch()
|
|
|
11 |
from threading import Thread
|
12 |
|
13 |
token = os.environ["HF_TOKEN"]
|
14 |
+
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
|
15 |
# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
16 |
torch_dtype=torch.float16,
|
17 |
token=token)
|
|
|
25 |
).select_columns(["title", "text"])
|
26 |
|
27 |
# Load the int8 and binary indices. Int8 is loaded as a view to save memory, as we never actually perform search with it.
|
28 |
+
int8_view = Index.restore("https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_int8_usearch_1m.index", view=True)
|
29 |
binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(
|
30 |
+
"https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_ubinary_faiss_1m.index"
|
|
|
|
|
|
|
31 |
)
|
32 |
|
33 |
# Load the SentenceTransformer model for embedding the queries
|
|
|
52 |
)
|
53 |
|
54 |
# 3. Search the binary index (either exact or approximate)
|
55 |
+
index = binary_index
|
56 |
_scores, binary_ids = index.search(
|
57 |
query_embedding_ubinary, top_k * rescore_multiplier
|
58 |
)
|
|
|
153 |
demo = gr.ChatInterface(fn=talk,
|
154 |
chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
|
155 |
theme="Soft",
|
156 |
+
examples=[["what is machine learning"]],
|
157 |
title="Text Streaming")
|
158 |
demo.launch()
|