not-lain committed on
Commit 95140c0 · 1 Parent(s): 07ffad3

🌘w🌖

Files changed (1)
  1. app.py +5 -8
app.py CHANGED
@@ -11,7 +11,7 @@ import torch
 from threading import Thread
 
 token = os.environ["HF_TOKEN"]
-model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
+model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-it",
     # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     torch_dtype=torch.float16,
     token=token)
@@ -25,12 +25,9 @@ title_text_dataset = load_dataset(
 ).select_columns(["title", "text"])
 
 # Load the int8 and binary indices. Int8 is loaded as a view to save memory, as we never actually perform search with it.
-int8_view = Index.restore("wikipedia_int8_usearch_50m.index", view=True)
+int8_view = Index.restore("https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_int8_usearch_1m.index", view=True)
 binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(
-    "wikipedia_ubinary_faiss_50m.index"
-)
-binary_ivf: faiss.IndexBinaryIVF = faiss.read_index_binary(
-    "wikipedia_ubinary_ivf_faiss_50m.index"
+    "https://huggingface.co/spaces/sentence-transformers/quantized-retrieval/resolve/main/wikipedia_ubinary_faiss_1m.index"
 )
 
 # Load the SentenceTransformer model for embedding the queries
@@ -55,7 +52,7 @@ def search(
 )
 
 # 3. Search the binary index (either exact or approximate)
-index = binary_ivf if use_approx else binary_index
+index = binary_index
 _scores, binary_ids = index.search(
     query_embedding_ubinary, top_k * rescore_multiplier
 )
@@ -156,6 +153,6 @@ the models used in this space are :
 demo = gr.ChatInterface(fn=talk,
     chatbot=gr.Chatbot(show_label=True, show_share_button=True, show_copy_button=True, likeable=True, layout="bubble", bubble_full_width=False),
     theme="Soft",
-    examples=[["Write me a poem about Machine Learning."]],
+    examples=[["what is machine learning"]],
     title="Text Streaming")
 demo.launch()
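One caveat with the new index-loading lines: usearch's `Index.restore` and `faiss.read_index_binary` expect local file paths, so passing the `resolve/main` URLs directly would fail at load time. Below is a minimal sketch of fetching the same two files from the Space repo with `huggingface_hub` before loading them; this is an assumption about one possible fix, not what the commit itself does (the commit keeps the URLs inline):

```python
import faiss
from huggingface_hub import hf_hub_download
from usearch.index import Index

# Download the 1M indices from the Space repo into the local cache first
# (repo and filenames are taken from the URLs in the diff above).
int8_path = hf_hub_download(
    repo_id="sentence-transformers/quantized-retrieval",
    repo_type="space",
    filename="wikipedia_int8_usearch_1m.index",
)
binary_path = hf_hub_download(
    repo_id="sentence-transformers/quantized-retrieval",
    repo_type="space",
    filename="wikipedia_ubinary_faiss_1m.index",
)

# As in app.py: view the int8 index from disk to save memory (it is only
# used for rescoring, never searched directly), and load the binary index.
int8_view = Index.restore(int8_path, view=True)
binary_index: faiss.IndexBinaryFlat = faiss.read_index_binary(binary_path)
```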
 
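The other functional change is in `search`: with `binary_ivf` and the `use_approx` branch removed, every query now runs an exhaustive (exact) Hamming-distance scan over the flat binary index, which remains fast at the new 1M-vector scale. A self-contained sketch of that exact path follows; the embedding model and the index contents here are placeholders for illustration, not taken from the commit:

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.quantization import quantize_embeddings

dim = 1024  # bits per vector; must match the embedding width

# Placeholder database: 10k random packed binary vectors (dim // 8 bytes each).
db = np.random.randint(0, 256, size=(10_000, dim // 8), dtype=np.uint8)
binary_index = faiss.IndexBinaryFlat(dim)
binary_index.add(db)

# Placeholder query model (any SentenceTransformer with 1024-dim output works).
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
query_embedding = model.encode(["what is machine learning"])
query_embedding_ubinary = quantize_embeddings(query_embedding, precision="ubinary")

# IndexBinaryFlat.search is an exhaustive Hamming scan, so results are exact;
# the removed IndexBinaryIVF path was the approximate alternative.
_scores, binary_ids = binary_index.search(query_embedding_ubinary, 10)
```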