Cyrile committed on
Commit
d42850a
1 Parent(s): 70b59ee

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -10
README.md CHANGED
@@ -69,24 +69,21 @@ from scipy.spatial.distance import cdist
69
  tokenizer = AutoTokenizer.from_pretrained('cmarkea/bloomz-3b-retriever-v2')
70
  model = AutoModel.from_pretrained('cmarkea/bloomz-3b-retriever-v2')
71
 
72
- def inference(txt: Union[str, List[str]]):
73
  if isinstance(txt, str):
74
  length = 0
75
  else:
76
  length = len(txt)
77
- tok = tokenizer(txt, return_tensors='pt')
78
- with torch.zero_grad():
79
  embedding = model(**tok)
80
- through = torch.arange(length)
81
- tok.get('')
82
-
83
- # Important: take only the last token!
84
- infer = lambda x: [ii[0][-1] for ii in retriever(x)]
85
 
86
  list_of_contexts = [...]
87
- emb_contexts = np.concatenate(infer(list_of_contexts), axis=0)
88
  list_of_queries = [...]
89
- emb_queries = np.concatenate(infer(list_of_queries), axis=0)
90
 
91
  # Important: take cosine distance!
92
  dist = cdist(emb_queries, emb_contexts, 'cosine')
@@ -125,4 +122,17 @@ top_k = lambda x: [
125
 
126
  # top 5 nearest contexts for each query
127
  top_contexts = top_k(5)
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  ```
 
69
  tokenizer = AutoTokenizer.from_pretrained('cmarkea/bloomz-3b-retriever-v2')
70
  model = AutoModel.from_pretrained('cmarkea/bloomz-3b-retriever-v2')
71
 
72
+ def infer(txt: Union[str, List[str]]):
73
  if isinstance(txt, str):
74
  length = 0
75
  else:
76
  length = len(txt)
77
+ tok = tokenizer(txt, padding=True, return_tensors='pt')
78
+ with torch.no_grad():
79
  embedding = model(**tok)
80
+ # Important: take only the last token!
81
+ return embedding.get('last_hidden_state')[:,-1,:].numpy()
 
 
 
82
 
83
  list_of_contexts = [...]
84
+ emb_contexts = infer(list_of_contexts)
85
  list_of_queries = [...]
86
+ emb_queries = infer(list_of_queries)
87
 
88
  # Important: take cosine distance!
89
  dist = cdist(emb_queries, emb_contexts, 'cosine')
 
122
 
123
  # top 5 nearest contexts for each query
124
  top_contexts = top_k(5)
125
+ ```
126
+
127
+ Citation
128
+ --------
129
+
130
+ ```bibtex
131
+ @online{DeBloomzRetv2,
132
+ AUTHOR = {Cyrile Delestre},
133
+ ORGANIZATION = {Cr{\'e}dit Mutuel Ark{\'e}a},
134
+ URL = {https://huggingface.co/cmarkea/bloomz-3b-retriever-v2},
135
+ YEAR = {2024},
136
+ KEYWORDS = {NLP ; Transformers ; LLM ; Bloomz},
137
+ }
138
  ```