Spaces:
Running
Running
Vinh Nguyen
committed on
Disable tokenizer transformer parallelism to avoid deadlocks
Browse files - document_retriever.py +5 -3
document_retriever.py
CHANGED
@@ -2,17 +2,19 @@ import os
|
|
2 |
import tempfile
|
3 |
|
4 |
import streamlit as st
|
5 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
6 |
from langchain_community.document_loaders import (
|
7 |
Docx2txtLoader,
|
8 |
PyPDFLoader,
|
9 |
TextLoader,
|
10 |
UnstructuredEPubLoader,
|
11 |
)
|
|
|
12 |
from langchain_community.vectorstores import DocArrayInMemorySearch
|
13 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
14 |
|
15 |
-
|
|
|
|
|
16 |
|
17 |
|
18 |
@st.cache_resource(ttl="1h")
|
@@ -47,7 +49,7 @@ def configure_retriever(files):
|
|
47 |
splits = text_splitter.split_documents(docs)
|
48 |
|
49 |
# Create embeddings and store in vectordb
|
50 |
-
embeddings = HuggingFaceEmbeddings(model_name=
|
51 |
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
|
52 |
|
53 |
# Define retriever
|
|
|
2 |
import tempfile
|
3 |
|
4 |
import streamlit as st
|
|
|
5 |
from langchain_community.document_loaders import (
|
6 |
Docx2txtLoader,
|
7 |
PyPDFLoader,
|
8 |
TextLoader,
|
9 |
UnstructuredEPubLoader,
|
10 |
)
|
11 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
12 |
from langchain_community.vectorstores import DocArrayInMemorySearch
|
13 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
14 |
|
15 |
+
# disable tokenizer transformer parallelism to avoid deadlocks
|
16 |
+
# https://github.com/huggingface/transformers/issues/5486
|
17 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
18 |
|
19 |
|
20 |
@st.cache_resource(ttl="1h")
|
|
|
49 |
splits = text_splitter.split_documents(docs)
|
50 |
|
51 |
# Create embeddings and store in vectordb
|
52 |
+
embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
|
53 |
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
|
54 |
|
55 |
# Define retriever
|