|
import os |
|
import pinecone |
|
from tqdm import tqdm |
|
from langchain.llms import OpenAI |
|
from langchain.text_splitter import SpacyTextSplitter |
|
from langchain.document_loaders import TextLoader |
|
from langchain.document_loaders import DirectoryLoader |
|
from langchain.indexes import VectorstoreIndexCreator |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import Pinecone |
|
|
|
|
|
# --- Configuration ---------------------------------------------------------
# The Chinese placeholder strings mean "your key" / "your index" / etc.
# NOTE(review): secrets are hard-coded in source — prefer loading them from
# environment variables (e.g. os.environ["OPENAI_API_KEY"]) before committing.
openai_key="你的key"  # OpenAI API key, used by OpenAIEmbeddings below

pinecone_key="你的key"  # Pinecone API key

pinecone_index="你的库"  # name of the target Pinecone index

pinecone_environment="你的Environment"  # Pinecone environment, e.g. "us-west1-gcp"

pinecone_namespace="你的Namespace"  # NOTE(review): defined but never used below — confirm intent




# Route all HTTP(S) traffic through a local proxy on port 7890.
# NOTE(review): hard-coded proxy address — remove or make configurable when
# running in an environment that does not need it.
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'

os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'
|
|
|
|
|
# Initialise the Pinecone client for the configured environment.
pinecone.init(

    api_key=pinecone_key,

    environment=pinecone_environment

)

# Handle to the target index.
# NOTE(review): `index` is never used in the rest of this file —
# Pinecone.from_documents below resolves the index by name itself;
# confirm whether this handle is still needed.
index = pinecone.Index(pinecone_index)




# Embedding client: converts text chunks to vectors via the OpenAI API.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
|
|
|
|
|
# Sentence-aware splitter using spaCy's small Chinese pipeline:
# ~1000-character chunks with 200 characters of overlap so that context is
# not lost at chunk boundaries. Requires `zh_core_web_sm` to be installed.
text_splitter = SpacyTextSplitter(pipeline='zh_core_web_sm',chunk_size=1000,chunk_overlap=200)




# Recursively load every .txt file under ../docs as a plain-text Document.
loader = DirectoryLoader('../docs', glob="**/*.txt", loader_cls=TextLoader)




documents = loader.load()
|
|
|
|
|
# Chunk the loaded documents before uploading.
split_text = text_splitter.split_documents(documents)

try:
    # Connect to the existing Pinecone index ONCE, then push chunks one at a
    # time so tqdm can show upload progress. (Previously
    # Pinecone.from_documents was called inside the loop, re-initialising the
    # whole vector store for every single chunk.)
    vectorstore = Pinecone.from_existing_index(
        index_name=pinecone_index,
        embedding=embeddings,
        # Write into the configured namespace, which was defined but unused.
        namespace=pinecone_namespace,
    )
    for document in tqdm(split_text):
        vectorstore.add_documents([document])
except Exception as e:
    # Top-level boundary: report the failure and exit with a non-zero status.
    # quit() is intended for interactive sessions (and exits with status 0);
    # SystemExit(1) reliably signals failure to the caller.
    print(f"Error: {e}")
    raise SystemExit(1)
|
|
|
|
|
|