gofeco committed on
Commit
7b0074a
1 Parent(s): b5a732c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -4
app.py CHANGED
@@ -1,8 +1,35 @@
 
 
 
 
 
1
  import streamlit as st
2
- from transformers import pipeline
3
- pipe = pipeline('sentiment-analysis')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  text = st.text_area("enter text")
5
  if text:
6
- out = pipe(text)
7
- st.json(out)
 
 
 
 
 
 
 
8
 
 
1
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
import streamlit as st


@st.cache_resource
def _build_vector_store():
    """Load the ``*.txt`` files under ``src_info`` and index them in Chroma.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the store as a resource keeps the documents from
    being re-read, re-embedded and re-added to the collection on each rerun.

    Returns:
        The Chroma vector store built from the loaded documents.
    """
    text_loader_kwargs = {'autodetect_encoding': True}
    loader = DirectoryLoader(
        "src_info",
        glob="./*.txt",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
    )
    docs = loader.load()

    # Chunking is currently disabled, so each file is embedded as one document.
    # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    # docs = text_splitter.split_documents(docs)

    # Open-source embedding function; the smaller model is kept for reference.
    # embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    embedding_function = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")

    # NOTE(review): the collection is persisted in 'chroma_db_info'; a fresh
    # process will presumably add the same documents again -- confirm whether
    # the directory should be cleared or reused between launches.
    return Chroma.from_documents(
        docs,
        embedding_function,
        collection_metadata={"hnsw:space": "cosine"},
        persist_directory='chroma_db_info',
    )


chdb = _build_vector_store()

text = st.text_area("enter text")
if text:
    # Query with the text the user entered (the name `query` was never defined).
    matches = chdb.similarity_search_with_score(text, k=3)

    # Each match is a (document, score) pair; number the hits from 0.
    # NOTE(review): with "hnsw:space" set to "cosine" the score is presumably
    # a distance (lower means closer) -- confirm against the Chroma docs.
    ret = ''.join(
        f"Return {index} ({score:.4f}) :\n{doc.page_content}\n"
        for index, (doc, score) in enumerate(matches)
    )

    # st.text renders preformatted text, so the "\n" separators are preserved.
    st.text(ret)
35