Spaces:
Running
Running
kenghuoxiong
committed on
Commit
•
9d2b8a1
1
Parent(s):
cc5ea41
Upload 8 files
Browse files- .gitattributes +1 -0
- VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/data_level0.bin +3 -0
- VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/header.bin +3 -0
- VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/index_metadata.pickle +3 -0
- VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/length.bin +3 -0
- VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/link_lists.bin +3 -0
- VecterStore2_512_txt/VecterStore2_512_txt/chroma.sqlite3 +3 -0
- VecterStore2_512_txt/chroma.sqlite3 +0 -0
- langchain_qwen_run.py +107 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
VecterStore2_512_txt/VecterStore2_512_txt/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44433bcb5ed0262754af85f5901f104260a430c79f8de3c8236a69505765b99d
|
3 |
+
size 101664000
|
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be1f7ec366d94fb8e072e5fb52a99afa86987efcb648ded502eef46d7f4a9adc
|
3 |
+
size 100
|
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe2dc505e368f9d06d240336f06579718cb29482598eb07cbfcd51679bce477f
|
3 |
+
size 1390301
|
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5affef09f6203089429f32605ef02a77f7b1d7eae70b95fcb9d042b0e97bf716
|
3 |
+
size 96000
|
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af355d0b0eaf9318079359802dc3f92ae1f9f3d30d6b14906b4729c788eaf0ee
|
3 |
+
size 202556
|
VecterStore2_512_txt/VecterStore2_512_txt/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69ebd38670cdd472faf632885ebe2f261ef04c959f28cd00116a1c805be40dc1
|
3 |
+
size 190148608
|
VecterStore2_512_txt/chroma.sqlite3
ADDED
Binary file (147 kB). View file
|
|
langchain_qwen_run.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.chat_models import ChatOpenAI
|
2 |
+
from langchain.chains.retrieval_qa.base import RetrievalQA
|
3 |
+
from langchain_community.embeddings import OpenAIEmbeddings
|
4 |
+
from langchain.schema import HumanMessage, SystemMessage
|
5 |
+
import os
|
6 |
+
from langchain_community.document_loaders import DirectoryLoader
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
9 |
+
from langchain_community.embeddings import OpenAIEmbeddings
|
10 |
+
from langchain_community.vectorstores import Chroma
|
11 |
+
import gradio as gr
|
12 |
+
import requests
|
13 |
+
from langchain_core.prompts import PromptTemplate
|
14 |
+
from qwen_api import qwen_api
|
15 |
+
|
16 |
+
|
17 |
+
def load_documents(directory='../langchain-database', chunk_size=2048, chunk_overlap=200):
    """Load every readable file under *directory* and split it into chunks.

    Args:
        directory: Folder scanned (recursively) by ``DirectoryLoader``.
            Files that fail to parse are skipped (``silent_errors=True``).
        chunk_size: Maximum characters per chunk (default 2048, as before).
        chunk_overlap: Characters shared between consecutive chunks
            (default 200, as before).

    Returns:
        list: Split ``Document`` chunks ready to be embedded.
    """
    loader = DirectoryLoader(directory, show_progress=True,
                             use_multithreading=True, silent_errors=True)
    documents = loader.load()
    splitter = CharacterTextSplitter(chunk_size=chunk_size,
                                     chunk_overlap=chunk_overlap)
    split_docs = splitter.split_documents(documents)
    print(len(split_docs))  # quick sanity check on the chunk count
    return split_docs
|
24 |
+
|
25 |
+
|
26 |
+
def load_embedding_mode(model_name="/home/xiongwen/bge-m3", device='cuda'):
    """Build a HuggingFace embedding model wrapper.

    Args:
        model_name: Local path (or hub id) of the embedding checkpoint;
            defaults to the bge-m3 path used by this deployment.
        device: Torch device to run the encoder on (default ``'cuda'``).

    Returns:
        HuggingFaceEmbeddings: Embedder with normalization disabled, matching
        how the persisted Chroma index was originally built.
    """
    # normalize_embeddings must stay False: the existing vector store was
    # built with unnormalized embeddings, and queries must match.
    encode_kwargs = {"normalize_embeddings": False}
    model_kwargs = {"device": device}
    return HuggingFaceEmbeddings(model_name=model_name,
                                 model_kwargs=model_kwargs,
                                 encode_kwargs=encode_kwargs)
|
33 |
+
|
34 |
+
|
35 |
+
def store_chroma(docs, embedding, persist_directory='./VecterStore2'):
    """Embed *docs* into a Chroma vector store persisted on disk.

    Bug fix: the original accepted ``persist_directory`` but never passed it
    to ``Chroma.from_documents`` (and ``db.persist()`` was commented out), so
    the store was built in memory only and silently discarded.

    Args:
        docs: Iterable of split ``Document`` chunks.
        embedding: Embedding function used to vectorize the documents.
        persist_directory: Directory where Chroma writes its index files.

    Returns:
        Chroma: The populated vector store.
    """
    db = Chroma.from_documents(docs, embedding,
                               persist_directory=persist_directory)
    return db
|
39 |
+
|
40 |
+
|
41 |
+
def chat(question, history):
    """Gradio chat callback.

    The first turn of a conversation is answered through the retrieval-QA
    chain (``qa``, a module-level global built in ``__main__``); every later
    turn is forwarded to ``qwen_api`` with the accumulated gradio history.
    """
    if not history:  # empty history == first turn of the conversation
        return qa.invoke(question)['result']
    return qwen_api(question, gradio_history=history)
|
47 |
+
|
48 |
+
|
49 |
+
if __name__ == '__main__':
    # Embedding model + the pre-built Chroma index persisted on disk.
    embedding = load_embedding_mode()
    db = Chroma(
        persist_directory='/home/xiongwen/llama2-a40-ner/langchain-qwen/VecterStore2_512_txt/VecterStore2_512_txt',
        embedding_function=embedding,
    )

    # Point the OpenAI-compatible client at the locally served Qwen model
    # (e.g. a vLLM/FastChat server on port 8000); the key is unused there.
    os.environ["OPENAI_API_BASE"] = 'http://localhost:8000/v1'
    os.environ["OPENAI_API_KEY"] = 'none'

    llm = ChatOpenAI(
        model="/home/xiongwen/Qwen1.5-110B-Chat",
        temperature=0.8,
    )

    # RAG prompt: retrieved context first, then the user question.
    prompt_template = """
{context}
The above content is a form of biological background knowledge. Please answer the questions according to the above content. Please be sure to answer the questions according to the background knowledge and attach the doi number of the information source when answering.
Question: {question}
Answer in English:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    chain_type_kwargs = {"prompt": PROMPT}

    # "stuff" chain: all retrieved chunks are concatenated into one prompt.
    retriever = db.as_retriever()
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )

    # Gradio UI; `chat` (defined above) routes the first turn through `qa`.
    interface = gr.ChatInterface(
        fn=chat,
        chatbot=gr.Chatbot(height=800, bubble_full_width=False),
        theme=gr.themes.Default(spacing_size='sm', radius_size='sm'),
        examples=['which gene should be knocked in the process of producing ethanol in Saccharomyces cerevisiae?'],
    )
    interface.launch(inbrowser=True)