kenghuoxiong committed on
Commit
9d2b8a1
1 Parent(s): cc5ea41

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ VecterStore2_512_txt/VecterStore2_512_txt/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44433bcb5ed0262754af85f5901f104260a430c79f8de3c8236a69505765b99d
3
+ size 101664000
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1f7ec366d94fb8e072e5fb52a99afa86987efcb648ded502eef46d7f4a9adc
3
+ size 100
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2dc505e368f9d06d240336f06579718cb29482598eb07cbfcd51679bce477f
3
+ size 1390301
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5affef09f6203089429f32605ef02a77f7b1d7eae70b95fcb9d042b0e97bf716
3
+ size 96000
VecterStore2_512_txt/VecterStore2_512_txt/84cefd75-4cd5-4c0f-80ea-1aadeb8480e0/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af355d0b0eaf9318079359802dc3f92ae1f9f3d30d6b14906b4729c788eaf0ee
3
+ size 202556
VecterStore2_512_txt/VecterStore2_512_txt/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ebd38670cdd472faf632885ebe2f261ef04c959f28cd00116a1c805be40dc1
3
+ size 190148608
VecterStore2_512_txt/chroma.sqlite3 ADDED
Binary file (147 kB). View file
 
langchain_qwen_run.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.chat_models import ChatOpenAI
2
+ from langchain.chains.retrieval_qa.base import RetrievalQA
3
+ from langchain_community.embeddings import OpenAIEmbeddings
4
+ from langchain.schema import HumanMessage, SystemMessage
5
+ import os
6
+ from langchain_community.document_loaders import DirectoryLoader
7
+ from langchain.text_splitter import CharacterTextSplitter
8
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.embeddings import OpenAIEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ import gradio as gr
12
+ import requests
13
+ from langchain_core.prompts import PromptTemplate
14
+ from qwen_api import qwen_api
15
+
16
+
17
def load_documents(directory='../langchain-database', chunk_size=2048, chunk_overlap=200):
    """Load all documents under *directory* and split them into overlapping chunks.

    Parameters
    ----------
    directory : str
        Root folder scanned recursively by ``DirectoryLoader``.
    chunk_size : int
        Maximum characters per chunk (previously hard-coded to 2048; now
        configurable, default unchanged).
    chunk_overlap : int
        Characters shared between consecutive chunks so context is not lost
        at chunk boundaries (previously hard-coded to 200).

    Returns
    -------
    list
        Split document chunks produced by ``CharacterTextSplitter``.
    """
    # silent_errors=True skips unreadable files instead of aborting the whole load.
    loader = DirectoryLoader(directory, show_progress=True,
                             use_multithreading=True, silent_errors=True)
    documents = loader.load()
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    split_docs = splitter.split_documents(documents)
    print(len(split_docs))
    return split_docs
24
+
25
+
26
def load_embedding_mode(model_name="/home/xiongwen/bge-m3", device='cuda'):
    """Build the HuggingFace sentence-embedding wrapper used for the vector store.

    Parameters
    ----------
    model_name : str
        Local path (or hub id) of the embedding model. Previously hard-coded;
        now configurable with the original path as default.
    device : str
        Torch device the model runs on (previously hard-coded to 'cuda').

    Returns
    -------
    HuggingFaceEmbeddings
        Embedding object; raw (un-normalized) vectors are returned because
        ``normalize_embeddings`` is False.
    """
    encode_kwargs = {"normalize_embeddings": False}
    model_kwargs = {"device": device}
    return HuggingFaceEmbeddings(model_name=model_name,
                                 model_kwargs=model_kwargs,
                                 encode_kwargs=encode_kwargs)
33
+
34
+
35
def store_chroma(docs, embedding, persist_directory='./VecterStore2'):
    """Index *docs* into a Chroma vector store and return the store.

    Bug fix: the original accepted ``persist_directory`` but never forwarded it
    to ``Chroma.from_documents``, so the argument was silently ignored and the
    store was never written to the requested location.

    Parameters
    ----------
    docs : list
        Split documents to embed and index.
    embedding
        Embedding function, e.g. the result of ``load_embedding_mode()``.
    persist_directory : str
        On-disk location for the Chroma store.

    Returns
    -------
    Chroma
        The populated vector store.
    """
    db = Chroma.from_documents(docs, embedding, persist_directory=persist_directory)
    # Recent Chroma versions persist automatically; an explicit db.persist()
    # call (commented out in the original) is no longer required.
    return db
39
+
40
+
41
def chat(question, history):
    """Answer one chat turn.

    The very first turn (empty history) is answered through the module-level
    retrieval-QA chain ``qa``; every later turn is forwarded to ``qwen_api``
    together with the accumulated Gradio history.
    """
    if history:
        return qwen_api(question, gradio_history=history)
    return qa.invoke(question)['result']
47
+
48
+
49
if __name__ == '__main__':
    # Embeddings + pre-built Chroma store loaded from disk.
    embedding = load_embedding_mode()
    db = Chroma(persist_directory='/home/xiongwen/llama2-a40-ner/langchain-qwen/VecterStore2_512_txt/VecterStore2_512_txt', embedding_function=embedding)

    # Point the OpenAI-compatible client at the locally served Qwen model.
    os.environ["OPENAI_API_BASE"] = 'http://localhost:8000/v1'
    os.environ["OPENAI_API_KEY"] = 'none'
    llm = ChatOpenAI(
        model="/home/xiongwen/Qwen1.5-110B-Chat",
        temperature=0.8,
    )

    # Prompt that injects retrieved context and asks for DOI-cited answers.
    prompt_template = """
    {context}
    The above content is a form of biological background knowledge. Please answer the questions according to the above content. Please be sure to answer the questions according to the background knowledge and attach the doi number of the information source when answering.
    Question: {question}
    Answer in English:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    chain_type_kwargs = {"prompt": PROMPT}

    retriever = db.as_retriever()
    print(dir(retriever))  # debug: inspect the retriever's public surface
    question = "which gene should be knocked in the process of producing ethanol in E.coli?"

    # "stuff" chain: all retrieved chunks are stuffed into a single prompt.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )

    # Gradio front-end wired to chat(); first turn uses RAG, later turns the raw API.
    interface = gr.ChatInterface(
        fn=chat,
        chatbot=gr.Chatbot(height=800, bubble_full_width=False),
        theme=gr.themes.Default(spacing_size='sm', radius_size='sm'),
        examples=['which gene should be knocked in the process of producing ethanol in Saccharomyces cerevisiae?'],
    )
    interface.launch(inbrowser=True)