Spaces (runtime error)
KhangPTT373 committed: Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- .gitattributes +44 -0
- .gitignore +3 -0
- README.MD +38 -0
- README.md +3 -9
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- bge_model_ctranslate2/config.json +7 -0
- bge_model_ctranslate2/model.bin +3 -0
- bge_model_ctranslate2/vocabulary.json +0 -0
- chroma_service.py +89 -0
- data/data/301f209e-0482-4481-b8d1-f7e72292463f/data_level0.bin +3 -0
- data/data/301f209e-0482-4481-b8d1-f7e72292463f/header.bin +3 -0
- data/data/301f209e-0482-4481-b8d1-f7e72292463f/index_metadata.pickle +3 -0
- data/data/301f209e-0482-4481-b8d1-f7e72292463f/length.bin +3 -0
- data/data/301f209e-0482-4481-b8d1-f7e72292463f/link_lists.bin +3 -0
- data/data/35323312-d0b8-43cb-8e9e-d36d78781612/data_level0.bin +3 -0
- data/data/35323312-d0b8-43cb-8e9e-d36d78781612/header.bin +3 -0
- data/data/35323312-d0b8-43cb-8e9e-d36d78781612/index_metadata.pickle +3 -0
- data/data/35323312-d0b8-43cb-8e9e-d36d78781612/length.bin +3 -0
- data/data/35323312-d0b8-43cb-8e9e-d36d78781612/link_lists.bin +3 -0
- data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/data_level0.bin +3 -0
- data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/header.bin +3 -0
- data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/index_metadata.pickle +3 -0
- data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/length.bin +3 -0
- data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/link_lists.bin +3 -0
- data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/data_level0.bin +3 -0
- data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/header.bin +3 -0
- data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/length.bin +3 -0
- data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/link_lists.bin +0 -0
- data/data/chroma.sqlite3 +3 -0
- data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/data_level0.bin +3 -0
- data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/header.bin +3 -0
- data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/index_metadata.pickle +3 -0
- data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/length.bin +3 -0
- data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/link_lists.bin +3 -0
- gradio_demo.py +114 -0
- logs/__pycache__/logger_config.cpython-311.pyc +0 -0
- logs/chat_inference.log +0 -0
- logs/chroma.log +0 -0
- logs/encoder_inference.log +0 -0
- logs/init_profile.log +0 -0
- logs/logger_config.py +34 -0
- logs/offline_flow.log +0 -0
- logs/system.log +3 -0
- pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf +3 -0
- pdf/A Stratigraphic Reconstruction of Bulk Volatile Chemistry from Fluid Inclusions_FI090048b-1.pdf +3 -0
- pdf/BIOSTRAT_REPORT_1.pdf +0 -0
- pdf/BIOSTRAT_REPORT_2.pdf +3 -0
- pdf/DRILLING_REPORT_1.pdf +3 -0
- pdf/FWR_completion.pdf +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,47 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/data/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+logs/system.log filter=lfs diff=lfs merge=lfs -text
+pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/A[[:space:]]Stratigraphic[[:space:]]Reconstruction[[:space:]]of[[:space:]]Bulk[[:space:]]Volatile[[:space:]]Chemistry[[:space:]]from[[:space:]]Fluid[[:space:]]Inclusions_FI090048b-1.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/BIOSTRAT_REPORT_2.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/DRILLING_REPORT_1.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/FWR_completion.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/MWD_REPORT_2.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/PETROPHYSICAL_REPORT_1[[:space:]](3).pdf filter=lfs diff=lfs merge=lfs -text
+pdf/PETROPHYSICAL_REPORT_1.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/PETROPHYSICAL_REPORT_4.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/Rock[[:space:]]Mechanical[[:space:]]Testing[[:space:]]Triaxial[[:space:]]tests[[:space:]]on[[:space:]]sandstone[[:space:]]Well[[:space:]]15-9-19[[:space:]]A.pdf filter=lfs diff=lfs merge=lfs -text
+pdf/Well[[:space:]]Test[[:space:]]Report_nr-20.pdf filter=lfs diff=lfs merge=lfs -text
+pictures/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03/pictures_page_066.png filter=lfs diff=lfs merge=lfs -text
+pictures/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03/pictures_page_071.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_006.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_008.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_015.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_018.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_025.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_026.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_030.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_031.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_032.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_033.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_034.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_035.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_040.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_042.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_043.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_045.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_046.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_047.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_048.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_049.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_051.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_052.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_055.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_064.png filter=lfs diff=lfs merge=lfs -text
+pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_068.png filter=lfs diff=lfs merge=lfs -text
+tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_077.png filter=lfs diff=lfs merge=lfs -text
+tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_088.png filter=lfs diff=lfs merge=lfs -text
+tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_089.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,3 @@
+venv
+pdf_parsing_service.py
+config.py
README.MD
ADDED
@@ -0,0 +1,38 @@
+# Folder structure
+```
+ORAL_PDF_QA/
+├── __pycache__/
+├── bge_model_ctranslate2/
+├── data/
+├── parsed/
+├── logs/
+├── pdf/
+├── pictures/
+├── tables/
+├── venv/
+├── .gitignore
+├── chroma_service.py
+├── config.py
+├── gradio_demo.py
+├── pdf_parsing_service.py
+├── questions.txt
+├── README.MD
+├── requirements.txt
+└── utils.py
+```
+# Download
+```
+pip install -r requirements.txt
+```
+Download `bge_model_ctranslate2` embedding model<br>
+Download `parsed` folder at https://drive.google.com/drive/folders/174I-pX1f7_mGG28Wwd9JPOgnOS5O16BA?usp=sharing<br>
+Download `tables` folder (extracted tables) from https://drive.google.com/drive/folders/12r0F_Ce25kecUSzp_HvjHjhrV6LbyYyx?usp=sharing<br>
+Download `pictures` folder (extracted pictures) from https://drive.google.com/drive/folders/1EvTLNNrBvQr-_lIzZSRL8ayrevKTmtJK?usp=sharing<br>
+# Usage
+```
+python chroma_service.py
+```
+
+```
+python gradio_demo.py
+```
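Editor's note: the README tells the reader to download the `bge_model_ctranslate2` folder but does not say where it comes from. As a minimal, hedged sketch (not part of the commit), one way to produce an equivalent folder is to convert the original BAAI/bge-base-en-v1.5 checkpoint with CTranslate2's Transformers converter; the output directory name matches what chroma_service.py loads.

```
# Hedged sketch: convert BAAI/bge-base-en-v1.5 into a CTranslate2 model folder
# named bge_model_ctranslate2, the path chroma_service.py expects to load.
from ctranslate2.converters import TransformersConverter

converter = TransformersConverter("BAAI/bge-base-en-v1.5")
converter.convert("bge_model_ctranslate2", force=True)
```

The same conversion is also available from the command line as `ct2-transformers-converter --model BAAI/bge-base-en-v1.5 --output_dir bge_model_ctranslate2`.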
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: purple
-colorTo: blue
+title: KhangPTT373pdf_qa
+app_file: gradio_demo.py
 sdk: gradio
-sdk_version:
-app_file: app.py
-pinned: false
+sdk_version: 4.44.1
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/config.cpython-311.pyc
ADDED
Binary file (3.29 kB).

__pycache__/utils.cpython-311.pyc
ADDED
Binary file (10.9 kB).
bge_model_ctranslate2/config.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "layer_norm_epsilon": 1e-12,
+  "multi_query_attention": false,
+  "unk_token": "[UNK]"
+}
bge_model_ctranslate2/model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65735518664364784cdd7cb919e054777253fe7a7c76924f0a20ef539e5adac8
+size 437937363
bge_model_ctranslate2/vocabulary.json
ADDED
The diff for this file is too large to render. See raw diff.
chroma_service.py
ADDED
@@ -0,0 +1,89 @@
+import ctranslate2
+from transformers import AutoTokenizer
+
+import torch
+import numpy as np
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import os
+import argparse
+import time
+
+model_name = "BAAI/bge-base-en-v1.5"
+model_save_path = "bge_model_ctranslate2"
+# model_path = "bge_model_ctranslate2_base"
+
+
+device = "cpu"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+if device == "cuda":
+    translator = ctranslate2.Encoder(
+        model_save_path, device=device, compute_type="float16"
+    )  # or "cuda" for GPU
+else:
+    translator = ctranslate2.Encoder(model_save_path, device=device)
+
+
+def generate_embeddings(text):
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+    input_ids = inputs["input_ids"].tolist()[0]
+    output = translator.forward_batch([input_ids])
+    pooler_output = output.pooler_output
+    if device == "cuda":
+        embeddings = (
+            torch.as_tensor(pooler_output, device=device).detach().cpu().tolist()[0]
+        )
+    else:
+        pooler_output = np.array(pooler_output)
+        embeddings = torch.as_tensor(pooler_output, device=device).detach().tolist()[0]
+    return embeddings
+
+
+app = FastAPI()
+
+
+class EmbeddingRequest(BaseModel):
+    input: str
+    model: str
+
+
+class EmbeddingResponse(BaseModel):
+    object: str = "list"
+    data: list
+    model: str
+    usage: dict
+
+
+@app.post("/v1/embeddings", response_model=EmbeddingResponse)
+async def embeddings(request: EmbeddingRequest):
+    input_text = request.input
+    if not input_text:
+        raise HTTPException(status_code=400, detail="No input text provided")
+
+    # Generate embeddings
+    embeddings = generate_embeddings(input_text)
+
+    # Construct the response in OpenAI format
+    response = {
+        "object": "list",
+        "data": [{"object": "embedding", "embedding": embeddings, "index": 0}],
+        "model": request.model,
+        "usage": {
+            "prompt_tokens": len(input_text.split()),
+            "total_tokens": len(input_text.split()),
+        },
+    }
+
+    return response
+
+@app.get("/ping")
+async def ping():
+    return {"status": "pong"}
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=5001)
+    args = parser.parse_args()
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=args.port)
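Editor's note: chroma_service.py (above) wraps the CTranslate2 BGE encoder in an OpenAI-style `/v1/embeddings` endpoint plus a `/ping` health check. A minimal client sketch, assuming the service was started with `python chroma_service.py` and is listening on the default port 5001, and that `requests` is installed; the query text is only illustrative.

```
# Minimal sketch of calling the embedding service started by `python chroma_service.py`.
# Assumes it is listening on localhost:5001 (the default --port value).
import requests

resp = requests.post(
    "http://localhost:5001/v1/embeddings",
    json={"input": "What is the total depth of well 15/9-F-1?", "model": "bge-base-en-v1.5"},
    timeout=30,
)
resp.raise_for_status()
payload = resp.json()
embedding = payload["data"][0]["embedding"]  # 768-dimensional vector for bge-base
print(len(embedding), payload["usage"])
```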
data/data/301f209e-0482-4481-b8d1-f7e72292463f/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26817d41f4fd0210de3aeaba692bcfd3787a3124105ea7113200931776d30dd5
+size 6424000

data/data/301f209e-0482-4481-b8d1-f7e72292463f/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dbb8ddc12ddf7fc70d7be8c485e0491a87e2adf54fa9e493e6770a9954bc6dc
+size 100

data/data/301f209e-0482-4481-b8d1-f7e72292463f/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b2f3900b00f3593016e97e0c793302cdfb9e13bd94aaaeff039584b4c7f1fd0
+size 122222

data/data/301f209e-0482-4481-b8d1-f7e72292463f/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f0f5ce310175e722a727cc950d4773f53e6ab072f4ce1ea08dc7aa69836db27
+size 8000

data/data/301f209e-0482-4481-b8d1-f7e72292463f/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d50fda141c21845865f5aeb5b07c4ca527224d3a1d1fcba07ce4393423ae560
+size 16976

data/data/35323312-d0b8-43cb-8e9e-d36d78781612/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:519ea8e24bfcbd36dfd739d83f3ee09c345b4969bfffcb327c560af291a41d5f
+size 6424000

data/data/35323312-d0b8-43cb-8e9e-d36d78781612/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dbb8ddc12ddf7fc70d7be8c485e0491a87e2adf54fa9e493e6770a9954bc6dc
+size 100

data/data/35323312-d0b8-43cb-8e9e-d36d78781612/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:868497c1c9d41f896adfade8c43754af78644b6de8480998e0e23f50475c2336
+size 122222

data/data/35323312-d0b8-43cb-8e9e-d36d78781612/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d6605bc81d2a30f2b7eee27c0a3801bc4f8d6f5a7a76f611d6c22868c5e5834
+size 8000

data/data/35323312-d0b8-43cb-8e9e-d36d78781612/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f0b3a52596b1e577d95ca9bbf963fce9773ea21c6613c18311d0f6a062406cf
+size 16976

data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c14b42bfd5993080f6b80bd122a7220ab962e51aa422c5d2e285561babaa18b
+size 9636000

data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb22cb3659faa58b51aec196bbec777d66e55abd3210e961a19a510b1b1dadd
+size 100

data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aa573492580955ef74bf849395a08ee817986a6ec47219c18ff9bfbaf87c08a
+size 184237

data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12e000f3d0061b967d71b98e3f88f45629235f908798763c35831e6c73f97c5c
+size 12000

data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f84cdd076847d328913012544c9921839b0bb43b9cb2b9fafd48e8c5b8b31f9a
+size 25736

data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
+size 3212000

data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
+size 100

data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e608af7d28f2213eed5a595d14fe2577707e0b332c3a30c90e700a7e39f76a
+size 4000

data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/link_lists.bin
ADDED
File without changes

data/data/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:294650f066c7050c555464def4c16c544d593f1d7a53f79d20d6956487dbd4df
+size 29925376

data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bab570b85b648a23d89c7ac0dcf49cbd61bb0be83e1af3219b0278a75a7fa3
+size 3212000

data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c1a8a65a02d7e986335d4f10011318afe72d6bd448675a177bb916de977de8
+size 100

data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55fd3e4a7a1d550fcc51d468a0c5b07a504ab210af325db13917cd67b088544
+size 31576

data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9762c629b316c3e412b4372b4cb95ebed3d0fc618e85f67ba691311fd48f893f
+size 4000

data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb1c2e6ee46100baebd0bba773d89ec3ef7b196fdeb4413e02f7fc0cab4357aa
+size 4888
gradio_demo.py
ADDED
@@ -0,0 +1,114 @@
+import time
+import gradio as gr
+import json
+import os
+from openai import OpenAI
+from gradio_multimodalchatbot import MultimodalChatbot
+from gradio.data_classes import FileData
+from config import settings
+from utils import query_pdfs
+
+def infer_chat(message):
+    model = settings.LLM_INFERENCE_MODEL
+    TOGETHER_API_KEY = settings.TOGETHER_API_KEY
+    client = OpenAI(
+        api_key=TOGETHER_API_KEY,
+        base_url='https://api.together.xyz/v1',
+    )
+    chat_response = client.chat.completions.create(
+        model=model,
+        messages=message,
+        top_p=0.2,
+        stream=False,
+    )
+    return chat_response.choices[0].message.content
+def paraphrase(message):
+    infer_message = [{
+        "role": "user",
+        "content": f"""
+        Simple paraphrase this message : `{message}`
+        Just return the paraphrased sentence, do not say anything else.
+        """
+    }]
+    response = infer_chat(infer_message)
+    return response
+def chat_response(message):
+    query_result = query_pdfs(message)
+
+    infer_message = [{
+        "role": "user",
+        "content": f"""
+        User will ask you the information relating to some pdf files.
+        This is their question: {message}
+        You are required to answer their question based on this relevant information which is queried from vector database according to the user's message. Extract the relevant info and answer to user. If the query result is table or picture,select the most appropriate table/picture provide path to the table. If the query give you different path, choose the first one.
+        This is the query result (information you rely on):
+        ```
+        {query_result}
+        ```
+        Your answer must be in this json format, dont reply anything else, like this:
+        {{
+        "text":"your response after concatenate all relative information to get the answer",
+        "files":"path_to_table or path_to_picture if you think a table or picture relate to user's question else you can leave this ''."
+        }}
+        """
+    }]
+    response = infer_chat(infer_message)
+    try:
+        json_loaded_response = json.loads(response)
+        print(json_loaded_response)
+    except:
+        print('JSON not properly generated')
+        print(response)
+
+    return json_loaded_response
+# # user_msg3 = {"text": "Give me a video clip please.",
+# #              "files": []}
+# # bot_msg3 = {"text": "Here is a video clip of the world",
+# #             "files": [{"file": FileData(path="table_Well Test Report_nr-20/table_page_004.png")},
+# #                       ]}
+
+# # conversation = [[user_msg3, bot_msg3]]
+
+# # with gr.Blocks() as demo:
+# #     MultimodalChatbot(value=conversation, height=800)
+
+
+# # demo.launch()
+
+
+
+def process_chat(message, history):
+    # Call your chat function
+    response = chat_response(message)
+
+    # Append the new message to history
+    history.append((message, response["text"]))
+
+    # Handle image display
+    image = None
+    if response["files"] and os.path.isfile(response["files"]):
+        try:
+            image = response["files"]
+        except Exception as e:
+            print(f"Error loading image: {e}")
+            image = None
+
+    return history, image
+
+if __name__ == "__main__":
+    with gr.Blocks() as demo:
+        chatbot = gr.Chatbot()
+        msg = gr.Textbox(label="Message")
+        image_output = gr.Image(label="Response Image")
+
+        msg.submit(
+            process_chat,
+            inputs=[msg, chatbot],
+            outputs=[chatbot, image_output],
+            queue=False
+        ).then(
+            lambda: "",
+            None,
+            msg
+        )
+    demo.launch(share=True)
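Editor's note: gradio_demo.py (above) reads `settings.LLM_INFERENCE_MODEL` and `settings.TOGETHER_API_KEY` from config.py, which is excluded by .gitignore and therefore not part of this commit. The sketch below is hypothetical: only the two attribute names are grounded in the code above; the class layout, environment-variable names, and example model ID are assumptions.

```
# Hypothetical config.py sketch; the real file is gitignored and not in this commit.
# Only LLM_INFERENCE_MODEL and TOGETHER_API_KEY are actually read by gradio_demo.py.
import os


class Settings:
    # Model served through the Together OpenAI-compatible endpoint (example value only).
    LLM_INFERENCE_MODEL: str = os.getenv(
        "LLM_INFERENCE_MODEL", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
    )
    TOGETHER_API_KEY: str = os.getenv("TOGETHER_API_KEY", "")


settings = Settings()
```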
logs/__pycache__/logger_config.cpython-311.pyc
ADDED
Binary file (2 kB).

logs/chat_inference.log
ADDED
File without changes

logs/chroma.log
ADDED
File without changes

logs/encoder_inference.log
ADDED
The diff for this file is too large to render. See raw diff.

logs/init_profile.log
ADDED
File without changes
logs/logger_config.py
ADDED
@@ -0,0 +1,34 @@
+from loguru import logger
+
+logger.add(
+    "logs/chat_inference.log",
+    filter=lambda record: record["extra"].get("logger_name") == "chat_inference",
+    level="INFO",
+)
+logger.add(
+    "logs/chroma.log",
+    filter=lambda record: record["extra"].get("logger_name") == "chroma",
+    level="INFO",
+)
+logger.add(
+    "logs/encoder_inference.log",
+    filter=lambda record: record["extra"].get("logger_name") == "encoder_inference",
+    level="INFO",
+)
+logger.add(
+    "logs/init_profile.log",
+    filter=lambda record: record["extra"].get("logger_name") == "init_profile",
+    level="INFO",
+)
+logger.add(
+    "logs/offline_flow.log",
+    filter=lambda record: record["extra"].get("logger_name") == "offline_flow",
+    level="INFO",
+)
+logger.add(
+    "logs/system.log",
+    filter=lambda record: record["extra"].get("logger_name") == "system",
+    level="INFO",
+)
+
+__all__ = ["logger"]
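Editor's note: the sinks in logs/logger_config.py (above) route records by the `logger_name` value bound into loguru's `extra` dict, so callers are expected to `bind()` that key before logging. A minimal usage sketch; the messages are illustrative, and the import path assumes the module is imported as `logs.logger_config` from the project root.

```
# Minimal usage sketch for the sinks configured in logs/logger_config.py.
# A record only reaches logs/chroma.log when "logger_name" is bound to "chroma".
from logs.logger_config import logger

chroma_logger = logger.bind(logger_name="chroma")
chroma_logger.info("Chroma collection loaded with {} documents", 1234)

system_logger = logger.bind(logger_name="system")
system_logger.warning("Falling back to CPU inference")
```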
logs/offline_flow.log
ADDED
File without changes

logs/system.log
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22f01ee701dc64347cbb987bea1dc435514f6e841e2b8534421a898e2c965eda
+size 26008376

pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fcc319424916d4e8b92dc638922a25ba8732570e1bc1803793328bd033a5130
+size 7041417

pdf/A Stratigraphic Reconstruction of Bulk Volatile Chemistry from Fluid Inclusions_FI090048b-1.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee93d7cc9fbe16d6beed138d9714d57acffb4bed5a171ae7d2847f54c3e3ce7
+size 1467698

pdf/BIOSTRAT_REPORT_1.pdf
ADDED
Binary file (974 kB).

pdf/BIOSTRAT_REPORT_2.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11d9163d4bad64581fbad705beff63169b6b1db12279eb1b29d761bc9c03a5b2
+size 1131578

pdf/DRILLING_REPORT_1.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ae6dc9c060c355867cd4e9de2c647b735389e256f9a0b8c9e45cc1b7ab3ab1
+size 1279799

pdf/FWR_completion.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59ce77015fc54d2e0f9fcc54ceec3dc3184b9b701124e12339ba907d021e793
+size 1468326