KhangPTT373 committed
Commit 683c41b · verified · 1 Parent(s): 5c0bbda

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50):
  1. .gitattributes +44 -0
  2. .gitignore +3 -0
  3. README.MD +38 -0
  4. README.md +3 -9
  5. __pycache__/config.cpython-311.pyc +0 -0
  6. __pycache__/utils.cpython-311.pyc +0 -0
  7. bge_model_ctranslate2/config.json +7 -0
  8. bge_model_ctranslate2/model.bin +3 -0
  9. bge_model_ctranslate2/vocabulary.json +0 -0
  10. chroma_service.py +89 -0
  11. data/data/301f209e-0482-4481-b8d1-f7e72292463f/data_level0.bin +3 -0
  12. data/data/301f209e-0482-4481-b8d1-f7e72292463f/header.bin +3 -0
  13. data/data/301f209e-0482-4481-b8d1-f7e72292463f/index_metadata.pickle +3 -0
  14. data/data/301f209e-0482-4481-b8d1-f7e72292463f/length.bin +3 -0
  15. data/data/301f209e-0482-4481-b8d1-f7e72292463f/link_lists.bin +3 -0
  16. data/data/35323312-d0b8-43cb-8e9e-d36d78781612/data_level0.bin +3 -0
  17. data/data/35323312-d0b8-43cb-8e9e-d36d78781612/header.bin +3 -0
  18. data/data/35323312-d0b8-43cb-8e9e-d36d78781612/index_metadata.pickle +3 -0
  19. data/data/35323312-d0b8-43cb-8e9e-d36d78781612/length.bin +3 -0
  20. data/data/35323312-d0b8-43cb-8e9e-d36d78781612/link_lists.bin +3 -0
  21. data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/data_level0.bin +3 -0
  22. data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/header.bin +3 -0
  23. data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/index_metadata.pickle +3 -0
  24. data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/length.bin +3 -0
  25. data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/link_lists.bin +3 -0
  26. data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/data_level0.bin +3 -0
  27. data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/header.bin +3 -0
  28. data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/length.bin +3 -0
  29. data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/link_lists.bin +0 -0
  30. data/data/chroma.sqlite3 +3 -0
  31. data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/data_level0.bin +3 -0
  32. data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/header.bin +3 -0
  33. data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/index_metadata.pickle +3 -0
  34. data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/length.bin +3 -0
  35. data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/link_lists.bin +3 -0
  36. gradio_demo.py +114 -0
  37. logs/__pycache__/logger_config.cpython-311.pyc +0 -0
  38. logs/chat_inference.log +0 -0
  39. logs/chroma.log +0 -0
  40. logs/encoder_inference.log +0 -0
  41. logs/init_profile.log +0 -0
  42. logs/logger_config.py +34 -0
  43. logs/offline_flow.log +0 -0
  44. logs/system.log +3 -0
  45. pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf +3 -0
  46. pdf/A Stratigraphic Reconstruction of Bulk Volatile Chemistry from Fluid Inclusions_FI090048b-1.pdf +3 -0
  47. pdf/BIOSTRAT_REPORT_1.pdf +0 -0
  48. pdf/BIOSTRAT_REPORT_2.pdf +3 -0
  49. pdf/DRILLING_REPORT_1.pdf +3 -0
  50. pdf/FWR_completion.pdf +3 -0
.gitattributes CHANGED
@@ -33,3 +33,47 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/data/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ logs/system.log filter=lfs diff=lfs merge=lfs -text
+ pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/A[[:space:]]Stratigraphic[[:space:]]Reconstruction[[:space:]]of[[:space:]]Bulk[[:space:]]Volatile[[:space:]]Chemistry[[:space:]]from[[:space:]]Fluid[[:space:]]Inclusions_FI090048b-1.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/BIOSTRAT_REPORT_2.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/DRILLING_REPORT_1.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/FWR_completion.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/MWD_REPORT_2.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/PETROPHYSICAL_REPORT_1[[:space:]](3).pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/PETROPHYSICAL_REPORT_1.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/PETROPHYSICAL_REPORT_4.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/Rock[[:space:]]Mechanical[[:space:]]Testing[[:space:]]Triaxial[[:space:]]tests[[:space:]]on[[:space:]]sandstone[[:space:]]Well[[:space:]]15-9-19[[:space:]]A.pdf filter=lfs diff=lfs merge=lfs -text
+ pdf/Well[[:space:]]Test[[:space:]]Report_nr-20.pdf filter=lfs diff=lfs merge=lfs -text
+ pictures/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03/pictures_page_066.png filter=lfs diff=lfs merge=lfs -text
+ pictures/RXT10010NS_Statoil_Volve_Seismic_QC_Report_v03/pictures_page_071.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_006.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_008.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_015.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_018.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_025.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_026.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_030.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_031.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_032.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_033.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_034.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_035.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_040.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_042.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_043.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_045.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_046.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_047.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_048.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_049.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_051.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_052.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_055.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_064.png filter=lfs diff=lfs merge=lfs -text
+ pictures/Well[[:space:]]Test[[:space:]]Report_nr-20/pictures_page_068.png filter=lfs diff=lfs merge=lfs -text
+ tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_077.png filter=lfs diff=lfs merge=lfs -text
+ tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_088.png filter=lfs diff=lfs merge=lfs -text
+ tables/Well[[:space:]]Test[[:space:]]Report_nr-20/tables_page_089.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ venv
+ pdf_parsing_service.py
+ config.py
README.MD ADDED
@@ -0,0 +1,38 @@
+ # Folder structure
+ ```
+ ORAL_PDF_QA/
+ ├── __pycache__/
+ ├── bge_model_ctranslate2/
+ ├── data/
+ ├── parsed/
+ ├── logs/
+ ├── pdf/
+ ├── pictures/
+ ├── tables/
+ ├── venv/
+ ├── .gitignore
+ ├── chroma_service.py
+ ├── config.py
+ ├── gradio_demo.py
+ ├── pdf_parsing_service.py
+ ├── questions.txt
+ ├── README.MD
+ ├── requirements.txt
+ └── utils.py
+ ```
+ # Download
+ ```
+ pip install -r requirements.txt
+ ```
+ Download the `bge_model_ctranslate2` embedding model<br>
+ Download the `parsed` folder at https://drive.google.com/drive/folders/174I-pX1f7_mGG28Wwd9JPOgnOS5O16BA?usp=sharing<br>
+ Download the `tables` folder (extracted tables) from https://drive.google.com/drive/folders/12r0F_Ce25kecUSzp_HvjHjhrV6LbyYyx?usp=sharing<br>
+ Download the `pictures` folder (extracted pictures) from https://drive.google.com/drive/folders/1EvTLNNrBvQr-_lIzZSRL8ayrevKTmtJK?usp=sharing<br>
+ # Usage
+ ```
+ python chroma_service.py
+ ```
+
+ ```
+ python gradio_demo.py
+ ```
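As a quick sanity check between those two commands, here is a minimal sketch (standard library only, and assuming `chroma_service.py` is running on its default port 5001) that pings the embedding service before launching the demo:

```python
# Hypothetical health check: confirms the embedding service from chroma_service.py
# is reachable before starting gradio_demo.py. Assumes the default --port 5001.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:5001/ping") as resp:
    print(json.load(resp))  # expected: {"status": "pong"}
```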
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: KhangPTT373pdf Qa
- emoji: 💻
- colorFrom: purple
- colorTo: blue
+ title: KhangPTT373pdf_qa
+ app_file: gradio_demo.py
  sdk: gradio
- sdk_version: 5.5.0
- app_file: app.py
- pinned: false
+ sdk_version: 4.44.1
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/config.cpython-311.pyc ADDED
Binary file (3.29 kB).
 
__pycache__/utils.cpython-311.pyc ADDED
Binary file (10.9 kB).
 
bge_model_ctranslate2/config.json ADDED
@@ -0,0 +1,7 @@
+ {
+     "bos_token": "<s>",
+     "eos_token": "</s>",
+     "layer_norm_epsilon": 1e-12,
+     "multi_query_attention": false,
+     "unk_token": "[UNK]"
+ }
bge_model_ctranslate2/model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65735518664364784cdd7cb919e054777253fe7a7c76924f0a20ef539e5adac8
+ size 437937363
bge_model_ctranslate2/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
chroma_service.py ADDED
@@ -0,0 +1,89 @@
+ import ctranslate2
+ from transformers import AutoTokenizer
+
+ import torch
+ import numpy as np
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import os
+ import argparse
+ import time
+
+ model_name = "BAAI/bge-base-en-v1.5"
+ model_save_path = "bge_model_ctranslate2"
+ # model_path = "bge_model_ctranslate2_base"
+
+
+ device = "cpu"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ if device == "cuda":
+     translator = ctranslate2.Encoder(
+         model_save_path, device=device, compute_type="float16"
+     )  # or "cuda" for GPU
+ else:
+     translator = ctranslate2.Encoder(model_save_path, device=device)
+
+
+ def generate_embeddings(text):
+     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+     input_ids = inputs["input_ids"].tolist()[0]
+     output = translator.forward_batch([input_ids])
+     pooler_output = output.pooler_output
+     if device == "cuda":
+         embeddings = (
+             torch.as_tensor(pooler_output, device=device).detach().cpu().tolist()[0]
+         )
+     else:
+         pooler_output = np.array(pooler_output)
+         embeddings = torch.as_tensor(pooler_output, device=device).detach().tolist()[0]
+     return embeddings
+
+
+ app = FastAPI()
+
+
+ class EmbeddingRequest(BaseModel):
+     input: str
+     model: str
+
+
+ class EmbeddingResponse(BaseModel):
+     object: str = "list"
+     data: list
+     model: str
+     usage: dict
+
+
+ @app.post("/v1/embeddings", response_model=EmbeddingResponse)
+ async def embeddings(request: EmbeddingRequest):
+     input_text = request.input
+     if not input_text:
+         raise HTTPException(status_code=400, detail="No input text provided")
+
+     # Generate embeddings
+     embeddings = generate_embeddings(input_text)
+
+     # Construct the response in OpenAI format
+     response = {
+         "object": "list",
+         "data": [{"object": "embedding", "embedding": embeddings, "index": 0}],
+         "model": request.model,
+         "usage": {
+             "prompt_tokens": len(input_text.split()),
+             "total_tokens": len(input_text.split()),
+         },
+     }
+
+     return response
+
+
+ @app.get("/ping")
+ async def ping():
+     return {"status": "pong"}
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--port", type=int, default=5001)
+     args = parser.parse_args()
+     import uvicorn
+
+     uvicorn.run(app, host="0.0.0.0", port=args.port)
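For reference, a hedged sketch of calling the OpenAI-style `/v1/embeddings` endpoint defined above, using only the standard library; the input text and model label are illustrative, and the service is assumed to be running locally on the default port 5001.

```python
# Illustrative client for the /v1/embeddings route above (not part of the repository).
import json
import urllib.request

payload = json.dumps({
    "input": "porosity of the Hugin formation",  # example query text
    "model": "bge-base-en-v1.5",                 # echoed back in the response
}).encode("utf-8")

req = urllib.request.Request(
    "http://localhost:5001/v1/embeddings",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

vector = body["data"][0]["embedding"]
print(len(vector))  # 768 for BAAI/bge-base-en-v1.5
```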
data/data/301f209e-0482-4481-b8d1-f7e72292463f/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26817d41f4fd0210de3aeaba692bcfd3787a3124105ea7113200931776d30dd5
+ size 6424000
data/data/301f209e-0482-4481-b8d1-f7e72292463f/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4dbb8ddc12ddf7fc70d7be8c485e0491a87e2adf54fa9e493e6770a9954bc6dc
+ size 100
data/data/301f209e-0482-4481-b8d1-f7e72292463f/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b2f3900b00f3593016e97e0c793302cdfb9e13bd94aaaeff039584b4c7f1fd0
+ size 122222
data/data/301f209e-0482-4481-b8d1-f7e72292463f/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f0f5ce310175e722a727cc950d4773f53e6ab072f4ce1ea08dc7aa69836db27
+ size 8000
data/data/301f209e-0482-4481-b8d1-f7e72292463f/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d50fda141c21845865f5aeb5b07c4ca527224d3a1d1fcba07ce4393423ae560
+ size 16976
data/data/35323312-d0b8-43cb-8e9e-d36d78781612/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:519ea8e24bfcbd36dfd739d83f3ee09c345b4969bfffcb327c560af291a41d5f
+ size 6424000
data/data/35323312-d0b8-43cb-8e9e-d36d78781612/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4dbb8ddc12ddf7fc70d7be8c485e0491a87e2adf54fa9e493e6770a9954bc6dc
+ size 100
data/data/35323312-d0b8-43cb-8e9e-d36d78781612/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:868497c1c9d41f896adfade8c43754af78644b6de8480998e0e23f50475c2336
+ size 122222
data/data/35323312-d0b8-43cb-8e9e-d36d78781612/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d6605bc81d2a30f2b7eee27c0a3801bc4f8d6f5a7a76f611d6c22868c5e5834
+ size 8000
data/data/35323312-d0b8-43cb-8e9e-d36d78781612/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f0b3a52596b1e577d95ca9bbf963fce9773ea21c6613c18311d0f6a062406cf
+ size 16976
data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c14b42bfd5993080f6b80bd122a7220ab962e51aa422c5d2e285561babaa18b
+ size 9636000
data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffb22cb3659faa58b51aec196bbec777d66e55abd3210e961a19a510b1b1dadd
+ size 100
data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7aa573492580955ef74bf849395a08ee817986a6ec47219c18ff9bfbaf87c08a
+ size 184237
data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12e000f3d0061b967d71b98e3f88f45629235f908798763c35831e6c73f97c5c
+ size 12000
data/data/6083e0ad-9de1-41aa-99b2-96721b0c344d/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f84cdd076847d328913012544c9921839b0bb43b9cb2b9fafd48e8c5b8b31f9a
+ size 25736
data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
+ size 3212000
data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
+ size 100
data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5e608af7d28f2213eed5a595d14fe2577707e0b332c3a30c90e700a7e39f76a
+ size 4000
data/data/6216d5be-f9de-43b9-b3b1-d131ad3f5560/link_lists.bin ADDED
File without changes
data/data/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:294650f066c7050c555464def4c16c544d593f1d7a53f79d20d6956487dbd4df
+ size 29925376
data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9bab570b85b648a23d89c7ac0dcf49cbd61bb0be83e1af3219b0278a75a7fa3
+ size 3212000
data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4c1a8a65a02d7e986335d4f10011318afe72d6bd448675a177bb916de977de8
+ size 100
data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b55fd3e4a7a1d550fcc51d468a0c5b07a504ab210af325db13917cd67b088544
+ size 31576
data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9762c629b316c3e412b4372b4cb95ebed3d0fc618e85f67ba691311fd48f893f
+ size 4000
data/data/d7b3a1db-45de-4619-8ec9-81f7ee57d558/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb1c2e6ee46100baebd0bba773d89ec3ef7b196fdeb4413e02f7fc0cab4357aa
+ size 4888
gradio_demo.py ADDED
@@ -0,0 +1,114 @@
+ import time
+ import gradio as gr
+ import json
+ import os
+ from openai import OpenAI
+ from gradio_multimodalchatbot import MultimodalChatbot
+ from gradio.data_classes import FileData
+ from config import settings
+ from utils import query_pdfs
+
+
+ def infer_chat(message):
+     model = settings.LLM_INFERENCE_MODEL
+     TOGETHER_API_KEY = settings.TOGETHER_API_KEY
+     client = OpenAI(
+         api_key=TOGETHER_API_KEY,
+         base_url='https://api.together.xyz/v1',
+     )
+     chat_response = client.chat.completions.create(
+         model=model,
+         messages=message,
+         top_p=0.2,
+         stream=False,
+     )
+     return chat_response.choices[0].message.content
+
+
+ def paraphrase(message):
+     infer_message = [{
+         "role": "user",
+         "content": f"""
+         Paraphrase this message simply: `{message}`
+         Return only the paraphrased sentence; do not say anything else.
+         """
+     }]
+     response = infer_chat(infer_message)
+     return response
+
+
+ def chat_response(message):
+     query_result = query_pdfs(message)
+
+     infer_message = [{
+         "role": "user",
+         "content": f"""
+         The user will ask you for information relating to some PDF files.
+         This is their question: {message}
+         Answer the question based on the relevant information below, which was queried from the vector database using the user's message. Extract the relevant info and answer the user. If the query result is a table or picture, select the most appropriate table/picture and provide its path. If the query gives you several paths, choose the first one.
+         This is the query result (the information you rely on):
+         ```
+         {query_result}
+         ```
+         Your answer must be in this JSON format; do not reply with anything else:
+         {{
+             "text": "your response after concatenating all relevant information into the answer",
+             "files": "path_to_table or path_to_picture if you think a table or picture relates to the user's question; otherwise leave this ''."
+         }}
+         """
+     }]
+     response = infer_chat(infer_message)
+     try:
+         json_loaded_response = json.loads(response)
+         print(json_loaded_response)
+     except Exception:
+         print('JSON not properly generated')
+         print(response)
+         # Fall back to the raw model output so the caller always receives a dict
+         json_loaded_response = {"text": response, "files": ""}
+
+     return json_loaded_response
+
+ # # user_msg3 = {"text": "Give me a video clip please.",
+ # #              "files": []}
+ # # bot_msg3 = {"text": "Here is a video clip of the world",
+ # #             "files": [{"file": FileData(path="table_Well Test Report_nr-20/table_page_004.png")}]}
+
+ # # conversation = [[user_msg3, bot_msg3]]
+
+ # # with gr.Blocks() as demo:
+ # #     MultimodalChatbot(value=conversation, height=800)
+
+ # # demo.launch()
+
+
+ def process_chat(message, history):
+     # Call the chat function
+     response = chat_response(message)
+
+     # Append the new message to history
+     history.append((message, response["text"]))
+
+     # Handle image display
+     image = None
+     if response["files"] and os.path.isfile(response["files"]):
+         try:
+             image = response["files"]
+         except Exception as e:
+             print(f"Error loading image: {e}")
+             image = None
+
+     return history, image
+
+
+ if __name__ == "__main__":
+     with gr.Blocks() as demo:
+         chatbot = gr.Chatbot()
+         msg = gr.Textbox(label="Message")
+         image_output = gr.Image(label="Response Image")
+
+         msg.submit(
+             process_chat,
+             inputs=[msg, chatbot],
+             outputs=[chatbot, image_output],
+             queue=False
+         ).then(
+             lambda: "",
+             None,
+             msg
+         )
+     demo.launch(share=True)
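For clarity, a hedged example of the JSON contract that `chat_response()` asks the model to follow and that `process_chat()` consumes; the answer text is illustrative, while the table path matches one of the files tracked in `.gitattributes` above.

```python
# Illustrative shape of the model reply expected by process_chat() (example values only).
example_reply = {
    "text": "A short answer assembled from the retrieved report chunks.",
    "files": "tables/Well Test Report_nr-20/tables_page_077.png",
}
# process_chat() appends (user_message, example_reply["text"]) to the chat history,
# and shows example_reply["files"] in the gr.Image output when that path exists on disk.
```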
logs/__pycache__/logger_config.cpython-311.pyc ADDED
Binary file (2 kB).
 
logs/chat_inference.log ADDED
File without changes
logs/chroma.log ADDED
File without changes
logs/encoder_inference.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/init_profile.log ADDED
File without changes
logs/logger_config.py ADDED
@@ -0,0 +1,34 @@
+ from loguru import logger
+
+ logger.add(
+     "logs/chat_inference.log",
+     filter=lambda record: record["extra"].get("logger_name") == "chat_inference",
+     level="INFO",
+ )
+ logger.add(
+     "logs/chroma.log",
+     filter=lambda record: record["extra"].get("logger_name") == "chroma",
+     level="INFO",
+ )
+ logger.add(
+     "logs/encoder_inference.log",
+     filter=lambda record: record["extra"].get("logger_name") == "encoder_inference",
+     level="INFO",
+ )
+ logger.add(
+     "logs/init_profile.log",
+     filter=lambda record: record["extra"].get("logger_name") == "init_profile",
+     level="INFO",
+ )
+ logger.add(
+     "logs/offline_flow.log",
+     filter=lambda record: record["extra"].get("logger_name") == "offline_flow",
+     level="INFO",
+ )
+ logger.add(
+     "logs/system.log",
+     filter=lambda record: record["extra"].get("logger_name") == "system",
+     level="INFO",
+ )
+
+ __all__ = ["logger"]
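A minimal usage sketch for the sinks above: each module binds `logger_name` so its records match exactly one filter (the import path assumes the repository root is on `sys.path`).

```python
# Hypothetical usage of the named sinks configured in logs/logger_config.py.
from logs.logger_config import logger

chroma_log = logger.bind(logger_name="chroma")
chroma_log.info("Loaded collection with {} chunks", 1234)          # -> logs/chroma.log

system_log = logger.bind(logger_name="system")
system_log.info("Embedding service listening on port {}", 5001)    # -> logs/system.log
```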
logs/offline_flow.log ADDED
File without changes
logs/system.log ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22f01ee701dc64347cbb987bea1dc435514f6e841e2b8534421a898e2c965eda
+ size 26008376
pdf/15_9_F1_F1A_F1B_EOWR_directional_drilling_MWD_Mudlogging.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0fcc319424916d4e8b92dc638922a25ba8732570e1bc1803793328bd033a5130
+ size 7041417
pdf/A Stratigraphic Reconstruction of Bulk Volatile Chemistry from Fluid Inclusions_FI090048b-1.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ee93d7cc9fbe16d6beed138d9714d57acffb4bed5a171ae7d2847f54c3e3ce7
+ size 1467698
pdf/BIOSTRAT_REPORT_1.pdf ADDED
Binary file (974 kB).
 
pdf/BIOSTRAT_REPORT_2.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11d9163d4bad64581fbad705beff63169b6b1db12279eb1b29d761bc9c03a5b2
+ size 1131578
pdf/DRILLING_REPORT_1.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7ae6dc9c060c355867cd4e9de2c647b735389e256f9a0b8c9e45cc1b7ab3ab1
+ size 1279799
pdf/FWR_completion.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f59ce77015fc54d2e0f9fcc54ceec3dc3184b9b701124e12339ba907d021e793
+ size 1468326