4172637469634d6f6e6b6579733a33302e30372e3234 (hex for "ArcticMonkeys:30.07.24", per the version-naming convention noted in audio_processing/config.py)
Browse files
- .gitignore +2 -0
- .streamlit/config.toml +1 -1
- app.py +102 -41
- audio_processing/A2T.py +6 -6
- audio_processing/T2A.py +11 -14
- audio_processing/config.py +1 -19
- command/basic/basic_commands.py +0 -37
- command/command_interface.py +0 -20
- command/commands.yaml +0 -25
- command/utils/form_documentation.py +0 -45
- command/utils/load_yaml.py +0 -12
- infrastructure/__init__.py +0 -0
- languages.txt +111 -10
- llm/__init__.py +0 -0
- llm/apimodels/gemini_model.py +110 -0
- llm/apimodels/hf_model.py +186 -0
- llm/huggingfacehub/hf_model.py +0 -140
- llm/llamacpp/lc_model.py +54 -99
- llm/llm_factory.py +0 -27
- llm/models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin +0 -3
- llm/prompts.yaml +0 -76
- llm/utils/chat.py +149 -0
- llm/{config.py → utils/config.py} +4 -4
- llm/{hf_interface.py → utils/hf_interface.py} +3 -3
- llm/{lc_interface.py → utils/lc_interface.py} +0 -0
- llm/utils/toggle.py +0 -0
- logs/chelsea_llm_chat.log +1 -0
- logs/chelsea_llm_gemini.log +1 -0
- logs/chelsea_llm_huggingfacehub.log +2 -0
- logs/chelsea_llm_llamacpp.log +1 -0
- requirements.txt +6 -2
- todo.txt +0 -7
- {command → utils}/__init__.py +0 -0
- utils/documentation.py +14 -0
- utils/keywords.py +286 -0
- utils/prompt_toggle.py +59 -0
- utils/prompts.yaml +95 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
+**/__pycache__
+.history/
.streamlit/config.toml
CHANGED
@@ -1,2 +1,2 @@
 [theme]
-backgroundColor = "
+backgroundColor = "#363534"
app.py
CHANGED
@@ -1,59 +1,120 @@
-# version -
+# version - ArcticMonkeys:30.07.24
 
 # python core libraries
-import os
 import psutil
+# streamlit
+import streamlit as st
 # components from other authors
 from streamlit_mic_recorder import mic_recorder
 # core modules
 from audio_processing.A2T import A2T
 from audio_processing.T2A import T2A
-from
-[old lines 15-29 removed; content not shown in this view]
+from llm.utils.chat import Conversation
+# utils modules
+from utils.keywords import keywords
+from utils.prompt_toggle import select_prompt, load_prompts
+from utils.documentation import Documentation
+
+# TODO:
+# * Add the ability in utils to use different prompts -> Done
+# * Add what was done on HF and what is shown in the photo -> agent
+# * Add the ability to solve my problems with drawings -> agent
+# * Add the ability to create/edit documents (pdf, docx) -> agent
+
+prompts = load_prompts()
+doc = Documentation()
+chat = Conversation()
 t2a = T2A()
 
 def main():
-[old lines 33-51 removed; content not shown in this view]
+    try:
+        mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
+        if mic is not None:
+            a2t = A2T(mic["bytes"])
+            text = a2t.predict()
+            print(f"Text: {text}")
+
+            prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
+            print(f"Prompt:\n{prompt}")
+            response = chat.chatting(prompt=prompt if prompt is not None else text)
+            t2a.autoplay(response)
+
+            if response:
+                st.markdown(f"Your input: {prompt}")
+                st.markdown(f"Chelsea response: {response}")
+
+            prompt = None
+            response = None
+    except Exception as e:
+        print(f"An error occurred in the main function, reason: {e}")
+        doc.execution()
 
 if __name__ == "__main__":
     print(f"Total Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")
     print(f"Available Memory: {psutil.virtual_memory().available / (1024**3):.2f} GB")
     print(f"CPU Cores: {psutil.cpu_count()}")
     print(f"CPU Usage: {psutil.cpu_percent()}%")
-
     main()
+    footer = """
+    <style>
+    /* Common styles for the footer */
+    .footer {
+        position: fixed;
+        left: 0;
+        bottom: 0;
+        width: 100%;
+        height: 60px; /* Set a fixed height for consistency */
+        font-size: 14px; /* Adjust font size for readability */
+        text-align: center;
+        padding: 15px 0; /* Reduced padding */
+        transition: color 0.3s, background-color 0.3s;
+    }
+
+    .footer p {
+        margin: 0; /* Remove default margins */
+        font-size: 18px; /* Adjust font size as needed */
+    }
+
+    a:link, a:visited {
+        text-decoration: dotted;
+        color: inherit; /* Use current text color */
+    }
+
+    a:hover, a:active {
+        background: linear-gradient(to right, #ffe44d, #ffdd1a, #ffd700, #ffd900);
+        -webkit-text-fill-color: transparent;
+        -webkit-background-clip: text;
+    }
+
+    .footer a:hover {
+        color: #ff4500; /* Different hover color */
+    }
+
+    /* Light mode styles */
+    @media (prefers-color-scheme: light) {
+        a:link, a:visited {
+            color: #0056b3; /* Blue color for links */
+        }
+
+        .footer a:hover {
+            color: #ff4500; /* Hover color for light mode */
+        }
+    }
+
+    /* Dark mode styles */
+    @media (prefers-color-scheme: dark) {
+        a:link, a:visited {
+            color: #ffd700; /* Gold color for links in dark mode */
+        }
+
+        .footer a:hover {
+            color: #ffa500; /* Hover color for dark mode */
+        }
+    }
+    </style>
+
+    <div class="footer">
+        <p>Please support the project on <a href="https://buymeacoffee.com/cineai" target="_blank">Buy Me a Coffee</a></p>
+    </div>
+    """
+    st.markdown(footer, unsafe_allow_html=True)
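utils/keywords.py and utils/prompt_toggle.py are added in this commit but their diffs are not expanded in this view. As a rough sketch only of how select_prompt and load_prompts might work: the function names and the utils/prompts.yaml file come from the change list, while the YAML structure, the keywords mapping, and every other detail below are assumptions.

    import yaml
    from typing import Optional

    def load_prompts(path: str = "utils/prompts.yaml") -> list:
        # Load the prompt entries added in this commit (structure assumed to
        # mirror the deleted llm/prompts.yaml: id, prompt_template, ...).
        with open(path, "r") as f:
            return yaml.safe_load(f).get("prompts", [])

    def select_prompt(input_text: str, prompts: list, keywords: dict) -> Optional[str]:
        # keywords is assumed to map a prompt id to its trigger words.
        # Return a filled-in template when the transcription mentions a trigger;
        # return None so app.py falls back to sending the raw text to the model.
        lowered = input_text.lower()
        for entry in prompts:
            triggers = keywords.get(entry["id"], [])
            if any(word.lower() in lowered for word in triggers):
                return entry["prompt_template"].format(question=input_text)
        return None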
audio_processing/A2T.py
CHANGED
@@ -1,6 +1,6 @@
-import numpy as np
-import librosa
 import io
+import librosa
+import numpy as np
 
 from typing import Optional
 
@@ -15,7 +15,7 @@ class A2T:
 
     def __generate_text(self, inputs, task: Optional[str] = None) -> str:
         if inputs is None:
-            raise
+            raise ValueError(f"Input audio is None {inputs}, please provide audio")
 
         transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
         return transcribed_text
@@ -34,7 +34,7 @@ class A2T:
             print(f"Sample rate : {sample_rate}")
             return audio
         except Exception as e:
-            print(f"Error loading audio: {e}")
+            print(f"Error loading audio in the preprocess function in the A2T class: {e}")
 
     def predict(self) -> str:
         try:
@@ -43,7 +43,7 @@ class A2T:
                 audio = self.__preprocess(raw=raw)
                 print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
             else:
-                raise
+                raise ValueError(f"Please provide audio, your audio is {self.mic}")
 
             if isinstance(audio, np.ndarray):
                 return self.__generate_text(inputs=audio, task=TASK)
@@ -51,4 +51,4 @@ class A2T:
                 raise ValueError("Audio is not np array")
 
         except Exception as e:
-            print(f"
+            print(f"An error occurred in the predict function in the A2T class: {e}")
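For context, a minimal usage sketch of A2T as it is wired up in app.py above; the mic_recorder call and the mic["bytes"] access are taken directly from this commit, and the snippet only runs inside a Streamlit app with the Space's dependencies installed.

    import streamlit as st
    from streamlit_mic_recorder import mic_recorder
    from audio_processing.A2T import A2T

    mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True)
    if mic is not None:
        # raw WAV bytes -> preprocessed np.ndarray -> Whisper transcription
        text = A2T(mic["bytes"]).predict()
        st.write(text)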
audio_processing/T2A.py
CHANGED
@@ -3,27 +3,24 @@ from streamlit_TTS import auto_play, text_to_audio
 
 
 class T2A:
-    def autoplay(self, input_text: Optional[str] = None, lang: str = "en"
+    def autoplay(self, input_text: Optional[str] = None, lang: str = "en") -> None:
         """
-        Plays audio
+        Plays audio based on the provided input text.
 
         Args:
             input_text (Optional[str], optional): Text to convert to audio. Defaults to None.
             lang (str, optional): Language for text-to-speech conversion. Defaults to "en".
-            just_once (bool, optional): Flag to control whether audio plays only once. Defaults to False.
         """
 
-        if input_text is
-            if isinstance(input_text, str):
-                audio = text_to_audio(input_text, language=lang)
-                if just_once:
-                    auto_play(audio)
-                    just_once = False
-            else:
-                text = f"The text you provided is of data type {type(input_text)}, only string type is accepted"
-                audio = text_to_audio(text, language=lang)
-                auto_play(audio)
-        else:
+        if input_text is None:
             text = "Please check the input text you have provided, it has a value of None"
             audio = text_to_audio(text, language=lang)
             auto_play(audio)
+            return  # stop here; otherwise execution falls through and synthesizes None
+
+        if not isinstance(input_text, str):
+            text = f"The text you provided is of data type {type(input_text)}, only string type is accepted"
+            audio = text_to_audio(text, language=lang)
+            auto_play(audio)
+            return  # same guard-clause early exit
+
+        audio = text_to_audio(input_text, language=lang)
+        auto_play(audio)
audio_processing/config.py
CHANGED
@@ -2,10 +2,8 @@
 # https://magictool.ai/tool/text-to-hex-converter/ Here ArcticMonkey is the name of the version and the rest is the date and time
 
 import torch
-
 from transformers import pipeline
 
-
 device = 0 if torch.cuda.is_available() else "cpu"
 
 checkpoint_whisper = "openai/whisper-medium"
@@ -15,20 +13,4 @@ pipe = pipeline(
     model=checkpoint_whisper,
     device=device,
     chunk_length_s=30,
-)
-
-# from parler_tts import ParlerTTSForConditionalGeneration
-# from transformers import AutoTokenizer, AutoFeatureExtractor
-
-# checkpoint_parler = "parler-tts/parler_tts_mini_v0.1"
-
-# model_parler = ParlerTTSForConditionalGeneration.from_pretrained(checkpoint_parler).to(device)
-# tokenizer = AutoTokenizer.from_pretrained(checkpoint_parler)
-# feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint_parler)
-
-# SAMPLE_RATE = feature_extractor.sampling_rate
-# SEED = 42
-
-# checkpoint_mms_tts_eng = "facebook/mms-tts-eng"
-
-# pipe_tts = pipeline("text-to-speech", model=checkpoint_mms_tts_eng)
+)
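A short usage sketch of the resulting Whisper pipeline, mirroring the call in A2T.__generate_text above. It assumes BATCH_SIZE and TASK are defined alongside pipe in this config module (they are used in A2T.py but not shown in this hunk), and that the input is a mono float32 waveform sampled at 16 kHz, which is what Whisper's feature extractor expects.

    import numpy as np
    from audio_processing.config import pipe, BATCH_SIZE, TASK  # BATCH_SIZE/TASK assumed to live here

    # One second of silence as a stand-in for a real recording.
    audio = np.zeros(16000, dtype=np.float32)
    result = pipe(audio, batch_size=BATCH_SIZE, generate_kwargs={"task": TASK}, return_timestamps=True)
    print(result["text"])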
command/basic/basic_commands.py
DELETED
@@ -1,37 +0,0 @@
-import os
-import logging
-
-import streamlit as st
-
-from typing import Optional, Dict, Any
-from ..command_interface import CommandInterface
-
-
-class Documentation(CommandInterface):
-    def __init__(self,
-                 command_file: str,
-                 commands: Optional[str],
-                 llm: Any,
-                 id: int):
-
-        self.command_file = command_file
-
-        if commands is not None:
-            self.commands = commands
-        else:
-            self.commands = []
-
-        self.llm = llm
-
-        self.id = id
-
-    def get_command() -> Dict[str, Dict[str, list]]:
-
-        pass
-
-    def is_contains(commands: Optional[str], llm: Any, id: Any, command: Dict[str, Dict[str, list]]) -> bool:
-
-        pass
-
-    def execute() -> Any:
-        pass
command/command_interface.py
DELETED
@@ -1,20 +0,0 @@
-from typing import Optional, Dict, Any
-from abc import ABC, abstractmethod
-
-
-class CommandHandler(ABC):
-    # {"base": {"name": command}, "advance": {"name": command}}
-    @abstractmethod
-    def get_command() -> Dict[str, Dict[str, list]]:
-        "Method to get a command from the yaml file and return a dictionary"
-        pass
-
-    @abstractmethod
-    def is_contains() -> bool:
-        """Method to check whether the text contains a command"""
-        pass
-
-    @abstractmethod
-    def execute() -> Any:
-        "Method to execute a command"
-        pass
command/commands.yaml
DELETED
@@ -1,25 +0,0 @@
-# This file is needed to check what the user wants to do when using Chelsea. The structure is similar to the prompts.
-# id: just a number between 1 and 100, an integer
-# command_dict: all options for how the user can pronounce the command, divided into two languages - English (en) and Ukrainian (ua)
-# description: a simple description of the command
-
-commands:
-  - id: 1
-    command_dict: {"en": ["Documentation"], "ua": ["Документація"]}
-    description: receive documentation
-  - id: 2
-    command_dict: {"en": ["Buy Me A Coffee", "BMAC", "Coffee", "Pay the ghost"], "ua": ["Кава", "Заплати примарі"]}
-    description: support project on Buy Me A Coffee
-  - id: 3
-    command_dict: {"en": ["Translate"], "ua": ["Переклад"]}
-    description: translate
-  - id: 4
-    command_dict: {"en": ["Change model"], "ua": ["Змінити модель"]}
-    description: change model
-  - id: 5
-    command_dict: {"en": ["Yes", "Yeah", "Yep"], "ua": ["Так", "Ага"]}
-    description: positive agreement
-  - id: 6
-    command_dict: {"en": ["No", "Nah"], "ua": ["Ні", "Нєт", "Ніт"]}
-    description: negative agreement
command/utils/form_documentation.py
DELETED
@@ -1,45 +0,0 @@
-import os
-import logging
-
-from typing import Optional
-
-TEMPLATE = """
-A complete list of commands that are designed to facilitate the use of the voice assistant Chelsea.
-The complete list consists of no more than 100 commands written in a txt file.
-The list of commands will be updated as the assistant is developed.
-The first version of the programme (Arctic Monkeys) contains a total of 6 commands.
-
-The list of commands and their use.
-
-documentation command: first used to inform you how you are able to interact with the assistant. To call this command just say Documentation in English or
-Документація in Ukrainian. Note you can use the assistant without those commands; however, for a more advanced experience I strongly recommend using them.
-
-bmac command: Support the author on Buy Me a Coffee. To activate this command you can say in English Buy Me A Coffee, BMAC, Coffee, Pay the ghost and
-in Ukrainian Кава, Заплати примарі.
-
-translate command: Use for translating speech into the language of your choice. The command in English is Translate and in Ukrainian is Переклад.
-
-change model command: You are able to choose a model using the Hugging Face API (hf) or a local model using Llama. The models for hf are Mistaril and TinyLlama, and for lc: Phi 3 and TinyLlama as well.
-To call the command use Change model in English and Змінити модель in Ukrainian.
-
-yes command: Command to confirm your consent. To call the command use Yes, Yeah, Yep in English and Так, Ага in Ukrainian.
-
-no command: Command to confirm your disagreement. To call the command use No, Nah in English and Ні, Нєт, Ніт in Ukrainian.
-"""
-
-
-def generate_doc(path: Optional[str] = None) -> Optional[str]:
-    if path is not None:
-        file = os.path.join(path, NAME)
-    else:
-        current_dir = os.path.dirname(os.path.realpath(__file__))
-        file = os.path.join(current_dir, NAME)
-
-    logging.info(file)
-
-    try:
-        with open(file, 'w') as f:
-            f.write(TEMPLATE)
-        return file
-    except IOError as e:
-        logging.error(e)
command/utils/load_yaml.py
DELETED
@@ -1,12 +0,0 @@
-import yaml
-from typing import Any
-
-
-def load_commands_from_yaml(file_path: str) -> Any:
-    try:
-        with open(file_path, 'r') as file:
-            commands_data = yaml.safe_load(file)
-            print(commands_data)
-            return commands_data.get('commands', [])
-    except IOError as e:
-        print(f"Error: {e}")
infrastructure/__init__.py
DELETED
File without changes
languages.txt
CHANGED
@@ -1,10 +1,111 @@
-'english',
-[old lines 2-10 truncated in this view]
+'english',
+'chinese',
+'german',
+'spanish',
+'korean',
+'french',
+'japanese',
+'portuguese',
+'turkish',
+'polish',
+'catalan',
+'dutch',
+'arabic',
+'swedish',
+'italian',
+'indonesian',
+'hindi',
+'finnish',
+'vietnamese',
+'hebrew',
+'ukrainian',
+'greek',
+'malay',
+'czech',
+'romanian',
+'danish',
+'hungarian',
+'tamil',
+'norwegian',
+'thai',
+'urdu',
+'croatian',
+'bulgarian',
+'lithuanian',
+'latin',
+'maori',
+'malayalam',
+'welsh',
+'slovak',
+'telugu',
+'persian',
+'latvian',
+'bengali',
+'serbian',
+'azerbaijani',
+'slovenian',
+'kannada',
+'estonian',
+'macedonian',
+'breton',
+'basque',
+'icelandic',
+'armenian',
+'nepali',
+'mongolian',
+'bosnian',
+'kazakh',
+'albanian',
+'swahili',
+'galician',
+'marathi',
+'punjabi',
+'sinhala',
+'khmer',
+'shona',
+'yoruba',
+'somali',
+'afrikaans',
+'occitan',
+'georgian',
+'belarusian',
+'tajik',
+'sindhi',
+'gujarati',
+'amharic',
+'yiddish',
+'lao',
+'uzbek',
+'faroese',
+'haitian creole',
+'pashto',
+'turkmen',
+'nynorsk',
+'maltese',
+'sanskrit',
+'luxembourgish',
+'myanmar',
+'tibetan',
+'tagalog',
+'malagasy',
+'assamese',
+'tatar',
+'hawaiian',
+'lingala',
+'hausa',
+'bashkir',
+'javanese',
+'sundanese',
+'cantonese',
+'burmese',
+'valencian',
+'flemish',
+'haitian',
+'letzeburgesch',
+'pushto',
+'panjabi',
+'moldavian',
+'moldovan',
+'sinhalese',
+'castilian',
+'mandarin'
llm/__init__.py
DELETED
File without changes
llm/apimodels/gemini_model.py
ADDED
@@ -0,0 +1,110 @@
+import os
+import logging
+
+from llm.utils.hf_interface import HFInterface
+
+from langchain_google_genai import GoogleGenerativeAI
+from abc import ABC
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.ERROR)
+
+file_handler = logging.FileHandler(
+    "logs/chelsea_llm_gemini.log")  # for all modules here the template for the log file is "llm/logs/chelsea_{module_name}_{entity}.log"
+logger.setLevel(logging.INFO)  # informed
+
+formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+file_handler.setFormatter(formatted)
+
+logger.addHandler(file_handler)
+logger.info("Getting information from apimodel module")
+
+
+# 429 - You've exceeded the rate limit.
+# 400 - The request body is malformed.
+# 403 - Your API key doesn't have the required permissions.
+# 404 - The requested resource wasn't found.
+# 500 - An unexpected error occurred on Google's side.
+# 503 - The service may be temporarily overloaded or down.
+
+# If one of these errors occurs, switch to HF; if the downloaded models are too slow,
+# switch to another one after the current request finishes; if there are problems with HF, switch to the local models;
+# if the user has no internet connection at all, nothing will help,
+# except perhaps suggesting that they download the repository.
+
+_api = os.environ.get("GEMINI_API_TOKEN")
+
+
+class Gemini(HFInterface, ABC):
+    """
+    This class represents a Gemini large language model interface.
+
+    It inherits from `HFInterface` (the project's model interface, defined in
+    llm/utils/hf_interface.py) and `ABC` (for abstract base class) to enforce specific functionalities.
+    """
+
+    def __init__(self):
+        """
+        Initializer for the Gemini class.
+
+        - Raises a `ValueError` if the provided API key is None or an empty string.
+        - Creates an instance of `GoogleGenerativeAI` using the specified model name
+          ("gemini-1.5-flash") and the stored API key.
+        """
+
+        if not _api:
+            raise ValueError(f"Your API key is None or an empty string {_api}, please provide a Gemini API key")
+
+        # {
+        #   'model': 'gemini-1.5-flash', 'temperature': 0.7, 'top_p': None,
+        #   'top_k': None, 'max_output_tokens': None, 'candidate_count': 1
+        # }
+        self.llm = GoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=_api)
+
+    def execution(self) -> GoogleGenerativeAI:
+        """
+        This method attempts to return the underlying `llm` (the language model object).
+
+        It wraps the retrieval in a `try-except` block to catch potential exceptions.
+        On success, it returns the `llm` object.
+        On failure, it logs an error message with the exception details.
+        """
+        try:
+            return self.llm
+        except Exception as e:
+            logger.error("Something wrong with Gemini api", exc_info=e)
+            print(f"Something wrong with Gemini api: {e}")
+
+    def model_name(self):
+        """
+        Simple method that returns the hardcoded model name ("gemini-1.5-flash").
+
+        This can be useful for identifying the specific model being used.
+        """
+        return "gemini-1.5-flash"
+
+    def __str__(self):
+        """
+        Defines the string representation of the Gemini object for human readability.
+
+        It returns a string indicating that it's a "Gemini model" and appends the model name
+        obtained from the `model_name` method.
+        """
+        return f"Gemini model: {self.model_name()}"
+
+    def __repr__(self):
+        """
+        Defines the representation of the Gemini object for debugging purposes.
+
+        It uses `hasattr` to check if the `llm` attribute is set.
+        - If `llm` exists, it returns a string like `Gemini(llm=GoogleGenerativeAI(...))`,
+          showing the class name and the `llm` object information.
+        - If `llm` is not yet set (during initialization), it returns
+          `Gemini(llm=not initialized)`, indicating the state.
+        """
+        llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
+        return f"{self.__class__.__name__}({llm_info})"
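The comment block above describes a fallback policy (Gemini first, then HF, then local models), but no dispatcher implementing it appears in this commit. A hedged sketch of what that switching logic could look like: the class names come from this commit, while the chain order, the ask_with_fallback name, and the invoke() usage (the standard LangChain LLM call) are assumptions.

    from llm.apimodels.gemini_model import Gemini
    from llm.apimodels.hf_model import HF_Mistaril, HF_TinyLlama
    from llm.llamacpp.lc_model import LC_TinyLlama

    def ask_with_fallback(prompt: str) -> str:
        # Try the remote APIs first, then fall back to a local llama.cpp model.
        for provider in (Gemini, HF_Mistaril, HF_TinyLlama, LC_TinyLlama):
            try:
                llm = provider().execution()
                if llm is not None:
                    return llm.invoke(prompt)  # standard LangChain LLM call
            except Exception as e:
                print(f"{provider.__name__} failed ({e}), trying the next provider")
        raise RuntimeError("No LLM provider available; consider cloning the repository and running locally")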
llm/apimodels/hf_model.py
ADDED
@@ -0,0 +1,186 @@
+import os
+import logging
+
+from abc import ABC
+from typing import Any
+
+from llm.utils.hf_interface import HFInterface
+from llm.utils.config import config
+
+from langchain_community.llms import HuggingFaceEndpoint
+
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.ERROR)  # because if something goes wrong in execution, the application can't work anyway
+
+file_handler = logging.FileHandler(
+    "logs/chelsea_llm_huggingfacehub.log")  # for all modules here the template for the log file is "llm/logs/chelsea_{module_name}_{dir_name}.log"
+logger.setLevel(logging.INFO)  # informed
+
+formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+file_handler.setFormatter(formatted)
+
+logger.addHandler(file_handler)
+logger.info("Getting information from apimodel module")
+
+_api = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+
+class HF_Mistaril(HFInterface, ABC):
+    """
+    This class represents an interface for the Mistaril large language model from Hugging Face.
+
+    It inherits from `HFInterface` (the project's model interface) and `ABC`
+    (for abstract base class) to enforce specific functionalities.
+    """
+
+    def __init__(self):
+        """
+        Initializer for the `HF_Mistaril` class.
+
+        - Retrieves configuration values for the Mistaril model from a `config` dictionary:
+            - `repo_id`: The ID of the repository containing the Mistaril model on Hugging Face.
+            - `max_length`: Maximum length of the generated text.
+            - `temperature`: Controls randomness in the generation process.
+            - `top_k`: Restricts the vocabulary used for generation.
+        - Raises a `ValueError` if the API key is missing.
+        - Creates an instance of `HuggingFaceEndpoint` using the retrieved configuration
+          and the API key.
+        """
+
+        repo_id = config["HF_Mistrail"]["model"]
+        max_length = config["HF_Mistrail"]["max_new_tokens"]
+        temperature = config["HF_Mistrail"]["temperature"]
+        top_k = config["HF_Mistrail"]["top_k"]
+
+        if not _api:
+            raise ValueError(f"API key not provided {_api}")
+
+        self.llm = HuggingFaceEndpoint(
+            repo_id=repo_id, max_length=max_length, temperature=temperature, top_k=top_k, token=_api
+        )
+
+    def execution(self) -> Any:
+        """
+        This method attempts to return the underlying `llm` (the language model object).
+
+        It wraps the retrieval in a `try-except` block to catch potential exceptions.
+        On success, it returns the `llm` object.
+        On failure, it logs an error message with the exception details.
+        """
+        try:
+            return self.llm  # `invoke()`
+        except Exception as e:
+            logger.error("Something wrong with API or HuggingFaceEndpoint", exc_info=e)
+            print(f"Something wrong with API or HuggingFaceEndpoint: {e}")
+
+    def model_name(self):
+        """
+        Simple method that returns the Mistaril model name from the configuration.
+
+        This can be useful for identifying the specific model being used.
+        """
+        return config["HF_Mistrail"]["model"]
+
+    def __str__(self):
+        """
+        Defines the string representation of the `HF_Mistaril` object for human readability.
+
+        It combines the class name and the model name retrieved from the `model_name` method
+        with an underscore separator.
+        """
+        return f"{self.__class__.__name__}_{self.model_name()}"
+
+    def __repr__(self):
+        """
+        Defines the representation of the `HF_Mistaril` object for debugging purposes.
+
+        It uses `hasattr` to check if the `llm` attribute is set.
+        - If `llm` exists, it returns a string like `HF_Mistaril(llm=HuggingFaceEndpoint(...))`,
+          showing the class name and the `llm` object information.
+        - If `llm` is not yet set (during initialization), it returns
+          `HF_Mistaril(llm=not initialized)`, indicating the state.
+        """
+        llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
+        return f"{self.__class__.__name__}({llm_info})"
+
+
+class HF_TinyLlama(HFInterface, ABC):
+    """
+    This class represents an interface for the TinyLlama large language model from Hugging Face.
+
+    It inherits from `HFInterface` (the project's model interface) and `ABC`
+    (for abstract base class) to enforce specific functionalities.
+    """
+
+    def __init__(self):
+        """
+        Initializer for the `HF_TinyLlama` class.
+
+        - Retrieves configuration values for the TinyLlama model from a `config` dictionary:
+            - `repo_id`: The ID of the repository containing the TinyLlama model on Hugging Face.
+            - `max_length`: Maximum length of the generated text.
+            - `temperature`: Controls randomness in the generation process.
+            - `top_k`: Restricts the vocabulary used for generation.
+        - Raises a `ValueError` if the API key is missing.
+        - Creates an instance of `HuggingFaceEndpoint` using the retrieved configuration
+          and the API key.
+        """
+
+        repo_id = config["HF_TinyLlama"]["model"]
+        max_length = config["HF_TinyLlama"]["max_new_tokens"]
+        temperature = config["HF_TinyLlama"]["temperature"]
+        top_k = config["HF_TinyLlama"]["top_k"]
+
+        if not _api:
+            raise ValueError(f"API key not provided {_api}")
+
+        self.llm = HuggingFaceEndpoint(
+            repo_id=repo_id, max_length=max_length, temperature=temperature, top_k=top_k, token=_api
+        )
+
+    def execution(self) -> Any:
+        """
+        This method attempts to return the underlying `llm` (the language model object).
+
+        It wraps the retrieval in a `try-except` block to catch potential exceptions.
+        On success, it returns the `llm` object.
+        On failure, it logs an error message with the exception details.
+        """
+        try:
+            return self.llm
+        except Exception as e:
+            logger.error("Something wrong with API or HuggingFaceEndpoint", exc_info=e)
+            print(f"Something wrong with API or HuggingFaceEndpoint: {e}")
+
+    def model_name(self):
+        """
+        Simple method that returns the TinyLlama model name from the configuration.
+
+        This can be useful for identifying the specific model being used.
+        """
+        return config["HF_TinyLlama"]["model"]
+
+    def __str__(self):
+        """
+        Defines the string representation of the `HF_TinyLlama` object for human readability.
+
+        It combines the class name and the model name retrieved from the `model_name` method
+        with an underscore separator.
+        """
+        return f"{self.__class__.__name__}_{self.model_name()}"
+
+    def __repr__(self):
+        """
+        Defines the representation of the `HF_TinyLlama` object for debugging purposes.
+
+        It uses `hasattr` to check if the `llm` attribute is set.
+        - If `llm` exists, it returns a string like `HF_TinyLlama(llm=HuggingFaceEndpoint(...))`,
+          showing the class name and the `llm` object information.
+        - If `llm` is not yet set (during initialization), it returns
+          `HF_TinyLlama(llm=not initialized)`, indicating the state.
+        """
+        llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
+        return f"{self.__class__.__name__}({llm_info})"
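llm/config.py is renamed to llm/utils/config.py in this commit but its contents are not expanded here. From the keys accessed above and in lc_model.py, the config dictionary plausibly has the shape sketched below; the concrete repo ids and numeric values are assumptions, not the commit's actual values.

    # Hypothetical shape of the config dict in llm/utils/config.py, inferred
    # from the keys read in hf_model.py; actual values are not shown in this view.
    config = {
        "HF_Mistrail": {
            "model": "mistralai/Mistral-7B-Instruct-v0.2",  # assumed repo id
            "max_new_tokens": 512,   # assumed
            "temperature": 0.7,      # assumed
            "top_k": 50,             # assumed
        },
        "HF_TinyLlama": {
            "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # assumed repo id
            "max_new_tokens": 512,   # assumed
            "temperature": 0.7,      # assumed
            "top_k": 50,             # assumed
        },
    }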
llm/huggingfacehub/hf_model.py
DELETED
@@ -1,140 +0,0 @@
-import os
-import yaml
-import logging
-
-from abc import ABC
-
-from llm.hf_interface import HFInterface
-from llm.config import config
-
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.llms import HuggingFaceHub
-
-logger = logging.getLogger(__name__)
-
-logger.setLevel(logging.CRITICAL)  # because if something goes wrong in execution, the application can't work anyway
-
-file_handler = logging.FileHandler(
-    "logs/chelsea_llm_huggingfacehub.log")  # for all modules here the template for the log file is "llm/logs/chelsea_{module_name}_{dir_name}.log"
-logger.setLevel(logging.INFO)  # informed
-
-formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
-file_handler.setFormatter(formatted)
-
-logger.addHandler(file_handler)
-
-logger.info("Getting information from hf_model module")
-
-llm_dir = '/home/user/app/llm/'
-
-path_to_yaml = os.path.join(os.getcwd(), "llm/prompts.yaml")
-
-print("Path to prompts : ", path_to_yaml)
-
-
-class HF_Mistaril(HFInterface, ABC):
-    def __init__(self, prompt_entity: str, prompt_id: int = 0):
-        self.prompt_entity = prompt_entity
-        self.prompt_id = prompt_id
-
-        self.model_config = config["HF_Mistrail"]
-
-        # Add repetition_penalty, task?, top_p, stop_sequences
-        self.llm = HuggingFaceHub(
-            repo_id=self.model_config["model"],
-            # temperature=self.model_config["temperature"],
-            # max_new_tokens=self.model_config["max_new_tokens"],
-            # top_k=self.model_config["top_k"],
-            model_kwargs={"load_in_8bit": self.model_config["load_in_8bit"],
-                          "temperature": self.model_config["temperature"],
-                          "max_new_tokens": self.model_config["max_new_tokens"],
-                          "top_k": self.model_config["top_k"],
-                          },
-            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
-        )
-
-    @staticmethod
-    def __read_yaml():
-        try:
-            yaml_file = os.path.join(llm_dir, 'prompts.yaml')
-            with open(yaml_file, 'r') as f:
-                data = yaml.safe_load(f)
-                f.close()
-                return data
-        except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.error(msg="Execution failed", exc_info=e)
-
-    def execution(self):
-        try:
-            data = self.__read_yaml()
-            prompts = data["prompts"][
-                self.prompt_id]  # get the second prompt from yaml; change the id parameter to get another prompt
-            template = prompts["prompt_template"]
-            prompt = PromptTemplate(template=template, input_variables=["entity"])
-            llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
-            output = llm_chain.invoke(self.prompt_entity)
-            return output["text"]
-        except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.critical(msg="Execution failed", exc_info=e)
-
-    def __str__(self):
-        return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
-
-    def __repr__(self):
-        return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
-
-
-class HF_TinyLlama(HFInterface, ABC):
-    def __init__(self, prompt_entity: str, prompt_id: int = 0):
-        self.prompt_entity = prompt_entity
-        self.prompt_id = prompt_id
-
-        self.model_config = config["HF_TinyLlama"]
-
-        self.llm = HuggingFaceHub(
-            repo_id=self.model_config["model"],
-            # temperature=self.model_config["temperature"],
-            # max_new_tokens=self.model_config["max_new_tokens"],
-            # top_k=self.model_config["top_k"],
-            model_kwargs={"load_in_8bit": self.model_config["load_in_8bit"],
-                          "temperature": self.model_config["temperature"],
-                          "max_new_tokens": self.model_config["max_new_tokens"],
-                          "top_k": self.model_config["top_k"],
-                          },
-            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
-        )
-
-    @staticmethod
-    def __read_yaml():
-        try:
-            yaml_file = os.path.join(llm_dir, 'prompts.yaml')
-            with open(yaml_file, 'r') as f:
-                data = yaml.safe_load(f)
-                f.close()
-                return data
-        except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.error(msg="Execution failed", exc_info=e)
-
-    def execution(self):
-        try:
-            data = self.__read_yaml()
-            prompts = data["prompts"][
-                self.prompt_id]  # get the second prompt from yaml; change the id parameter to get another prompt
-            template = prompts["prompt_template"]
-            prompt = PromptTemplate(template=template, input_variables=["entity"])
-            llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
-            output = llm_chain.invoke(self.prompt_entity)
-            return output["text"]
-        except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.critical(msg="Execution failed", exc_info=e)
-
-    def __str__(self):
-        return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
-
-    def __repr__(self):
-        return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
llm/llamacpp/lc_model.py
CHANGED
@@ -3,20 +3,14 @@ import logging
 from abc import ABC
 
 import requests
-import yaml
-from langchain.prompts import PromptTemplate
-from langchain_community.llms import LlamaCpp
-
-from llm.config import config
-from llm.lc_interface import LCInterface
 
-
+from langchain_community.llms import LlamaCpp
 
+from llm.utils.config import config
+from llm.utils.lc_interface import LCInterface
 
 logger = logging.getLogger(__name__)
-
-logger.setLevel(logging.CRITICAL)  # because if something goes wrong in execution the application can't work anymore
+logger.setLevel(logging.ERROR)  # because if something goes wrong in execution the application can't work anymore
 
 file_handler = logging.FileHandler(
     "logs/chelsea_llm_llamacpp.log")  # for all modules the template for the log file is "logs/chelsea_{module_name}_{dir_name}.log"
@@ -24,27 +18,16 @@ logger.setLevel(logging.INFO)  # informed
 
 formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
 file_handler.setFormatter(formatted)
-
 logger.addHandler(file_handler)
 
-try:
-    os.chdir('/home/user/app/llm/')
-except FileNotFoundError:
-    print("Error: Could not move up. You might be at the root directory.")
-
 work_dir = os.getcwd()
-
-models_dir = os.path.join(work_dir, "models")
+models_dir = os.path.join(work_dir, "llm/models")
 
 
 class LC_TinyLlama(LCInterface, ABC):
-    def __init__(self
-        self.prompt_entity = prompt_entity
-        self.prompt_id = prompt_id
-
+    def __init__(self):
         self.model_config = config["LC_TinyLlama-1.1B-Chat-v1.0-GGUF"]
 
-
         try:
             get_file = requests.get(self.model_config["model_url"])
             if get_file.status_code == 200:
@@ -60,43 +43,23 @@ class LC_TinyLlama(LCInterface, ABC):
             print(f"Error while writing a file to directory : {e}")
             logger.error(msg="Error while write a file to directory", exc_info=e)
 
-[old lines 63-72 removed; only their last line is visible in this view:]
-            logger.error(msg="Execution failed", exc_info=e)
+        self.llm = LlamaCpp(
+            model_path=os.path.join(models_dir, self.model_config["model_name"]),
+            temperature=self.model_config["temperature"],
+            max_tokens=self.model_config["max_tokens"],
+            top_p=self.model_config["top_p"],
+            top_k=self.model_config["top_k"],
+            # callback_manager=callback_manager,
+            verbose=True,  # Verbose is required to pass to the callback manager
+        )
 
     def execution(self):
         try:
-            [one removed line not shown in this view]
-            prompts = data["prompts"][
-                self.prompt_id]  # get the second prompt from yaml; change the id parameter to get another prompt
-            template = prompts["prompt_template"]
-            prompt = PromptTemplate(template=template, input_variables=["entity"])
-
-            llm = LlamaCpp(
-                model_path=os.path.join(models_dir, self.model_config["model_name"]),
-                temperature=self.model_config["temperature"],
-                max_tokens=self.model_config["max_tokens"],
-                top_p=self.model_config["top_p"],
-                top_k=self.model_config["top_k"],
-                # callback_manager=callback_manager,
-                verbose=True,  # Verbose is required to pass to the callback manager
-            )
-
-            logger.info(f"Check llm : {llm}")
-
-            llm_chain = prompt | llm
-            output = llm_chain.invoke({"question": self.prompt_entity})
-            return output
+            return self.llm
         except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.critical(msg="Execution failed", exc_info=e)
+            print(f"Execution failed in LC_TinyLlama execution function: {e}")
+            logger.critical(msg="Execution failed in LC_TinyLlama execution function", exc_info=e)
+            return None
 
@@ -108,8 +71,10 @@ class LC_TinyLlama(LCInterface, ABC):
                 # delete files from models directory except of current_lc
                 os.remove(value)
                 logger.info(f"Successfully deleted file {value}")
+                print(f"Successfully deleted file {value}")
         else:
             logger.info(f"Unfortunately dictionary empty or None")
+            print(f"Unfortunately dictionary {unused_model_dict} empty or None")
 
     def get_unused(self, current_lc):
 
@@ -121,21 +86,22 @@ class LC_TinyLlama(LCInterface, ABC):
             return {item: unused_model_file}
         else:
             return None
+
+    def model_name(self):
+        return self.model_config["model_name"]
 
     def __str__(self):
-        return f"
+        return f"{self.__class__.__name__}_{self.model_name()}"
 
     def __repr__(self):
-        [old lines truncated in this view]
-
+        llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
+        return f"{self.__class__.__name__}({llm_info})"
+
 
 class LC_Phi3(LCInterface, ABC):
-    def __init__(self
-        self.prompt_entity = prompt_entity
-        self.prompt_id = prompt_id
-
+    def __init__(self):
         self.model_config = config["LC_Phi-3-mini-4k-instruct-gguf"]
-
+
         try:
             get_file = requests.get(self.model_config["model_url"])
             if get_file.status_code == 200:
@@ -143,50 +109,33 @@ class LC_Phi3(LCInterface, ABC):
                 with open(path_to_model, "wb") as f:
                     f.write(get_file.content)
                     logger.info("Model file successfully recorded")
+                    print("Model file successfully recorded")
                 f.close()
         except FileExistsError:
+            print(f"Model file {path_to_model} already exists. Skipping download.")
            logger.info(f"Model file {path_to_model} already exists. Skipping download.")
         except OSError as e:
             print(f"Error while writing a file to directory : {e}")
             logger.error(msg="Error while write a file to directory", exc_info=e)
 
-[old lines 153-162 removed; content not shown in this view]
+        self.llm = LlamaCpp(
+            model_path=os.path.join(models_dir, self.model_config["model_name"]),
+            temperature=self.model_config["temperature"],
+            max_tokens=self.model_config["max_tokens"],
+            top_p=self.model_config["top_p"],
+            top_k=self.model_config["top_k"],
+            # callback_manager=callback_manager,
+            verbose=True,  # Verbose is required to pass to the callback manager
+        )
+
 
     def execution(self):
        try:
-            [one removed line not shown in this view]
-            prompts = data["prompts"][
-                self.prompt_id]  # get the second prompt from yaml; change the id parameter to get another prompt
-            template = prompts["prompt_template"]
-            prompt = PromptTemplate(template=template, input_variables=["entity"])
-
-            llm = LlamaCpp(
-                model_path=os.path.join(models_dir, self.model_config["model_name"]),
-                temperature=self.model_config["temperature"],
-                max_tokens=self.model_config["max_tokens"],
-                top_p=self.model_config["top_p"],
-                top_k=self.model_config["top_k"],
-                # callback_manager=callback_manager,
-                verbose=True,  # Verbose is required to pass to the callback manager
-            )
-
-            logger.info(f"Check llm : {llm}")
-
-            llm_chain = prompt | llm
-            output = llm_chain.invoke({"question": self.prompt_entity})
-            return output
+            return self.llm
        except Exception as e:
-            print(f"Execution failed : {e}")
-            logger.critical(msg="Execution failed", exc_info=e)
+            print(f"Execution failed in LC_Phi3 execution function: {e}")
+            logger.critical(msg="Execution failed in LC_Phi3 execution function:", exc_info=e)
+            return None
 
@@ -198,8 +147,10 @@ class LC_Phi3(LCInterface, ABC):
                 # delete files from models directory except of current_lc
                 os.remove(value)
                 logger.info(f"Successfully deleted file {value}")
+                print(f"Successfully deleted file {value}")
         else:
             logger.info(f"Unfortunately dictionary empty or None")
+            print(f"Unfortunately dictionary {unused_model_dict} empty or None")
 
     def get_unused(self, current_lc):
 
@@ -211,9 +162,13 @@ class LC_Phi3(LCInterface, ABC):
             return {item: unused_model_file}
         else:
             return None
+
+    def model_name(self):
+        return self.model_config["model_name"]
 
     def __str__(self):
-        return f"
+        return f"{self.__class__.__name__}_{self.model_name()}"
 
     def __repr__(self):
-        [old line truncated in this view]
+        llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
+        return f"{self.__class__.__name__}({llm_info})"
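The get_unused/clear_llm pair above implements a swap-then-delete policy for local GGUF files: find model files the current wrapper does not use, then remove them. A hedged sketch of how a caller might switch local models; the method names come from this diff, but the argument semantics (passing the current model's file name) and the call order are assumptions, since the method bodies are only partially visible here.

    from llm.llamacpp.lc_model import LC_Phi3

    # Switch to Phi-3: instantiating the wrapper downloads its GGUF file if
    # needed, then we remove model files the new wrapper does not use.
    current = LC_Phi3()
    unused = current.get_unused(current.model_name())  # assumed: keyed by current file name
    if unused is not None:
        current.clear_llm(unused, current.model_name())
    llm = current.execution()  # the ready LlamaCpp instance, or None on failure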
llm/llm_factory.py
DELETED
@@ -1,27 +0,0 @@
-from typing import Dict
-
-from .huggingfacehub.hf_model import HF_Mistaril, HF_TinyLlama
-from .llamacpp.lc_model import LC_TinyLlama, LC_Phi3
-
-
-class LLM_Factory:
-
-    # trigger = {"model_type": "execution_type"} -> {"hf": "small"}
-    @staticmethod
-    def create_llm(prompt_entity: str, prompt_id: int, trigger: Dict[str, str]):
-
-        print(trigger)
-
-        for key, value in trigger.items():
-            if key == "hf" and value == "effective":
-                model = HF_Mistaril(prompt_entity=prompt_entity, prompt_id=prompt_id)
-            elif key == "hf" and value == "small":
-                model = HF_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
-            elif key == "lc" and value == "effective":
-                model = LC_Phi3(prompt_entity=prompt_entity, prompt_id=prompt_id)
-            elif key == "lc" and value == "small":
-                model = LC_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
-            else:
-                model = None
-
-        return model
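
For reference, the removed factory was driven by a single-entry trigger dict mapping a backend key ("hf" or "lc") to a size ("effective" or "small"), as the comment in the deleted code notes. A hypothetical call, with illustrative argument values:

# Hypothetical usage of the now-removed factory; argument values are illustrative.
model = LLM_Factory.create_llm(prompt_entity="Hello", prompt_id=1,
                               trigger={"lc": "small"})  # would build an LC_TinyLlama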
llm/models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a4c9bb1dbaa372f6381a035fa5c02ef087aaa1ff1f843a56a22328114f03fc59
-size 1170781568
llm/prompts.yaml
DELETED
@@ -1,76 +0,0 @@
-# https://www.promptingguide.ai/techniques/
-
-# Add more examples of the template here, id should be an interval from 1 to 100_000
-# The description provides a basic overview of the template, the description should include the level of difficulty,
-# the name of the prompting method and end with the word "prompt", e.g. "simple short prompt" or "simple zero-shot thought chain prompt
-# The prompt is rated from 0 to 10
-
-prompts:
-  - id: 1
-    prompt_template: |
-      Question: {question}
-      Answer:
-    description: simple question without a prompt
-    rate: 1
-
-  - id: 2
-    prompt_template: |
-      Question: {question}
-      Answer: Write a concise answer on the question with one example if it's possible. CONCISE ANSWER.
-    description: simple concise prompt
-    rate: 3
-
-  # useful for solving simple math task
-  - id: 3
-    prompt_template: |
-      Let's think step by step.
-      Question: {question}
-      Answer:
-    description: simple zero-shot chain-of-thoughts prompt
-    rate: 5
-
-  # another one example for solving simple math task
-  - id: 4
-    prompt_template: |
-      Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls.
-      Each can has 3 tennis balls. How many tennis balls does he have now?
-      A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
-      Question: {question}
-      Answer:
-    description: simple few-shot chain-of-thoughts prompt
-    rate: 5
-
-  - id: 5
-    prompt_template: |
-      Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done,
-      there will be 21 trees. How many trees did the grove workers plant today?
-      A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted.
-      So, they must have planted 21 - 15 = 6 trees. The answer is 6.
-      Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
-      A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.
-      Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
-      A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74
-      chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.
-      Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops
-      did Jason give to Denny?
-      A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of
-      lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.
-      Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does
-      he have now?
-      A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so
-      in total he has 7 + 2 = 9 toys. The answer is 9.
-      Q: There were nine computers in the server room. Five more computers were installed each day, from
-      monday to thursday. How many computers are now in the server room?
-      A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 =
-      20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers.
-      The answer is 29.
-      Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many
-      golf balls did he have at the end of wednesday?
-      A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On
-      Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.
-      Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
-      A: She bought 5 bagels for $3 each. This means she spent $15. She has $8 left.
-      Question: {question}
-      Answer:
-    description: simple self-consistency prompt
-    rate: 6
llm/utils/chat.py
ADDED
@@ -0,0 +1,149 @@
+import time
+import logging
+
+from llm.apimodels.gemini_model import Gemini
+from llm.apimodels.hf_model import HF_Mistaril, HF_TinyLlama
+from llm.llamacpp.lc_model import LC_Phi3, LC_TinyLlama
+
+from typing import Optional, Any
+
+from langchain.chains.conversation.memory import ConversationBufferWindowMemory
+from langchain.chains import ConversationChain
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+file_handler = logging.FileHandler(
+    "logs/chelsea_llm_chat.log")  # template for log files in this package: "llm/logs/chelsea_{module_name}_{entity}.log"
+
+formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+file_handler.setFormatter(formatted)
+
+logger.addHandler(file_handler)
+logger.info("Getting information from chat module")
+
+def prettify(raw_text: str) -> str:
+    pretty = raw_text.replace("**", "")
+    return pretty.strip()
+
+memory: ConversationBufferWindowMemory = ConversationBufferWindowMemory(k=3, ai_prefix="Chelsea")
+
+DELAY: int = 300  # 5 minutes
+
+def has_failed(conversation, prompt) -> Optional[str]:
+    """
+    Runs the LLM conversation prediction and reports failure.
+
+    Args:
+        conversation: The LLM conversation object used for prediction.
+        prompt: The prompt to be used for prediction.
+
+    Returns:
+        None if the prediction raises, otherwise the prettified response.
+    """
+    try:
+        response = conversation.predict(input=prompt)
+        print(f"response: {response}")
+        result = prettify(raw_text=response)
+        return result
+    except Exception as e:
+        logger.error(msg="Error during prediction with conversation in has_failed function", exc_info=e)
+        print(f"Error during prediction with conversation in has_failed function: {e}")
+        return None
+
+
+def has_delay(conversation, prompt) -> Optional[str]:
+    """
+    Checks whether the LLM conversation prediction takes longer than a set delay.
+    Note that the time is only measured after the call returns, so a slow model
+    is not interrupted; it is merely skipped on the next attempt.
+
+    Args:
+        conversation: The LLM conversation object used for prediction.
+        prompt: The prompt to be used for prediction.
+
+    Returns:
+        None if the execution time exceeds the delay (or on error),
+        otherwise the prettified response from the conversation object.
+    """
+    start_time = time.perf_counter()  # Start timer before prediction
+    try:
+        response = conversation.predict(input=prompt)
+        execution_time = time.perf_counter() - start_time  # Calculate execution time
+
+        if execution_time > DELAY:
+            return None  # Return None if delayed
+
+        result = prettify(raw_text=response)  # Prettify the response
+        return result  # Return the prettified response
+    except Exception as e:
+        logger.error(msg="Error during prediction with conversation in has_delay function", exc_info=e)
+        print(f"Error during prediction with conversation in has_delay function: {e}")
+        return None
+
+
+class Conversation:
+    def __init__(self):
+        """
+        Initializes the Conversation class with the default sequence of LLM model
+        classes to try: Gemini, HF_Mistaril, HF_TinyLlama, LC_Phi3, LC_TinyLlama.
+        """
+        self.model_classes = [Gemini, HF_Mistaril, HF_TinyLlama, LC_Phi3, LC_TinyLlama]
+        self.current_model_index = 0
+
+    def _get_conversation(self) -> Any:
+        """
+        Creates a ConversationChain object using the current model class.
+        """
+        try:
+            current_model_class = self.model_classes[self.current_model_index]
+            print("current model class is: ", current_model_class)
+            return ConversationChain(llm=current_model_class().execution(), memory=memory, return_final_only=True)
+        except Exception as e:
+            logger.error(msg="Error during conversation chain in get_conversation function", exc_info=e)
+            print(f"Error during conversation chain in get_conversation function: {e}")
+
+    def chatting(self, prompt: str) -> str:
+        """
+        Carries out the conversation with the user, handling errors and delays.
+
+        Args:
+            prompt (str): The prompt to be used for prediction.
+
+        Returns:
+            str: The final conversation response, or a failure message if all models fail.
+        """
+        if prompt is None or prompt == "":
+            raise ValueError(f"Prompt must be a non-empty string, got: {prompt!r}")
+
+        while self.current_model_index < len(self.model_classes):
+            conversation = self._get_conversation()
+
+            result = has_failed(conversation=conversation, prompt=prompt)
+            if result is not None:
+                print(f"chat - chatting result: {result}")
+                return result
+
+            result = has_delay(conversation=conversation, prompt=prompt)
+            if result is None:
+                self.current_model_index += 1  # Switch to the next model after a failure or delay
+                continue
+
+            return result
+
+        return "All models failed conversation. Please, try again"
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}(current_model_index={self.current_model_index})"
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(model_classes={[cls.__name__ for cls in self.model_classes]})"
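
A minimal usage sketch of the new fallback chain, assuming the Space has working credentials for at least one backend (e.g. a Gemini API key or a Hugging Face token):

from llm.utils.chat import Conversation

chat = Conversation()
# Tries Gemini first, then the HF and llama.cpp models, advancing to the next
# class whenever a prediction errors out or exceeds the 300-second DELAY.
answer = chat.chatting(prompt="What is the capital of France?")
print(answer)

Note that `memory` is module-level, so every `Conversation` instance shares the same three-turn `ConversationBufferWindowMemory`.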
llm/{config.py → utils/config.py}
RENAMED
@@ -1,14 +1,14 @@
 config = {
     "HF_Mistrail": {
         "model": "mistralai/Mistral-7B-Instruct-v0.2",
-        "temperature": 0.
+        "temperature": 0.5,
         "max_new_tokens": 1024,
         "top_k": 5,
         "load_in_8bit": True
     },
     "HF_TinyLlama": {
         "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        "temperature": 0.
+        "temperature": 0.5,
         "max_new_tokens": 1024,
         "top_k": 5,
         "top_p":0.95,
@@ -18,7 +18,7 @@ config = {
     "LC_TinyLlama-1.1B-Chat-v1.0-GGUF": {
         "model_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
         "model_name": "tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin",
-        "temperature": 0.
+        "temperature": 0.5,
         "max_tokens": 868,
         "top_p": 0.8,
         "top_k": 5,
@@ -26,7 +26,7 @@ config = {
     "LC_Phi-3-mini-4k-instruct-gguf": {
         "model_url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
         "model_name": "Phi-3-mini-4k-instruct-gguf.bin",
-        "temperature": 0.
+        "temperature": 0.5,
         "max_tokens": 868,
         "top_p": 0.8,
         "top_k": 5,
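
After the move, consumers index this dict by model name, matching the `config["LC_Phi-3-mini-4k-instruct-gguf"]` lookup in `lc_model.py` above; a quick check of the new import path:

from llm.utils.config import config

phi3_cfg = config["LC_Phi-3-mini-4k-instruct-gguf"]
print(phi3_cfg["temperature"], phi3_cfg["max_tokens"])  # 0.5 868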
llm/{hf_interface.py → utils/hf_interface.py}
RENAMED
@@ -1,8 +1,8 @@
 from abc import ABC, abstractmethod
-
+from typing import Any, Optional
 
 class HFInterface(ABC):
     @abstractmethod
-    def execution(self):
-        """Method execution LLM model based on HuggingFace or
+    def execution(self) -> Optional[Any]:
+        """Method execution LLM model based on HuggingFace or others"""
         pass
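
The contract stays a single abstract method: every backend wrapper only has to return something usable as an LLM from `execution()`. A toy implementer, purely for illustration (the class name and return value are not from the repo):

from llm.utils.hf_interface import HFInterface

class EchoModel(HFInterface):
    def execution(self):
        # Stand-in for a real model: returns a callable that echoes the prompt.
        return lambda prompt: prompt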
llm/{lc_interface.py → utils/lc_interface.py}
RENAMED
File without changes

llm/utils/toggle.py
DELETED
File without changes
logs/chelsea_llm_chat.log
ADDED
@@ -0,0 +1 @@
+2024-07-30 19:01:46,091 - INFO - Getting information from chat module

logs/chelsea_llm_gemini.log
ADDED
@@ -0,0 +1 @@
+2024-07-30 19:01:45,915 - INFO - Getting information from apimodel module

logs/chelsea_llm_huggingfacehub.log
CHANGED
@@ -0,0 +1,2 @@
+
+2024-07-30 19:01:45,937 - INFO - Getting information from apimodel module

logs/chelsea_llm_llamacpp.log
CHANGED
@@ -0,0 +1 @@
+
requirements.txt
CHANGED
@@ -15,19 +15,23 @@ openai
 ffprobe
 
 # related to huggingface
-torch~=2.2.
+torch~=2.2.0
 transformers~=4.40.1
 accelerate
 sentence-transformers
 bitsandbytes
 einops
 
+#gemini-langchain
+langchain_google_genai
+
 # related to langchain
 langchain~=0.1.17
+langchain-community
 pypdf
 chromadb
 
 # related to audio
 librosa==0.10.1
 soundfile~=0.12.1
-numpy~=1.
+numpy~=1.24.4
todo.txt
DELETED
@@ -1,7 +0,0 @@
-ToDo
-
-- Rewrite the command module, updating the pattern to a Factory; add other commands and work out how to interact with them
-- Add cleanup for lc
-- Add more models to hf and implement penalties
-- Update the prompts
-- Come up with a way to switch models
{command → utils}/__init__.py
RENAMED
File without changes
utils/documentation.py
ADDED
@@ -0,0 +1,14 @@
+import streamlit as st
+
+TEMPLATE = """
+A complete list of commands that are designed to facilitate the use of the voice assistant Chelsea.
+The complete list consists of no more than 100 commands written in a txt file.
+The list of commands will be updated as the assistant is developed.
+The first version of the program (Arctic Monkeys) contains a total of 2 commands.
+
+The list of commands and their use.
+"""
+
+class Documentation:
+    def execution(self):
+        st.write(f"Documentation:\n\n{TEMPLATE}")
utils/keywords.py
ADDED
@@ -0,0 +1,286 @@
+from typing import Dict, List
+
+keywords: Dict[str, List[str]] = {
+    "math": [
+        "math", "mathematica", "algebra", "geometry", "calculus", "trigonometry", "arithmetic",
+        "statistics", "probability", "equation", "theorem", "proof",
+        "function", "integral", "derivative", "matrix", "vector",
+        "series", "sequence", "logarithm", "exponent", "ratio",
+        "proportion", "mean", "median", "mode", "variance",
+        "standard deviation", "normal distribution", "binomial distribution", "polynomial", "quadratic",
+        "linear", "nonlinear", "differential equation", "graph", "plot",
+        "coordinate", "axis", "slope", "intercept", "angle",
+        "sin", "cos", "tan", "cot", "sec", "sine", "cosine", "tangent", "cotangent", "secant", "cosecant",
+        "csc", "hypotenuse", "adjacent", "opposite", "prime number",
+        "composite number", "factor", "multiple", "divisor", "fraction",
+        "decimal", "percent", "permutation", "combination", "set",
+        "subset", "union", "intersection", "probability distribution", "random variable",
+        "symmetric", "asymmetric", "function", "domain", "range",
+        "parabola", "hyperbola", "ellipse", "conic sections", "scalar",
+        "dot product", "cross product", "eigenvalue", "eigenvector", "identity matrix",
+        "determinant", "transpose", "inverse matrix", "system of equations", "linear transformation",
+        "orthogonal", "projection", "angle bisector", "midpoint", "distance formula",
+        "circumference", "area", "volume", "surface area", "unit circle",
+        "radian", "degree", "sector", "arc", "chord",
+        "geometric sequence", "arithmetic sequence", "infinity", "limit", "continuity",
+        "convergence", "divergence", "power series", "taylor series", "maclaurin series",
+        "partial fraction", "improper integral", "line integral", "surface integral", "triple integral"
+    ],
+    "physics": [
+        "physics", "quantum mechanics", "relativity", "thermodynamics", "electromagnetism", "classical mechanics",
+        "particle physics", "nuclear physics", "optics", "wave", "particle",
+        "energy", "force", "motion", "velocity", "acceleration",
+        "mass", "weight", "gravity", "magnetism", "electricity",
+        "current", "voltage", "resistance", "circuit", "magnetic field",
+        "electric field", "photon", "electron", "proton", "neutron",
+        "atom", "molecule", "nucleus", "radioactivity", "fission",
+        "fusion", "black hole", "big bang", "entropy", "enthalpy",
+        "conduction", "convection", "radiation", "wave-particle duality", "superposition",
+        "entanglement", "heisenberg uncertainty principle", "schrodinger's cat", "string theory", "standard model",
+        "higgs boson", "dark matter", "dark energy", "gravitational waves", "relativistic effects",
+        "time dilation", "length contraction", "inertia", "momentum", "angular momentum",
+        "torque", "centripetal force", "centrifugal force", "kinetic energy", "potential energy",
+        "work", "power", "conservation laws", "frame of reference", "wave function",
+        "quantum state", "wave equation", "phase", "frequency", "wavelength",
+        "amplitude", "interference", "diffraction", "polarization", "reflection",
+        "refraction",
+        "plasma", "boson", "fermion", "lepton", "quark",
+        "gluons", "neutrino", "antimatter", "tachyon", "string",
+        "brane", "multiverse", "spacetime", "singularity", "event horizon",
+        "hawking radiation", "cosmology", "cosmic microwave background", "redshift", "blueshift",
+        "doppler effect", "hubble's law", "expansion of the universe", "cosmic inflation", "dark flow",
+        "supernova", "neutron star", "pulsar", "quasar", "gamma-ray burst",
+        "solar wind", "magnetosphere", "aurora", "flux", "field lines",
+        "ampere's law", "faraday's law", "gauss's law", "lorentz force", "coulomb's law",
+        "kepler's laws", "newton's laws", "planck's constant", "speed of light", "universal constant",
+        "atomic mass", "atomic number", "isotope", "half-life", "binding energy",
+        "solid state physics", "semiconductor", "superconductor", "hall effect", "ferromagnetism"
+    ],
+    "history": [
+        "history", "revolution", "war", "empire", "kingdom", "dynasty",
+        "monarchy", "republic", "democracy", "constitution", "treaty",
+        "colonization", "independence", "civilization", "ancient", "medieval",
+        "modern", "industrialization", "renaissance", "enlightenment", "feudalism",
+        "imperialism", "nationalism", "communism", "socialism", "capitalism",
+        "fascism", "dictatorship", "cold war", "world war", "reformation",
+        "crusades", "expedition", "exploration", "discovery", "trade",
+        "silk road", "spice trade", "slavery", "abolition", "emancipation",
+        "civil rights", "suffrage", "holocaust", "genocide", "revolutionary war",
+        "civil war", "world war i", "world war ii", "great depression", "cold war",
+        "cold war era", "space race", "arms race", "nuclear proliferation", "decolonization",
+        "indian independence", "american revolution", "french revolution", "russian revolution", "chinese revolution",
+        "cuban revolution", "vietnam war", "korean war", "gulf war", "iraq war",
+        "afghanistan war", "middle ages", "renaissance period", "industrial revolution", "scientific revolution",
+        "age of exploration", "age of enlightenment", "victorian era", "edwardian era", "progressive era",
+        "roaring twenties", "great depression era", "post-war era", "cold war period", "digital age",
+        "information age", "modern era",
+        "ancient egypt", "ancient greece", "roman empire", "byzantine empire", "ottoman empire",
+        "ming dynasty", "qing dynasty", "han dynasty", "gupta empire", "mauryan empire",
+        "aztec empire", "inca empire", "maya civilization", "mesopotamia", "babylon",
+        "persian empire", "carthage", "vikings", "mongol empire", "medieval europe",
+        "feudal japan", "samurai", "shogunate", "mughal empire", "age of reason",
+        "scientific enlightenment", "american civil rights movement", "women's suffrage", "the great migration", "civil disobedience",
+        "nonviolent resistance", "apartheid", "mandela", "gandhi", "martin luther king jr.",
+        "malcolm x", "che guevara", "mao zedong", "stalin", "hitler",
+        "churchill", "fdr", "truman", "kennedy", "roosevelt",
+        "cold war espionage", "berlin wall", "iron curtain", "nato", "warsaw pact",
+        "marshall plan", "watergate", "cuban missile crisis", "yugoslav wars", "bosnian genocide",
+        "rwanda genocide", "darfur conflict", "arab spring", "syrian civil war", "russo-ukrainian war",
+        "brexit", "european union", "united nations", "nato", "world trade organization",
+        "nafta", "trans-pacific partnership", "climate change", "global warming", "paris agreement"
+    ],
+    "technology": [
+        "artificial intelligence", "software", "hardware", "programming", "coding", "algorithm",
+        "machine learning", "artificial intelligence", "ai", "neural networks", "deep learning",
+        "data science", "big data", "cloud computing", "internet of things", "iot",
+        "cybersecurity", "encryption", "blockchain", "cryptocurrency", "bitcoin",
+        "ethereum", "smart contract", "virtual reality", "vr", "augmented reality",
+        "ar", "robotics", "automation", "3d printing", "biotechnology",
+        "genetic engineering", "nanotechnology", "quantum computing", "quantum technology", "5g",
+        "wireless communication", "network", "database", "sql", "nosql",
+        "web development", "frontend", "backend", "full stack", "html",
+        "css", "javascript", "react", "angular", "vue",
+        "node.js", "python", "java", "c++", "c#",
+        "ruby", "swift", "kotlin", "mobile development", "ios",
+        "android", "app development", "user interface", "ui", "user experience",
+        "ux", "responsive design", "devops", "agile", "scrum",
+        "kanban", "version control", "git", "github", "continuous integration",
+        "ci", "continuous deployment", "cd", "containerization", "docker",
+        "kubernetes", "microservices", "serverless", "cloud infrastructure", "aws",
+        "azure", "google cloud", "gcp", "digital transformation", "edge computing",
+        "fog computing", "smart home", "smart devices", "wearables", "fitness trackers",
+        "health tech", "medtech", "fintech", "edtech", "proptech",
+        "natural language processing", "nlp", "speech recognition", "chatbots", "virtual assistants",
+        "augmented reality", "mixed reality", "extended reality", "er", "haptic technology",
+        "3d modeling", "computer graphics", "game development", "simulation", "digital twins",
+        "smart cities", "connected vehicles", "autonomous vehicles", "self-driving cars", "electric vehicles",
+        "ev", "renewable energy", "solar power", "wind power", "hydroelectric power",
+        "smart grid", "energy storage", "battery technology", "wearable tech", "smartwatch",
+        "fitness tracker", "smart glasses", "e-learning", "online education", "moocs",
+        "massive open online courses", "online collaboration", "telemedicine", "remote work", "digital workspace",
+        "cryptography", "secure communications", "quantum encryption", "privacy", "data protection",
+        "regtech", "insurtech", "agritech", "contech", "govtech",
+        "martech", "hrtech", "legaltech", "real estate technology", "property management systems",
+        "virtual real estate", "3d printing construction", "sustainable technology", "cleantech", "green technology",
+        "environmental technology", "recycling technology", "waste management technology", "water purification technology", "air purification technology",
+        "carbon capture", "carbon footprint reduction", "renewable materials", "biodegradable materials", "smart packaging",
+        "advanced manufacturing", "industry 4.0", "internet of behaviors", "behavioral data", "personalization",
+        "customer experience", "cx", "user journey", "touchpoints", "interaction design",
+        "service design", "design thinking", "innovation management", "creative technology", "disruptive technology"
+    ],
+    "life": [
+        "health", "wellness", "fitness", "nutrition", "diet",
+        "exercise", "mental health", "stress", "anxiety", "depression",
+        "mindfulness", "meditation", "yoga", "self-care", "hygiene",
+        "lifestyle", "work-life balance", "relationships", "friendship", "family",
+        "parenting", "education", "career", "job", "profession",
+        "hobbies", "interests", "travel", "adventure", "vacation",
+        "culture", "arts", "music", "film", "literature",
+        "reading", "writing", "creativity", "crafts", "diy",
+        "cooking", "recipes", "baking", "gardening", "sustainability",
+        "environment", "eco-friendly", "green living", "minimalism", "decluttering",
+        "finance", "budgeting", "saving", "investing", "retirement",
+        "insurance", "real estate", "housing", "mortgage", "renting",
+        "transportation", "driving", "public transit", "biking", "walking",
+        "pets", "animals", "dog", "cat", "pet care",
+        "community", "volunteering", "charity", "philanthropy", "social issues",
+        "politics", "government", "laws", "rights", "justice",
+        "equality", "diversity", "inclusion", "personal development", "self-improvement",
+        "goal setting", "productivity", "time management", "motivation", "inspiration",
+        "spirituality", "religion", "faith", "beliefs", "values",
+        "ethics", "morality", "philosophy", "wisdom", "knowledge",
+        "learning", "education system", "school", "college", "university",
+        "workplace", "entrepreneurship", "startups", "leadership", "management",
+        "mentorship", "networking", "communication skills", "public speaking", "negotiation",
+        "conflict resolution", "teamwork", "collaboration", "project management", "organizational skills",
+        "problem-solving", "critical thinking", "decision making", "emotional intelligence", "self-awareness",
+        "resilience", "adaptability", "creativity", "innovation", "lifelong learning",
+        "personal growth", "mindset", "habits", "routine", "discipline",
+        "focus", "concentration", "clarity", "vision", "values",
+        "purpose", "fulfillment", "happiness", "gratitude", "optimism",
+        "positive thinking", "self-esteem", "self-confidence", "body image", "self-acceptance",
+        "stress management", "relaxation", "rest", "sleep", "dreams",
+        "lucid dreaming", "mental clarity", "cognitive function", "memory", "learning styles",
+        "study techniques", "academic success", "career planning", "professional development", "workplace culture",
+        "employee engagement", "job satisfaction", "work environment", "remote work", "telecommuting",
+        "flexible work", "gig economy", "side hustle", "financial independence", "early retirement",
+        "fire movement", "minimalism", "simple living", "downshifting", "voluntary simplicity",
+        "sustainable living", "zero waste", "plastic-free", "veganism", "vegetarianism",
+        "plant-based diet", "organic food", "local food", "farm-to-table", "slow food",
+        "mindful eating", "intuitive eating", "diet culture", "body positivity", "weight management",
+        "fitness goals", "workout routines", "exercise science", "sports nutrition", "athletic performance",
+        "injury prevention", "rehabilitation", "physiotherapy", "chiropractic care", "alternative medicine",
+        "holistic health", "integrative medicine", "traditional medicine", "herbal medicine", "acupuncture",
+        "massage therapy", "aromatherapy", "sound therapy", "energy healing", "reiki",
+        "chakra balancing", "spiritual healing", "faith healing", "meditative practices", "breathwork",
+        "pranayama", "tai chi", "qigong", "martial arts", "self-defense",
+        "dance", "movement therapy", "art therapy", "music therapy", "drama therapy",
+        "play therapy", "animal-assisted therapy", "equine therapy", "nature therapy", "ecotherapy",
+        "forest bathing", "wild swimming", "outdoor activities", "hiking", "camping",
+        "backpacking", "mountaineering", "rock climbing", "bouldering", "caving",
+        "kayaking", "canoeing", "rafting", "sailing", "boating",
+        "fishing", "birdwatching", "wildlife photography", "stargazing", "astronomy",
+        "geocaching", "orienteering", "survival skills", "bushcraft", "foraging",
+        "homesteading", "permaculture", "urban gardening", "community gardening", "allotment gardening",
+        "container gardening", "vertical gardening", "indoor gardening", "houseplants", "succulents",
+        "bonsai", "orchids", "roses", "tulips", "wildflowers",
+        "herbs", "vegetables", "fruits", "berries", "nut trees",
+        "shade gardening", "water gardening", "xeriscaping", "landscape design", "garden design",
+        "garden maintenance", "pruning", "composting", "soil health", "organic gardening",
+        "pest control", "natural fertilizers", "greenhouse gardening", "aquaponics", "hydroponics",
+        "aquaculture", "sustainable agriculture", "regenerative agriculture", "carbon farming", "climate-smart agriculture",
+        "urban farming", "city farming", "vertical farming", "rooftop farming", "indoor farming",
+        "community supported agriculture", "csa", "farmers markets", "local food systems", "food sovereignty",
+        "food security", "food justice", "food deserts", "food waste", "food recovery",
+        "gleaning", "food rescue", "food banks", "food pantries", "community kitchens",
+        "soup kitchens", "meal programs", "nutrition education", "food policy", "agricultural policy",
+        "food industry", "food science", "food technology", "food safety", "food regulation",
+        "dietary guidelines", "nutrition research", "public health", "global health", "health equity",
+        "social determinants of health", "healthcare access", "universal healthcare", "healthcare policy", "health insurance",
+        "primary care", "preventive care", "chronic disease management", "mental health care", "substance abuse treatment",
+        "addiction recovery", "rehabilitation services", "disability services", "elder care", "geriatric care",
+        "end-of-life care", "palliative care", "hospice care", "patient advocacy", "health literacy",
+        "patient empowerment", "shared decision making", "informed consent", "medical ethics", "bioethics",
+        "genetic counseling", "reproductive health", "maternal health", "child health", "adolescent health",
+        "men's health", "women's health", "lgbtq+ health", "sexual health", "sex education",
+        "family planning", "birth control", "fertility", "infertility", "adoption",
+        "surrogacy", "pregnancy", "prenatal care", "postpartum care", "breastfeeding",
+        "newborn care", "pediatric care", "immunizations", "vaccinations", "infectious diseases",
+        "pandemics", "epidemics", "public health response", "emergency preparedness", "disaster response",
+        "humanitarian aid", "global development", "international relations", "diplomacy", "peacekeeping",
+        "conflict resolution", "human rights", "civil liberties", "social justice", "advocacy",
+        "activism", "community organizing", "grassroots movements", "social movements", "political activism",
+        "environmental activism", "climate activism", "sustainable development", "social entrepreneurship", "impact investing",
+        "corporate social responsibility", "csr", "ethical business", "fair trade", "sustainable fashion",
+        "slow fashion", "circular economy", "zero waste lifestyle", "plastic-free living", "minimal waste",
+        "eco-friendly products", "green products", "sustainable brands", "ethical consumerism", "conscious consumerism",
+        "mindful living", "simple living", "voluntary simplicity", "intentional living", "purpose-driven life",
+        "values-driven life", "authenticity", "integrity", "vulnerability", "empathy",
+        "compassion", "kindness", "gratitude", "generosity", "service",
+        "community service", "volunteerism", "philanthropy", "charitable giving", "social impact",
+        "civic engagement", "democratic participation", "voting", "elections", "campaigns",
+        "political engagement", "policy advocacy", "government accountability", "transparency", "good governance",
+        "public accountability", "citizen oversight", "public participation", "community involvement", "collective action",
+        "solidarity", "social cohesion", "community resilience", "disaster resilience", "climate resilience",
+        "ecosystem resilience", "environmental stewardship", "conservation", "biodiversity", "wildlife protection",
+        "habitat restoration", "ecosystem services", "natural resources", "sustainable resource management", "renewable resources",
+        "non-renewable resources", "energy conservation", "water conservation", "soil conservation", "forest conservation",
+        "marine conservation", "sustainable fisheries", "sustainable forestry", "sustainable agriculture", "sustainable tourism",
+        "eco-tourism", "nature-based tourism", "cultural tourism", "heritage tourism", "community-based tourism",
+        "responsible tourism", "ethical tourism", "regenerative tourism", "adventure tourism", "wildlife tourism",
+        "urban tourism", "rural tourism", "agritourism", "gastrotourism", "culinary tourism",
+        "food tourism", "wine tourism", "beer tourism", "coffee tourism", "chocolate tourism",
+        "craft tourism", "artisan tourism", "handicraft tourism", "souvenir tourism", "local tourism",
+        "staycations", "domestic tourism", "short-term rentals", "vacation rentals", "holiday homes",
+        "vacation homes", "second homes", "vacation planning", "travel planning", "itinerary planning",
+        "travel tips", "packing tips", "travel hacks", "budget travel", "luxury travel",
+        "solo travel", "group travel", "family travel", "pet-friendly travel", "accessible travel",
+        "sustainable travel", "green travel", "slow travel", "responsible travel", "ethical travel",
+        "off-the-beaten-path travel", "hidden gems", "bucket list", "once-in-a-lifetime trips", "once-in-a-lifetime experiences",
+        "adventure travel", "extreme travel", "extreme sports", "extreme adventures", "thrill-seeking",
+        "adrenaline junkie", "challenge", "personal challenge", "physical challenge", "mental challenge",
+        "growth mindset", "fixed mindset", "open-mindedness", "curiosity", "exploration",
+        "discovery", "innovation", "creativity", "invention", "problem-solving",
+        "critical thinking", "strategic thinking", "analytical thinking", "logical thinking", "scientific thinking",
+        "philosophical thinking", "ethical thinking", "creative thinking", "design thinking", "systems thinking",
+        "complexity", "ambiguity", "uncertainty", "paradox", "dilemma",
+        "contradiction", "tension", "balance", "harmony", "equilibrium",
+        "equanimity", "tranquility", "serenity", "calm", "peace",
+        "inner peace", "outer peace", "global peace", "world peace", "peacebuilding",
+        "conflict prevention", "conflict resolution", "mediation", "negotiation", "dialogue",
+        "understanding", "tolerance", "acceptance", "inclusion", "diversity",
+        "multiculturalism", "pluralism", "intercultural dialogue", "interfaith dialogue", "cross-cultural communication",
+        "cross-cultural understanding", "interpersonal communication", "interpersonal skills", "relationship building", "relationship management",
+        "relationship maintenance", "relationship development", "relationship repair", "relationship enhancement", "relationship success",
+        "relationship satisfaction", "relationship happiness", "relationship fulfillment", "relationship growth", "relationship dynamics",
+        "family dynamics", "family relationships", "parent-child relationships", "sibling relationships", "extended family",
+        "family systems", "family therapy", "family counseling", "marriage", "marriage counseling",
+        "divorce", "separation", "co-parenting", "blended families", "stepfamilies",
+        "adoption", "foster care", "child welfare", "child protection", "child development",
+        "childhood", "adolescence", "adolescent development", "youth development", "youth programs",
+        "youth leadership", "youth empowerment", "youth engagement", "youth advocacy", "youth participation",
+        "youth voice", "youth rights", "youth justice", "youth crime", "youth violence",
+        "youth gangs", "youth homelessness", "youth mental health", "youth substance abuse", "youth addiction",
+        "youth education", "youth employment", "youth entrepreneurship", "youth innovation", "youth creativity",
+        "youth sports", "youth arts", "youth culture", "youth identity", "youth diversity",
+        "youth inclusion", "youth equity", "youth social justice", "youth environmental justice", "youth climate action",
+        "youth activism", "youth advocacy", "youth leadership", "youth participation", "youth empowerment",
+        "youth engagement", "youth organizing", "youth mobilization", "youth networks", "youth movements",
+        "youth campaigns", "youth initiatives", "youth projects", "youth programs", "youth services",
+        "youth organizations", "youth groups", "youth clubs", "youth associations", "youth councils",
+        "youth committees", "youth forums", "youth dialogues", "youth workshops", "youth conferences",
+        "youth summits", "youth assemblies", "youth festivals", "youth events", "youth activities",
+        "youth education", "youth training", "youth development", "youth mentoring", "youth coaching",
+        "youth support", "youth advocacy", "youth empowerment", "youth engagement", "youth participation",
+        "youth leadership", "youth entrepreneurship", "youth innovation", "youth creativity", "youth development",
+        "youth programs", "youth services", "youth organizations", "youth groups", "youth clubs",
+        "youth associations", "youth councils", "youth committees", "youth forums", "youth dialogues",
+        "youth workshops", "youth conferences"
+    ],
+    "riddles": [
+        "riddle", "puzzle", "brain teaser", "what am I", "guess",
+        "mystery", "conundrum", "enigma", "paradox", "trick question",
+        "wordplay", "challenge", "logic puzzle", "mind bender",
+        "cryptic", "clue", "riddle me this", "solve", "answer", "question"
+    ]
+}
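
A quick sketch of how this table classifies an utterance, mirroring the word-level matching that utils/prompt_toggle.py (next file) implements. One design limitation to note: multi-word entries such as "standard deviation" can never match under this single-word tokenization.

import re
from utils.keywords import keywords

text = "what is the derivative of this function?"
words = set(re.findall(r"\b\w+\b", text.lower()))
# Intersect the utterance's words with each category's keyword set.
matches = {cat: sorted(words & {kw.lower() for kw in kws})
           for cat, kws in keywords.items()}
print({cat: hits for cat, hits in matches.items() if hits})
# -> {'math': ['derivative', 'function']}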
utils/prompt_toggle.py
ADDED
@@ -0,0 +1,59 @@
+import os
+import re
+import yaml
+
+from typing import Any, Dict, List
+from langchain.prompts import PromptTemplate
+
+cwd: str = os.getcwd()
+prompt_file_path: str = os.path.join(cwd, "utils/prompts.yaml")
+
+# Load prompts from yaml
+def load_prompts():
+    try:
+        with open(prompt_file_path, "r") as f:
+            return yaml.safe_load(f)['prompts']
+    except Exception as e:
+        print(f"Reading prompts file has failed: {e}")
+
+# Preprocess text and keywords
+def __preprocess_text(text: str) -> List[Any]:
+    return re.findall(r'\b\w+\b', text.lower())
+
+def __preprocess_keywords(keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
+    preprocessed_keywords = {}
+    for category, kw_list in keywords.items():
+        preprocessed_keywords[category] = set(kw.lower() for kw in kw_list)
+    return preprocessed_keywords
+
+# Check for keywords in input text
+def __check_for_keywords(text: str, keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
+    preprocessed_keywords = __preprocess_keywords(keywords)
+    matched_keywords = {category: [] for category in keywords}
+    words = __preprocess_text(text)
+
+    for word in words:
+        for category, kw_set in preprocessed_keywords.items():
+            if word in kw_set:
+                matched_keywords[category].append(word)
+
+    matched_keywords = {category: list(set(matches)) for category, matches in matched_keywords.items() if matches}
+
+    return matched_keywords
+
+# Select the most appropriate prompt based on matched keywords
+def select_prompt(input_text: str, prompts: Any, keywords: Dict[str, List[str]]) -> str:
+    matched_keywords = __check_for_keywords(input_text, keywords)
+    matched_categories = list(matched_keywords.keys())
+
+    # Default to the highest-rated common prompt if no specific category is matched
+    selected_prompt = max((p for p in prompts if 'common' in p['purpose']), key=lambda p: p['rate'], default=None)
+
+    for category in matched_categories:
+        category_prompts = [p for p in prompts if category in p['purpose']]
+        if category_prompts:
+            selected_prompt = max(category_prompts, key=lambda p: p['rate'], default=selected_prompt)
+
+    prompt_template = PromptTemplate(template=selected_prompt['prompt_template'], input_variables=['entity'])
+    prompt = prompt_template.format(entity=input_text)
+    return prompt
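
Putting the pieces together, roughly as app.py wires them up after this commit: load the YAML templates once, then let the keyword match pick and format a template for the transcript.

from utils.keywords import keywords
from utils.prompt_toggle import load_prompts, select_prompt

prompts = load_prompts()
prompt = select_prompt(input_text="riddle me this: what has keys but no locks?",
                       prompts=prompts, keywords=keywords)
print(prompt)  # the highest-rated 'riddles' template with the question substituted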
utils/prompts.yaml
ADDED
@@ -0,0 +1,95 @@
+# https://www.promptingguide.ai/techniques/
+
+# Add more examples of the template here, id should be an interval from 0 to 99_999
+# The description provides a basic overview of the template, the description should include the level of difficulty,
+# the name of the prompting method and end with the word "prompt", e.g. "short prompt" or "zero-shot thought chain prompt"
+# The prompt is rated from 0 to 10
+# common purpose means that the prompt fits all situations
+
+prompts:
+  - id: 0
+    prompt_template: |
+      Instruction: You are a voice assistant called Chelsea who enjoys helping people
+      Question: {entity}
+      Answer:
+    description: main prompt
+    rate: 8
+    purpose: ['common']
+
+  - id: 1
+    prompt_template: |
+      Instruction: just give a response
+      Question: {entity}
+      Answer:
+    description: a simple prompt
+    rate: 1
+    purpose: ['common']
+
+  - id: 2
+    prompt_template: |
+      Instruction: Write a concise answer on the question with one example if it's possible. CONCISE ANSWER.
+      Question: {entity}
+      Answer:
+    description: concise prompt
+    rate: 3
+    purpose: ['common']
+
+  # useful for solving simple math tasks
+  - id: 3
+    prompt_template: |
+      Instruction: Let's think step by step.
+      Question: {entity}
+      Answer:
+    description: zero-shot chain-of-thoughts prompt
+    rate: 4
+    purpose: ['math', 'physics', 'technology']
+
+  # another example for solving simple math tasks
+  - id: 4
+    prompt_template: |
+      Instruction:
+      Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls.
+      Each can has 3 tennis balls. How many tennis balls does he have now?
+      A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
+      Question: {entity}
+      Answer:
+    description: few-shot chain-of-thoughts prompt
+    rate: 4
+    purpose: ['riddles']
+
+  - id: 5
+    prompt_template: |
+      Instruction:
+      Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done,
+      there will be 21 trees. How many trees did the grove workers plant today?
+      A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted.
+      So, they must have planted 21 - 15 = 6 trees. The answer is 6.
+      Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
+      A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.
+      Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
+      A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74
+      chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.
+      Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops
+      did Jason give to Denny?
+      A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of
+      lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.
+      Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does
+      he have now?
+      A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so
+      in total he has 7 + 2 = 9 toys. The answer is 9.
+      Q: There were nine computers in the server room. Five more computers were installed each day, from
+      monday to thursday. How many computers are now in the server room?
+      A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 =
+      20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers.
+      The answer is 29.
+      Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many
+      golf balls did he have at the end of wednesday?
+      A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On
+      Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.
+      Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
+      A: She bought 5 bagels for $3 each. This means she spent $15. She has $8 left.
+      Question: {entity}
+      Answer:
+    description: self-consistency prompt
+    rate: 6
+    purpose: ['riddles']