CineAI committed on
Commit
e294914
•
1 Parent(s): d3da264

4172637469634d6f6e6b6579733a33302e30372e3234

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **/__pycache__
2
+ .history/
.streamlit/config.toml CHANGED
@@ -1,2 +1,2 @@
1
  [theme]
2
- backgroundColor = "black"
 
1
  [theme]
2
+ backgroundColor = "#363534"
app.py CHANGED
@@ -1,59 +1,120 @@
1
- # version - ArcticMonkey:19.03.24:1743
2
 
3
  # python core libraries
4
- import os
5
-
6
  import psutil
7
-
 
8
  # components from other authors
9
  from streamlit_mic_recorder import mic_recorder
10
-
11
  # core modules
12
  from audio_processing.A2T import A2T
13
  from audio_processing.T2A import T2A
14
- from command.utils import build_chain
15
- from llm.llm_factory import LLM_Factory
16
-
17
-
18
- # Можна редагувати аби не повторювалось
19
- greeting_text = "Hi, my name is M8... oops, that's from my future, but right now I'm Chelsea, your personal voice assistant. Ask me anything you want and I'll try to help you."
20
-
21
- llm_model = LLM_Factory()
22
-
23
- def prepare_cor(input_text: str):
24
- return build_chain.build_command_chain().handle_command(input_text)
25
-
26
- # Базово буде hf, а далі виходячи з завдання буде змінюватися.
27
- # Від можливості використовувати AI agent також буде залежати trigger, якщо використовується, то ліпше використовувати lc ніж hf.
28
- trigger = {"hf": "effective"}
29
-
30
  t2a = T2A()
31
 
32
  def main():
33
- t2a.autoplay(greeting_text, just_once=True)
34
-
35
- mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True)
36
-
37
- if mic is not None:
38
- a2t = A2T(mic["bytes"])
39
- text = a2t.predict()
40
- print(text)
41
-
42
- # Придумати як реалізувати команди
43
- # prepare_cor(input_text=text)
44
-
45
- # Треба для lc реалізувати буде виклик очищення
46
- llm = llm_model.create_llm(prompt_entity=text, prompt_id=1, trigger=trigger)
47
- response = llm.execution() if llm is not None else "Oops occurred some error. Please try again. Who is Jhon Galt!"
48
-
49
- # Треба буде переписати клас, передавати текст не через __init__ а в autoplay.
50
- t2a.autoplay(response)
51
-
 
 
52
 
53
  if __name__ == "__main__":
54
  print(f"Total Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")
55
  print(f"Available Memory: {psutil.virtual_memory().available / (1024**3):.2f} GB")
56
  print(f"CPU Cores: {psutil.cpu_count()}")
57
  print(f"CPU Usage: {psutil.cpu_percent()}%")
58
-
59
  main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # version - ArcticMonkeys:30.07.24
2
 
3
  # python core libraries
 
 
4
  import psutil
5
+ # streamlit
6
+ import streamlit as st
7
  # components from other authors
8
  from streamlit_mic_recorder import mic_recorder
 
9
  # core modules
10
  from audio_processing.A2T import A2T
11
  from audio_processing.T2A import T2A
12
+ from llm.utils.chat import Conversation
13
+ # utils modules
14
+ from utils.keywords import keywords
15
+ from utils.prompt_toggle import select_prompt, load_prompts
16
+ from utils.documentation import Documentation
17
+
18
+ # TODO:
19
+ # * Зробити в utils можливість для використання різних промптів -> Done
20
+ # * Додати як робив на HF хто на фото -> agent
21
+ # * Додати можливість малюнками вирішувати мат проблеми -> agent
22
+ # * Додати можливість створювати/редагувати документи(pdf, docx) -> agent
23
+
24
+ prompts = load_prompts()
25
+ doc = Documentation()
26
+ chat = Conversation()
 
27
  t2a = T2A()
28
 
29
  def main():
30
+ try:
31
+ mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
32
+ if mic is not None:
33
+ a2t = A2T(mic["bytes"])
34
+ text = a2t.predict()
35
+ print(f"Text: {text}")
36
+
37
+ prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
38
+ print(f"Prompt:\n{prompt}")
39
+ response = chat.chatting(prompt=prompt if prompt is not None else text)
40
+ t2a.autoplay(response)
41
+
42
+ if response:
43
+ st.markdown(f"Your input: {prompt}")
44
+ st.markdown(f"Chelsea response: {response}")
45
+
46
+ prompt = None
47
+ response = None
48
+ except Exception as e:
49
+ print(f"An error occurred in main finction, reasone is: {e}")
50
+ doc.execution()
51
 
52
  if __name__ == "__main__":
53
  print(f"Total Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")
54
  print(f"Available Memory: {psutil.virtual_memory().available / (1024**3):.2f} GB")
55
  print(f"CPU Cores: {psutil.cpu_count()}")
56
  print(f"CPU Usage: {psutil.cpu_percent()}%")
 
57
  main()
58
+ footer="""
59
+ <style>
60
+ /* Common styles for the footer */
61
+ .footer {
62
+ position: fixed;
63
+ left: 0;
64
+ bottom: 0;
65
+ width: 100%;
66
+ height: 60px; /* Set a fixed height for consistency */
67
+ font-size: 14px; /* Adjust font size for readability */
68
+ text-align: center;
69
+ padding: 15px 0; /* Reduced padding */
70
+ transition: color 0.3s, background-color 0.3s;
71
+ }
72
+
73
+ .footer p {
74
+ margin: 0; /* Remove default margins */
75
+ font-size: 18px; /* Adjust font size as needed */
76
+ }
77
+
78
+ a:link, a:visited {
79
+ text-decoration: dotted;
80
+ color: inherit; /* Use current text color */
81
+ }
82
+
83
+ a:hover, a:active {
84
+ background: linear-gradient(to right, #ffe44d, #ffdd1a, #ffd700, #ffd900);
85
+ -webkit-text-fill-color: transparent;
86
+ -webkit-background-clip: text;
87
+ }
88
+
89
+ .footer a:hover {
90
+ color: #ff4500; /* Different hover color */
91
+ }
92
+
93
+ /* Light mode styles */
94
+ @media (prefers-color-scheme: light) {
95
+ a:link, a:visited {
96
+ color: #0056b3; /* Blue color for links */
97
+ }
98
+
99
+ .footer a:hover {
100
+ color: #ff4500; /* Hover color for light mode */
101
+ }
102
+ }
103
+
104
+ /* Dark mode styles */
105
+ @media (prefers-color-scheme: dark) {
106
+ a:link, a:visited {
107
+ color: #ffd700; /* Gold color for links in dark mode */
108
+ }
109
+
110
+ .footer a:hover {
111
+ color: #ffa500; /* Hover color for dark mode */
112
+ }
113
+ }
114
+ </style>
115
+
116
+ <div class="footer">
117
+ <p>Please support the project on <a href="https://buymeacoffee.com/cineai" target="_blank">Buy Me a Coffee</a></p>
118
+ </div>
119
+ """
120
+ st.markdown(footer,unsafe_allow_html=True)
audio_processing/A2T.py CHANGED
@@ -1,6 +1,6 @@
1
- import numpy as np
2
- import librosa
3
  import io
 
 
4
 
5
  from typing import Optional
6
 
@@ -15,7 +15,7 @@ class A2T:
15
 
16
  def __generate_text(self, inputs, task: Optional[str] = None) -> str:
17
  if inputs is None:
18
- raise Exception("Inputs is None")
19
 
20
  transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
21
  return transcribed_text
@@ -34,7 +34,7 @@ class A2T:
34
  print(f"Sample rate : {sample_rate}")
35
  return audio
36
  except Exception as e:
37
- print(f"Error loading audio: {e}")
38
 
39
  def predict(self) -> str:
40
  try:
@@ -43,7 +43,7 @@ class A2T:
43
  audio = self.__preprocess(raw=raw)
44
  print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
45
  else:
46
- raise Exception("please provide audio")
47
 
48
  if isinstance(audio, np.ndarray):
49
  return self.__generate_text(inputs=audio, task=TASK)
@@ -51,4 +51,4 @@ class A2T:
51
  raise ValueError("Audio is not np array")
52
 
53
  except Exception as e:
54
- print(f"Oops some kinda error : {e}")
 
 
 
1
  import io
2
+ import librosa
3
+ import numpy as np
4
 
5
  from typing import Optional
6
 
 
15
 
16
  def __generate_text(self, inputs, task: Optional[str] = None) -> str:
17
  if inputs is None:
18
+ raise ValueError(f"Input audio is None {inputs}, please provide audio")
19
 
20
  transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
21
  return transcribed_text
 
34
  print(f"Sample rate : {sample_rate}")
35
  return audio
36
  except Exception as e:
37
+ print(f"Error loading audio in the preprocess function in the A2T class: {e}")
38
 
39
  def predict(self) -> str:
40
  try:
 
43
  audio = self.__preprocess(raw=raw)
44
  print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
45
  else:
46
+ raise ValueError(f"Please provide audio your audio {self.mic}")
47
 
48
  if isinstance(audio, np.ndarray):
49
  return self.__generate_text(inputs=audio, task=TASK)
 
51
  raise ValueError("Audio is not np array")
52
 
53
  except Exception as e:
54
+ print(f"An error occurred in the predict function in the A2T class: {e}")
audio_processing/T2A.py CHANGED
@@ -3,27 +3,24 @@ from streamlit_TTS import auto_play, text_to_audio
3
 
4
 
5
  class T2A:
6
- def autoplay(self, input_text: Optional[str] = None, lang: str = "en", just_once: bool = True) -> None:
7
  """
8
- Plays audio once based on the provided input text.
9
 
10
  Args:
11
  input_text (Optional[str], optional): Text to convert to audio. Defaults to None.
12
  lang (str, optional): Language for text-to-speech conversion. Defaults to "en".
13
- just_once (bool, optional): Flag to control whether audio plays only once. Defaults to False.
14
  """
15
 
16
- if input_text is not None:
17
- if isinstance(input_text, str):
18
- audio = text_to_audio(input_text, language=lang)
19
- if just_once:
20
- auto_play(audio)
21
- just_once = False
22
- else:
23
- text = f"The text you provided is of data type {type(input_text)}, only string type is accepted"
24
- audio = text_to_audio(text, language=lang)
25
- auto_play(audio)
26
- else:
27
  text = "Please check the input text you have provided, it has a value of None"
28
  audio = text_to_audio(text, language=lang)
29
  auto_play(audio)
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  class T2A:
6
+ def autoplay(self, input_text: Optional[str] = None, lang: str = "en") -> None:
7
  """
8
+ Plays audio based on the provided input text.
9
 
10
  Args:
11
  input_text (Optional[str], optional): Text to convert to audio. Defaults to None.
12
  lang (str, optional): Language for text-to-speech conversion. Defaults to "en".
 
13
  """
14
 
15
+ if input_text is None:
 
 
 
 
 
 
 
 
 
 
16
  text = "Please check the input text you have provided, it has a value of None"
17
  audio = text_to_audio(text, language=lang)
18
  auto_play(audio)
19
+
20
+ if not isinstance(input_text, str):
21
+ text = f"The text you provided is of data type {type(input_text)}, only string type is accepted"
22
+ audio = text_to_audio(text, language=lang)
23
+ auto_play(audio)
24
+
25
+ audio = text_to_audio(input_text, language=lang)
26
+ auto_play(audio)
audio_processing/config.py CHANGED
@@ -2,10 +2,8 @@
2
  # https://magictool.ai/tool/text-to-hex-converter/ Here ArcticMonkey is the name of the version and the rest is the date and time
3
 
4
  import torch
5
-
6
  from transformers import pipeline
7
 
8
-
9
  device = 0 if torch.cuda.is_available() else "cpu"
10
 
11
  checkpoint_whisper = "openai/whisper-medium"
@@ -15,20 +13,4 @@ pipe = pipeline(
15
  model=checkpoint_whisper,
16
  device=device,
17
  chunk_length_s=30,
18
- )
19
-
20
- # from parler_tts import ParlerTTSForConditionalGeneration
21
- # from transformers import AutoTokenizer, AutoFeatureExtractor
22
-
23
- # checkpoint_parler = "parler-tts/parler_tts_mini_v0.1"
24
-
25
- # model_parler = ParlerTTSForConditionalGeneration.from_pretrained(checkpoint_parler).to(device)
26
- # tokenizer = AutoTokenizer.from_pretrained(checkpoint_parler)
27
- # feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint_parler)
28
-
29
- # SAMPLE_RATE = feature_extractor.sampling_rate
30
- # SEED = 42
31
-
32
- # checkpoint_mms_tts_eng = "facebook/mms-tts-eng"
33
-
34
- # pipe_tts = pipeline("text-to-speech", model=checkpoint_mms_tts_eng)
 
2
  # https://magictool.ai/tool/text-to-hex-converter/ Here ArcticMonkey is the name of the version and the rest is the date and time
3
 
4
  import torch
 
5
  from transformers import pipeline
6
 
 
7
  device = 0 if torch.cuda.is_available() else "cpu"
8
 
9
  checkpoint_whisper = "openai/whisper-medium"
 
13
  model=checkpoint_whisper,
14
  device=device,
15
  chunk_length_s=30,
16
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
command/basic/basic_commands.py DELETED
@@ -1,37 +0,0 @@
1
- import os
2
- import logging
3
-
4
- import streamlit as st
5
-
6
- from typing import Optional, Dict, Any
7
- from ..command_interface import CommandInterface
8
-
9
-
10
- class Documentation(CommandInterface):
11
- def __init__(self,
12
- command_file: str,
13
- commands: Optional[str],
14
- llm: Any,
15
- id: int):
16
-
17
- self.command_file = command_file
18
-
19
- if commands is not None:
20
- self.commands = commands
21
- else:
22
- self.commands = []
23
-
24
- self.llm = llm
25
-
26
- self.id = id
27
-
28
- def get_command() -> Dict[str, Dict[str, list]]:
29
-
30
- pass
31
-
32
- def is_contains(commands: Optional[str], llm: Any, id: Any, command: Dict[str, Dict[str, list]]) -> bool:
33
-
34
- pass
35
-
36
- def execute() -> Any:
37
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
command/command_interface.py DELETED
@@ -1,20 +0,0 @@
1
- from typing import Optional, Dict, Any
2
- from abc import ABC, abstractmethod
3
-
4
-
5
- class CommandHandler(ABC):
6
- # {"base": {"name": command}, "advance": {"name": command}}
7
- @abstractmethod
8
- def get_command() -> Dict[str, Dict[str, list]]:
9
- "Method to get command from yaml file and return dictionary"
10
- pass
11
-
12
- @abstractmethod
13
- def is_contains() -> bool:
14
- """Method to check contains command in text or not"""
15
- pass
16
-
17
- @abstractmethod
18
- def execute() -> Any:
19
- "Method to execute command"
20
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
command/commands.yaml DELETED
@@ -1,25 +0,0 @@
1
- # This file is needed to check what the user wants to do when using Chelsea. The structure is similar to the prompts.
2
- # id: just a number between 1 and 100, an integer
3
- # command_dict: all options for how the user can pronounce the command, divided into two languages - English (en) and Ukrainian (ua)
4
- # description: a simple description of the command
5
-
6
- commands:
7
- - id: 1
8
- command_dict: {"en": ["Documentation"], "ua": ["ДокумСнтація"]}
9
- description: receive documentation
10
- - id: 2
11
- command_dict: {"en": ["Buy Me A Coffee", "BMAC", "Coffee", "Pay the ghost"], "ua": ["Кава", "Π—Π°ΠΏΠ»Π°Ρ‚ΠΈ ΠΏΡ€ΠΈΠΌΠ°Ρ€Ρ–"]}
12
- description: support project on Buy Me A Coffee
13
- - id: 3
14
- command_dict: {"en": ["Translate"], "ua": ["ΠŸΠ΅Ρ€Π΅ΠΊΠ»Π°Π΄"]}
15
- description: translate
16
- - id: 4
17
- command_dict: {"en": ["Change model"], "ua": ["Π—ΠΌΡ–Π½ΠΈΡ‚ΠΈ модСль"]}
18
- description: change model
19
- - id: 5
20
- command_dict: {"en": ["Yes", "Yeah", "Yep"], "ua": ["Вак", "Ага"]}
21
- description: positive agreement
22
- - id: 6
23
- command_dict: {"en": ["No", "Nah"], "ua": ["Ні", "Ніт", "Ніц"]}
24
- description: negative agreement
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
command/utils/form_documentation.py DELETED
@@ -1,45 +0,0 @@
1
- import os
2
- import logging
3
-
4
- from typing import Optional
5
-
6
- TEMPLATE = """
7
- A complete list of commands that are designed to facilitate the use of the voice assistant Chelsea.
8
- The complete list consists of no more than 100 commands written in a txt file.
9
- The list of commands will be updated as the assistant is developed.
10
- The first version of the programme (Arctic Monkeys) contains a total of 6 commands.
11
-
12
- The list of commands and their use.
13
-
14
- documentation command: first used to inform you how you able to interact with assistant. To call this command just say Documentation in english or
15
- ДокумСнтація in Ukrainian. Note you can use assistant without those commands, however for getting more advance expirience i strongly recommend use them.
16
-
17
- bmac command: Support author on Buy Me a Coffee. To activate this command you can spell in english Buy Me A Coffee, BMAC, Coffee, Pay the ghost and
18
- in Ukrainian Кава, Π—Π°ΠΏΠ»Π°Ρ‚ΠΈ ΠΏΡ€ΠΈΠΌΠ°Ρ€Ρ–.
19
-
20
- translate command: Use for translating speech in language which you choice. Commands to use it in english is Translate and in Ukrainian is ΠŸΠ΅Ρ€Π΅ΠΊΠ»Π°Π΄.
21
-
22
- change model command: You able to choose model using hugging face api (hf) or local model using Llama. List of models for hf are: Mistaril and Tinyllama and for lc: Phi 3 and TinyLlama either.
23
- To call command use in english Change model and in Ukrainian Π—ΠΌΡ–Π½ΠΈΡ‚ΠΈ модСль.
24
-
25
- yes command: Command to confirm your consent. To call command use in english Yes, Yeah, Yep and in Ukrainian Вак, Ага.
26
-
27
- no command: Command to confirm your disagreement. To call command use in english No, Nah and in Ukrainian Ні, Ніт, Ніц.
28
- """
29
-
30
-
31
- def generate_doc(path: Optional[str] = None) -> Optional[str]:
32
- if path is not None:
33
- file = os.path.join(path, NAME)
34
- else:
35
- current_dir = os.path.dirname(os.path.realpath(__file__))
36
- file = os.path.join(current_dir, NAME)
37
-
38
- logging.info(file)
39
-
40
- try:
41
- with open(file, 'w') as f:
42
- f.write(TEMPLATE)
43
- return file
44
- except IOError as e:
45
- logging.error(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
command/utils/load_yaml.py DELETED
@@ -1,12 +0,0 @@
1
- import yaml
2
- from typing import Any
3
-
4
-
5
- def load_commands_from_yaml(file_path: str) -> Any:
6
- try:
7
- with open(file_path, 'r') as file:
8
- commands_data = yaml.safe_load(file)
9
- print(commands_data)
10
- return commands_data.get('commands', [])
11
- except IOError as e:
12
- print(f"Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
infrastructure/__init__.py DELETED
File without changes
languages.txt CHANGED
@@ -1,10 +1,111 @@
1
- 'english', 'chinese', 'german', 'spanish', 'korean', 'french', 'japanese', 'portuguese', 'turkish', 'polish', 'catalan',
2
- 'dutch', 'arabic', 'swedish', 'italian', 'indonesian', 'hindi', 'finnish', 'vietnamese', 'hebrew', 'ukrainian', 'greek', 'malay', 'czech',
3
- 'romanian', 'danish', 'hungarian', 'tamil', 'norwegian', 'thai', 'urdu', 'croatian', 'bulgarian', 'lithuanian', 'latin', 'maori', 'malayalam',
4
- 'welsh', 'slovak', 'telugu', 'persian', 'latvian', 'bengali', 'serbian', 'azerbaijani', 'slovenian', 'kannada', 'estonian', 'macedonian',
5
- 'breton', 'basque', 'icelandic', 'armenian', 'nepali', 'mongolian', 'bosnian', 'kazakh', 'albanian', 'swahili', 'galician', 'marathi',
6
- 'punjabi', 'sinhala', 'khmer', 'shona', 'yoruba', 'somali', 'afrikaans', 'occitan', 'georgian', 'belarusian', 'tajik', 'sindhi', 'gujarati',
7
- 'amharic', 'yiddish', 'lao', 'uzbek', 'faroese', 'haitian creole', 'pashto', 'turkmen', 'nynorsk', 'maltese', 'sanskrit', 'luxembourgish',
8
- 'myanmar', 'tibetan', 'tagalog', 'malagasy', 'assamese', 'tatar', 'hawaiian', 'lingala', 'hausa', 'bashkir', 'javanese', 'sundanese',
9
- 'cantonese', 'burmese', 'valencian', 'flemish', 'haitian', 'letzeburgesch', 'pushto', 'panjabi', 'moldavian', 'moldovan', 'sinhalese',
10
- 'castilian', 'mandarin'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'english',
2
+ 'chinese',
3
+ 'german',
4
+ 'spanish',
5
+ 'korean',
6
+ 'french',
7
+ 'japanese',
8
+ 'portuguese',
9
+ 'turkish',
10
+ 'polish',
11
+ 'catalan',
12
+ 'dutch',
13
+ 'arabic',
14
+ 'swedish',
15
+ 'italian',
16
+ 'indonesian',
17
+ 'hindi',
18
+ 'finnish',
19
+ 'vietnamese',
20
+ 'hebrew',
21
+ 'ukrainian',
22
+ 'greek',
23
+ 'malay',
24
+ 'czech',
25
+ 'romanian',
26
+ 'danish',
27
+ 'hungarian',
28
+ 'tamil',
29
+ 'norwegian',
30
+ 'thai',
31
+ 'urdu',
32
+ 'croatian',
33
+ 'bulgarian',
34
+ 'lithuanian',
35
+ 'latin',
36
+ 'maori',
37
+ 'malayalam',
38
+ 'welsh',
39
+ 'slovak',
40
+ 'telugu',
41
+ 'persian',
42
+ 'latvian',
43
+ 'bengali',
44
+ 'serbian',
45
+ 'azerbaijani',
46
+ 'slovenian',
47
+ 'kannada',
48
+ 'estonian',
49
+ 'macedonian',
50
+ 'breton',
51
+ 'basque',
52
+ 'icelandic',
53
+ 'armenian',
54
+ 'nepali',
55
+ 'mongolian',
56
+ 'bosnian',
57
+ 'kazakh',
58
+ 'albanian',
59
+ 'swahili',
60
+ 'galician',
61
+ 'marathi',
62
+ 'punjabi',
63
+ 'sinhala',
64
+ 'khmer',
65
+ 'shona',
66
+ 'yoruba',
67
+ 'somali',
68
+ 'afrikaans',
69
+ 'occitan',
70
+ 'georgian',
71
+ 'belarusian',
72
+ 'tajik',
73
+ 'sindhi',
74
+ 'gujarati',
75
+ 'amharic',
76
+ 'yiddish',
77
+ 'lao',
78
+ 'uzbek',
79
+ 'faroese',
80
+ 'haitian creole',
81
+ 'pashto',
82
+ 'turkmen',
83
+ 'nynorsk',
84
+ 'maltese',
85
+ 'sanskrit',
86
+ 'luxembourgish',
87
+ 'myanmar',
88
+ 'tibetan',
89
+ 'tagalog',
90
+ 'malagasy',
91
+ 'assamese',
92
+ 'tatar',
93
+ 'hawaiian',
94
+ 'lingala',
95
+ 'hausa',
96
+ 'bashkir',
97
+ 'javanese',
98
+ 'sundanese',
99
+ 'cantonese',
100
+ 'burmese',
101
+ 'valencian',
102
+ 'flemish',
103
+ 'haitian',
104
+ 'letzeburgesch',
105
+ 'pushto',
106
+ 'panjabi',
107
+ 'moldavian',
108
+ 'moldovan',
109
+ 'sinhalese',
110
+ 'castilian',
111
+ 'mandarin'
llm/__init__.py DELETED
File without changes
llm/apimodels/gemini_model.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import logging
4
+
5
+ from llm.utils.hf_interface import HFInterface
6
+
7
+ from langchain_google_genai import GoogleGenerativeAI
8
+ from abc import ABC
9
+
10
+ logger = logging.getLogger(__name__)
11
+ logger.setLevel(logging.ERROR)
12
+
13
+ file_handler = logging.FileHandler(
14
+ "logs/chelsea_llm_gemini.log") # for all modules here template for logs file is "llm/logs/chelsea_{module_name}_{entity}.log"
15
+ logger.setLevel(logging.INFO) # informed
16
+
17
+ formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
18
+ file_handler.setFormatter(formatted)
19
+
20
+ logger.addHandler(file_handler)
21
+ logger.info("Getting information from apimodel module")
22
+
23
+
24
+ # 429 - You've exceeded the rate limit.
25
+ # 400 - The request body is malformed.
26
+ # 403 - Your API key doesn't have the required permissions.
27
+ # 404 - The requested resource wasn't found.
28
+ # 500 - An unexpected error occurred on Google's side.
29
+ # 503 - The service may be temporarily overloaded or down.
30
+
31
+ # Якщо трапиться одна з цих помилок , то слід перемкнутися на HF , якщо якась модель занадто повільна
32
+ # перемкнутися на іншу після закінчення виконання текущої, якщо трабли з HF перемкнутися на локальну,
33
+ # якщо ж у користувача відсутнє інтернет зʼєднання, то нічим не зарадиш,
34
+ # хіба що пропонувати скачати репозиторій.
35
+
36
+ _api = os.environ.get("GEMINI_API_TOKEN")
37
+
38
+
39
+ class Gemini(HFInterface, ABC):
40
+ """
41
+ This class represents a Gemini large language model interface.
42
+
43
+ It inherits from `HFInterface` (likely an interface from a Hugging Face library)
44
+ and `ABC` (for abstract base class) to enforce specific functionalities.
45
+ """
46
+
47
+ def __init__(self):
48
+ """
49
+ Initializer for the Gemini class.
50
+
51
+ - Raises a `ValueError` if the provided API key is None or an empty string.
52
+ - Creates an instance of `GoogleGenerativeAI` using the specified model name
53
+ ("gemini-1.5-flash") and the stored API key.
54
+ """
55
+
56
+ if not _api:
57
+ raise ValueError(f"Your api is None or empty string {_api}, please provide a Gemini API")
58
+
59
+ #{
60
+ # 'model': 'gemini-1.5-flash', 'temperature': 0.7, 'top_p': None,
61
+ # 'top_k': None, 'max_output_tokens': None, 'candidate_count': 1
62
+ #}
63
+ self.llm = GoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=_api)
64
+
65
+ def execution(self) -> GoogleGenerativeAI:
66
+ """
67
+ This method attempts to return the underlying `llm` (likely a language model object).
68
+
69
+ It wraps the retrieval in a `try-except` block to catch potential exceptions.
70
+ On success, it returns the `llm` object.
71
+ On failure, it logs an error message with the exception details using a logger
72
+ (assumed to be available elsewhere).
73
+ """
74
+ try:
75
+ return self.llm
76
+ except Exception as e:
77
+ logger.error("Something wrong with Gemini api", exc_info=e)
78
+ print(f"Something wrong with Gemini api: {e}")
79
+
80
+ def model_name(self):
81
+ """
82
+ Simple method that returns the hardcoded model name ("gemini-1.5-flash").
83
+
84
+ This can be useful for identifying the specific model being used.
85
+ """
86
+ return "gemini-1.5-flash"
87
+
88
+ def __str__(self):
89
+ """
90
+ Defines the string representation of the Gemini object for human readability.
91
+
92
+ It returns a string indicating that it's a "Gemini model" and appends the model name
93
+ obtained from the `model_name` method.
94
+ """
95
+ return f"Gemini model: {self.model_name()}"
96
+
97
+ def __repr__(self):
98
+ """
99
+ Defines the representation of the Gemini object for debugging purposes.
100
+
101
+ It uses `hasattr` to check if the `llm` attribute is set.
102
+ - If `llm` exists, it returns a string like `Gemini(llm=GoogleGenerativeAI(...))`,
103
+ showing the class name and the `llm` object information.
104
+ - If `llm` is not yet set (during initialization), it returns
105
+ `Gemini(llm=not initialized)`, indicating the state.
106
+ """
107
+ llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
108
+ return f"{self.__class__.__name__}({llm_info})"
109
+
110
+
llm/apimodels/hf_model.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
+ from abc import ABC
5
+ from typing import Any
6
+
7
+ from llm.utils.hf_interface import HFInterface
8
+ from llm.utils.config import config
9
+
10
+ from langchain_community.llms import HuggingFaceEndpoint
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+ logger.setLevel(logging.ERROR) # because if something went wrong in execution, application can't be work anyway
15
+
16
+ file_handler = logging.FileHandler(
17
+ "logs/chelsea_llm_huggingfacehub.log") # for all modules here template for logs file is "llm/logs/chelsea_{module_name}_{dir_name}.log"
18
+ logger.setLevel(logging.INFO) # informed
19
+
20
+ formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
21
+ file_handler.setFormatter(formatted)
22
+
23
+ logger.addHandler(file_handler)
24
+ logger.info("Getting information from apimodel module")
25
+
26
+ _api = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
27
+
28
+ class HF_Mistaril(HFInterface, ABC):
29
+ """
30
+ This class represents an interface for the Mistaril large language model from Hugging Face.
31
+
32
+ It inherits from `HFInterface` (likely an interface from a Hugging Face library)
33
+ and `ABC` (for abstract base class) to enforce specific functionalities.
34
+ """
35
+
36
+ def __init__(self):
37
+ """
38
+ Initializer for the `HF_Mistaril` class.
39
+
40
+ - Retrieves configuration values for the Mistaril model from a `config` dictionary:
41
+ - `repo_id`: The ID of the repository containing the Mistaril model on Hugging Face.
42
+ - `max_length`: Maximum length of the generated text.
43
+ - `temperature`: Controls randomness in the generation process.
44
+ - `top_k`: Restricts the vocabulary used for generation.
45
+ - Raises a `ValueError` if the `api` key (presumably stored elsewhere) is missing.
46
+ - Creates an instance of `HuggingFaceEndpoint` using the retrieved configuration
47
+ and the `api` key.
48
+ """
49
+
50
+ repo_id = config["HF_Mistrail"]["model"]
51
+ max_length = config["HF_Mistrail"]["max_new_tokens"]
52
+ temperature = config["HF_Mistrail"]["temperature"]
53
+ top_k = config["HF_Mistrail"]["top_k"]
54
+
55
+ if not _api:
56
+ raise ValueError(f"API key not provided {_api}")
57
+
58
+ self.llm = HuggingFaceEndpoint(
59
+ repo_id=repo_id, max_length=max_length, temperature=temperature, top_k=top_k, token=_api
60
+ )
61
+
62
+ def execution(self) -> Any:
63
+ """
64
+ This method attempts to return the underlying `llm` (likely a language model object).
65
+
66
+ It wraps the retrieval in a `try-except` block to catch potential exceptions.
67
+ On success, it returns the `llm` object.
68
+ On failure, it logs an error message with the exception details using a logger
69
+ (assumed to be available elsewhere).
70
+ """
71
+ try:
72
+ return self.llm # `invoke()`
73
+ except Exception as e:
74
+ logger.error("Something wrong with API or HuggingFaceEndpoint", exc_info=e)
75
+ print(f"Something wrong with API or HuggingFaceEndpoint: {e}")
76
+
77
+ def model_name(self):
78
+ """
79
+ Simple method that returns the Mistaril model name from the configuration.
80
+
81
+ This can be useful for identifying the specific model being used.
82
+ """
83
+ return config["HF_Mistrail"]["model"]
84
+
85
+ def __str__(self):
86
+ """
87
+ Defines the string representation of the `HF_Mistaril` object for human readability.
88
+
89
+ It combines the class name and the model name retrieved from the `model_name` method
90
+ with an underscore separator.
91
+ """
92
+ return f"{self.__class__.__name__}_{self.model_name()}"
93
+
94
+ def __repr__(self):
95
+ """
96
+ Defines the representation of the `HF_Mistaril` object for debugging purposes.
97
+
98
+ It uses `hasattr` to check if the `llm` attribute is set.
99
+ - If `llm` exists, it returns a string like `HF_Mistaril(llm=HuggingFaceEndpoint(...))`,
100
+ showing the class name and the `llm` object information.
101
+ - If `llm` is not yet set (during initialization), it returns
102
+ `HF_Mistaril(llm=not initialized)`, indicating the state.
103
+ """
104
+ llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
105
+ return f"{self.__class__.__name__}({llm_info})"
106
+
107
+
108
+
109
+ class HF_TinyLlama(HFInterface, ABC):
110
+ """
111
+ This class represents an interface for the TinyLlama large language model from Hugging Face.
112
+
113
+ It inherits from `HFInterface` (likely an interface from a Hugging Face library)
114
+ and `ABC` (for abstract base class) to enforce specific functionalities.
115
+ """
116
+
117
+ def __init__(self):
118
+ """
119
+ Initializer for the `HF_TinyLlama` class.
120
+
121
+ - Retrieves configuration values for the Mistaril model from a `config` dictionary:
122
+ - `repo_id`: The ID of the repository containing the TinyLlama model on Hugging Face.
123
+ - `max_length`: Maximum length of the generated text.
124
+ - `temperature`: Controls randomness in the generation process.
125
+ - `top_k`: Restricts the vocabulary used for generation.
126
+ - Raises a `ValueError` if the `api` key (presumably stored elsewhere) is missing.
127
+ - Creates an instance of `HuggingFaceEndpoint` using the retrieved configuration
128
+ and the `api` key.
129
+ """
130
+
131
+ repo_id = config["HF_TinyLlama"]["model"]
132
+ max_length = config["HF_TinyLlama"]["max_new_tokens"]
133
+ temperature = config["HF_TinyLlama"]["temperature"]
134
+ top_k = config["HF_TinyLlama"]["top_k"]
135
+
136
+ if not _api:
137
+ raise ValueError(f"API key not provided {_api}")
138
+
139
+ self.llm = HuggingFaceEndpoint(
140
+ repo_id=repo_id, max_length=max_length, temperature=temperature, top_k=top_k, token=_api
141
+ )
142
+
143
+ def execution(self) -> Any:
144
+ """
145
+ This method attempts to return the underlying `llm` (likely a language model object).
146
+
147
+ It wraps the retrieval in a `try-except` block to catch potential exceptions.
148
+ On success, it returns the `llm` object.
149
+ On failure, it logs an error message with the exception details using a logger
150
+ (assumed to be available elsewhere).
151
+ """
152
+ try:
153
+ return self.llm
154
+ except Exception as e:
155
+ logger.error("Something wrong with API or HuggingFaceEndpoint", exc_info=e)
156
+ print(f"Something wrong with API or HuggingFaceEndpoint: {e}")
157
+
158
+ def model_name(self):
159
+ """
160
+ Simple method that returns the TinyLlama model name from the configuration.
161
+
162
+ This can be useful for identifying the specific model being used.
163
+ """
164
+ return config["HF_TinyLlama"]["model"]
165
+
166
+ def __str__(self):
167
+ """
168
+ Defines the string representation of the `HF_TinyLlama` object for human readability.
169
+
170
+ It combines the class name and the model name retrieved from the `model_name` method
171
+ with an underscore separator.
172
+ """
173
+ return f"{self.__class__.__name__}_{self.model_name()}"
174
+
175
+ def __repr__(self):
176
+ """
177
+ Defines the representation of the `HF_TinyLlama` object for debugging purposes.
178
+
179
+ It uses `hasattr` to check if the `llm` attribute is set.
180
+ - If `llm` exists, it returns a string like `HF_TinyLlama(llm=HuggingFaceEndpoint(...))`,
181
+ showing the class name and the `llm` object information.
182
+ - If `llm` is not yet set (during initialization), it returns
183
+ `HF_TinyLlama(llm=not initialized)`, indicating the state.
184
+ """
185
+ llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
186
+ return f"{self.__class__.__name__}({llm_info})"
llm/huggingfacehub/hf_model.py DELETED
@@ -1,140 +0,0 @@
1
- import os
2
- import yaml
3
- import logging
4
-
5
- from abc import ABC
6
-
7
- from llm.hf_interface import HFInterface
8
- from llm.config import config
9
-
10
- from langchain.prompts import PromptTemplate
11
- from langchain.chains import LLMChain
12
- from langchain.llms import HuggingFaceHub
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- logger.setLevel(logging.CRITICAL) # because if something went wrong in execution, application can't be work anyway
17
-
18
- file_handler = logging.FileHandler(
19
- "logs/chelsea_llm_huggingfacehub.log") # for all modules here template for logs file is "llm/logs/chelsea_{module_name}_{dir_name}.log"
20
- logger.setLevel(logging.INFO) # informed
21
-
22
- formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
23
- file_handler.setFormatter(formatted)
24
-
25
- logger.addHandler(file_handler)
26
-
27
- logger.info("Getting information from hf_model module")
28
-
29
- llm_dir = '/home/user/app/llm/'
30
-
31
- path_to_yaml = os.path.join(os.getcwd(), "llm/prompts.yaml")
32
-
33
- print("Path to prompts : ", path_to_yaml)
34
-
35
-
36
- class HF_Mistaril(HFInterface, ABC):
37
- def __init__(self, prompt_entity: str, prompt_id: int = 0):
38
- self.prompt_entity = prompt_entity
39
- self.prompt_id = prompt_id
40
-
41
- self.model_config = config["HF_Mistrail"]
42
-
43
- # Π”ΠΎΠ΄Π°Ρ‚ΠΈ repetition_penalty, task?, top_p, stop_sequences
44
- self.llm = HuggingFaceHub(
45
- repo_id=self.model_config["model"],
46
- # temperature=self.model_config["temperature"],
47
- # max_new_tokens=self.model_config["max_new_tokens"],
48
- # top_k=self.model_config["top_k"],
49
- model_kwargs={"load_in_8bit": self.model_config["load_in_8bit"],
50
- "temperature": self.model_config["temperature"],
51
- "max_new_tokens": self.model_config["max_new_tokens"],
52
- "top_k": self.model_config["top_k"],
53
- },
54
- huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
55
- )
56
-
57
- @staticmethod
58
- def __read_yaml():
59
- try:
60
- yaml_file = os.path.join(llm_dir, 'prompts.yaml')
61
- with open(yaml_file, 'r') as f:
62
- data = yaml.safe_load(f)
63
- f.close()
64
- return data
65
- except Exception as e:
66
- print(f"Execution filed : {e}")
67
- logger.error(msg="Execution filed", exc_info=e)
68
-
69
- def execution(self):
70
- try:
71
- data = self.__read_yaml()
72
- prompts = data["prompts"][
73
- self.prompt_id] #get second prompt from yaml, need change id parameter to get other prompt
74
- template = prompts["prompt_template"]
75
- prompt = PromptTemplate(template=template, input_variables=["entity"])
76
- llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
77
- output = llm_chain.invoke(self.prompt_entity)
78
- return output["text"]
79
- except Exception as e:
80
- print(f"Execution filed : {e}")
81
- logger.critical(msg="Execution filed", exc_info=e)
82
-
83
- def __str__(self):
84
- return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
85
-
86
- def __repr__(self):
87
- return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
88
-
89
-
90
- class HF_TinyLlama(HFInterface, ABC):
91
- def __init__(self, prompt_entity: str, prompt_id: int = 0):
92
- self.prompt_entity = prompt_entity
93
- self.prompt_id = prompt_id
94
-
95
- self.model_config = config["HF_TinyLlama"]
96
-
97
- self.llm = HuggingFaceHub(
98
- repo_id=self.model_config["model"],
99
- # temperature=self.model_config["temperature"],
100
- # max_new_tokens=self.model_config["max_new_tokens"],
101
- # top_k=self.model_config["top_k"],
102
- model_kwargs={"load_in_8bit": self.model_config["load_in_8bit"],
103
- "temperature": self.model_config["temperature"],
104
- "max_new_tokens": self.model_config["max_new_tokens"],
105
- "top_k": self.model_config["top_k"],
106
- },
107
- huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
108
- )
109
-
110
- @staticmethod
111
- def __read_yaml():
112
- try:
113
- yaml_file = os.path.join(llm_dir, 'prompts.yaml')
114
- with open(yaml_file, 'r') as f:
115
- data = yaml.safe_load(f)
116
- f.close()
117
- return data
118
- except Exception as e:
119
- print(f"Execution filed : {e}")
120
- logger.error(msg="Execution filed", exc_info=e)
121
-
122
- def execution(self):
123
- try:
124
- data = self.__read_yaml()
125
- prompts = data["prompts"][
126
- self.prompt_id] #get second prompt from yaml, need change id parameter to get other prompt
127
- template = prompts["prompt_template"]
128
- prompt = PromptTemplate(template=template, input_variables=["entity"])
129
- llm_chain = LLMChain(prompt=prompt, llm=self.llm, verbose=True)
130
- output = llm_chain.invoke(self.prompt_entity)
131
- return output["text"]
132
- except Exception as e:
133
- print(f"Execution filed : {e}")
134
- logger.critical(msg="Execution filed", exc_info=e)
135
-
136
- def __str__(self):
137
- return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
138
-
139
- def __repr__(self):
140
- return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llm/llamacpp/lc_model.py CHANGED
@@ -3,20 +3,14 @@ import logging
3
  from abc import ABC
4
 
5
  import requests
6
- import yaml
7
- from langchain.prompts import PromptTemplate
8
- from langchain_community.llms import LlamaCpp
9
-
10
- from llm.config import config
11
- from llm.lc_interface import LCInterface
12
 
13
- # print(os.getcwd())
14
 
15
- print("Current path : ", os.path.dirname(os.path.realpath(__file__)))
 
16
 
17
  logger = logging.getLogger(__name__)
18
-
19
- logger.setLevel(logging.CRITICAL) # because if something went wrong in execution application can't be work anymore
20
 
21
  file_handler = logging.FileHandler(
22
  "logs/chelsea_llm_llamacpp.log") # for all modules template for logs file is "logs/chelsea_{module_name}_{dir_name}.log"
@@ -24,27 +18,16 @@ logger.setLevel(logging.INFO) # informed
24
 
25
  formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
26
  file_handler.setFormatter(formatted)
27
-
28
  logger.addHandler(file_handler)
29
 
30
- try:
31
- os.chdir('/home/user/app/llm/')
32
- except FileNotFoundError:
33
- print("Error: Could not move up. You might be at the root directory.")
34
-
35
  work_dir = os.getcwd()
36
-
37
- models_dir = os.path.join(work_dir, "models")
38
 
39
 
40
  class LC_TinyLlama(LCInterface, ABC):
41
- def __init__(self, prompt_entity: str, prompt_id: int = 0):
42
- self.prompt_entity = prompt_entity
43
- self.prompt_id = prompt_id
44
-
45
  self.model_config = config["LC_TinyLlama-1.1B-Chat-v1.0-GGUF"]
46
 
47
-
48
  try:
49
  get_file = requests.get(self.model_config["model_url"])
50
  if get_file.status_code == 200:
@@ -60,43 +43,23 @@ class LC_TinyLlama(LCInterface, ABC):
60
  print(f"Error while writing a file to directory : {e}")
61
  logger.error(msg="Error while write a file to directory", exc_info=e)
62
 
63
- @staticmethod
64
- def __read_yaml():
65
- try:
66
- yaml_file = os.path.join(work_dir, 'prompts.yaml')
67
- with open(yaml_file, 'r') as file:
68
- data = yaml.safe_load(file)
69
- return data
70
- except Exception as e:
71
- print(f"Execution filed : {e}")
72
- logger.error(msg="Execution filed", exc_info=e)
73
 
74
  def execution(self):
75
  try:
76
- data = self.__read_yaml()
77
- prompts = data["prompts"][
78
- self.prompt_id] # to get second prompt from yaml, need change id parameter to get other prompt
79
- template = prompts["prompt_template"]
80
- prompt = PromptTemplate(template=template, input_variables=["entity"])
81
-
82
- llm = LlamaCpp(
83
- model_path=os.path.join(models_dir, self.model_config["model_name"]),
84
- temperature=self.model_config["temperature"],
85
- max_tokens=self.model_config["max_tokens"],
86
- top_p=self.model_config["top_p"],
87
- top_k=self.model_config["top_k"],
88
- # callback_manager=callback_manager,
89
- verbose=True, # Verbose is required to pass to the callback manager
90
- )
91
-
92
- logger.info(f"Check llm : {llm}")
93
-
94
- llm_chain = prompt | llm
95
- output = llm_chain.invoke({"question": self.prompt_entity})
96
- return output
97
  except Exception as e:
98
- print(f"Execution filed : {e}")
99
- logger.critical(msg="Execution filed", exc_info=e)
 
100
 
101
  def clear_llm(self, unused_model_dict, current_lc):
102
  # If unused_model_dict is not empty
@@ -108,8 +71,10 @@ class LC_TinyLlama(LCInterface, ABC):
108
  # delete files from models directory except of current_lc
109
  os.remove(value)
110
  logger.info(f"Successfully deleted file {value}")
 
111
  else:
112
  logger.info(f"Unfortunately dictionary empty or None")
 
113
 
114
  def get_unused(self, current_lc):
115
 
@@ -121,21 +86,22 @@ class LC_TinyLlama(LCInterface, ABC):
121
  return {item: unused_model_file}
122
  else:
123
  return None
 
 
 
124
 
125
  def __str__(self):
126
- return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
127
 
128
  def __repr__(self):
129
- return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
130
-
 
131
 
132
  class LC_Phi3(LCInterface, ABC):
133
- def __init__(self, prompt_entity: str, prompt_id: int = 0):
134
- self.prompt_entity = prompt_entity
135
- self.prompt_id = prompt_id
136
-
137
  self.model_config = config["LC_Phi-3-mini-4k-instruct-gguf"]
138
-
139
  try:
140
  get_file = requests.get(self.model_config["model_url"])
141
  if get_file.status_code == 200:
@@ -143,50 +109,33 @@ class LC_Phi3(LCInterface, ABC):
143
  with open(path_to_model, "wb") as f:
144
  f.write(get_file.content)
145
  logger.info("Model file successfully recorded")
 
146
  f.close()
147
  except FileExistsError:
 
148
  logger.info(f"Model file {path_to_model} already exists. Skipping download.")
149
  except OSError as e:
150
  print(f"Error while writing a file to directory : {e}")
151
  logger.error(msg="Error while write a file to directory", exc_info=e)
152
 
153
- @staticmethod
154
- def __read_yaml():
155
- try:
156
- yaml_file = os.path.join(work_dir, 'prompts.yaml')
157
- with open(yaml_file, 'r') as file:
158
- data = yaml.safe_load(file)
159
- return data
160
- except Exception as e:
161
- print(f"Execution filed : {e}")
162
- logger.error(msg="Execution filed", exc_info=e)
163
 
164
  def execution(self):
165
  try:
166
- data = self.__read_yaml()
167
- prompts = data["prompts"][
168
- self.prompt_id] # get second prompt from yaml, need change id parameter to get other prompt
169
- template = prompts["prompt_template"]
170
- prompt = PromptTemplate(template=template, input_variables=["entity"])
171
-
172
- llm = LlamaCpp(
173
- model_path=os.path.join(models_dir, self.model_config["model_name"]),
174
- temperature=self.model_config["temperature"],
175
- max_tokens=self.model_config["max_tokens"],
176
- top_p=self.model_config["top_p"],
177
- top_k=self.model_config["top_k"],
178
- # callback_manager=callback_manager,
179
- verbose=True, # Verbose is required to pass to the callback manager
180
- )
181
-
182
- logger.info(f"Check llm : {llm}")
183
-
184
- llm_chain = prompt | llm
185
- output = llm_chain.invoke({"question": self.prompt_entity})
186
- return output
187
  except Exception as e:
188
- print(f"Execution filed : {e}")
189
- logger.critical(msg="Execution filed", exc_info=e)
 
190
 
191
  def clear_llm(self, unused_model_dict, current_lc):
192
  # If unused_model_dict is not empty
@@ -198,8 +147,10 @@ class LC_Phi3(LCInterface, ABC):
198
  # delete files from models directory except of current_lc
199
  os.remove(value)
200
  logger.info(f"Successfully deleted file {value}")
 
201
  else:
202
  logger.info(f"Unfortunately dictionary empty or None")
 
203
 
204
  def get_unused(self, current_lc):
205
 
@@ -211,9 +162,13 @@ class LC_Phi3(LCInterface, ABC):
211
  return {item: unused_model_file}
212
  else:
213
  return None
 
 
 
214
 
215
  def __str__(self):
216
- return f"prompt_entity={self.prompt_entity}, prompt_id={self.prompt_id}"
217
 
218
  def __repr__(self):
219
- return f"{self.__class__.__name__}(prompt_entity: {type(self.prompt_entity)} = {self.prompt_entity}, prompt_id: {type(self.prompt_id)} = {self.prompt_id})"
 
 
3
  from abc import ABC
4
 
5
  import requests
 
 
 
 
 
 
6
 
7
+ from langchain_community.llms import LlamaCpp
8
 
9
+ from llm.utils.config import config
10
+ from llm.utils.lc_interface import LCInterface
11
 
12
  logger = logging.getLogger(__name__)
13
+ logger.setLevel(logging.ERROR) # because if something went wrong in execution application can't be work anymore
 
14
 
15
  file_handler = logging.FileHandler(
16
  "logs/chelsea_llm_llamacpp.log") # for all modules template for logs file is "logs/chelsea_{module_name}_{dir_name}.log"
 
18
 
19
  formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
20
  file_handler.setFormatter(formatted)
 
21
  logger.addHandler(file_handler)
22
 
 
 
 
 
 
23
  work_dir = os.getcwd()
24
+ models_dir = os.path.join(work_dir, "llm/models")
 
25
 
26
 
27
  class LC_TinyLlama(LCInterface, ABC):
28
+ def __init__(self):
 
 
 
29
  self.model_config = config["LC_TinyLlama-1.1B-Chat-v1.0-GGUF"]
30
 
 
31
  try:
32
  get_file = requests.get(self.model_config["model_url"])
33
  if get_file.status_code == 200:
 
43
  print(f"Error while writing a file to directory : {e}")
44
  logger.error(msg="Error while write a file to directory", exc_info=e)
45
 
46
+ self.llm = LlamaCpp(
47
+ model_path=os.path.join(models_dir, self.model_config["model_name"]),
48
+ temperature=self.model_config["temperature"],
49
+ max_tokens=self.model_config["max_tokens"],
50
+ top_p=self.model_config["top_p"],
51
+ top_k=self.model_config["top_k"],
52
+ # callback_manager=callback_manager,
53
+ verbose=True, # Verbose is required to pass to the callback manager
54
+ )
 
55
 
56
  def execution(self):
57
  try:
58
+ return self.llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
+ print(f"Execution filed in LC_TinyLlama execution function: {e}")
61
+ logger.critical(msg="Execution filed in LC_TinyLlama execution function", exc_info=e)
62
+ return None
63
 
64
  def clear_llm(self, unused_model_dict, current_lc):
65
  # If unused_model_dict is not empty
 
71
  # delete files from models directory except of current_lc
72
  os.remove(value)
73
  logger.info(f"Successfully deleted file {value}")
74
+ print(f"Successfully deleted file {value}")
75
  else:
76
  logger.info(f"Unfortunately dictionary empty or None")
77
+ print(f"Unfortunately dictionary {unused_model_dict} empty or None")
78
 
79
  def get_unused(self, current_lc):
80
 
 
86
  return {item: unused_model_file}
87
  else:
88
  return None
89
+
90
+ def model_name(self):
91
+ return self.model_config["model_name"]
92
 
93
  def __str__(self):
94
+ return f"{self.__class__.__name__}_{self.model_name()}"
95
 
96
  def __repr__(self):
97
+ llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
98
+ return f"{self.__class__.__name__}({llm_info})"
99
+
100
 
101
  class LC_Phi3(LCInterface, ABC):
102
+ def __init__(self):
 
 
 
103
  self.model_config = config["LC_Phi-3-mini-4k-instruct-gguf"]
104
+
105
  try:
106
  get_file = requests.get(self.model_config["model_url"])
107
  if get_file.status_code == 200:
 
109
  with open(path_to_model, "wb") as f:
110
  f.write(get_file.content)
111
  logger.info("Model file successfully recorded")
112
+ print("Model file successfully recorded")
113
  f.close()
114
  except FileExistsError:
115
+ print(f"Model file {path_to_model} already exists. Skipping download.")
116
  logger.info(f"Model file {path_to_model} already exists. Skipping download.")
117
  except OSError as e:
118
  print(f"Error while writing a file to directory : {e}")
119
  logger.error(msg="Error while write a file to directory", exc_info=e)
120
 
121
+ self.llm = LlamaCpp(
122
+ model_path=os.path.join(models_dir, self.model_config["model_name"]),
123
+ temperature=self.model_config["temperature"],
124
+ max_tokens=self.model_config["max_tokens"],
125
+ top_p=self.model_config["top_p"],
126
+ top_k=self.model_config["top_k"],
127
+ # callback_manager=callback_manager,
128
+ verbose=True, # Verbose is required to pass to the callback manager
129
+ )
130
+
131
 
132
  def execution(self):
133
  try:
134
+ return self.llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  except Exception as e:
136
+ print(f"Execution filed in LC_Phi3 execution function: {e}")
137
+ logger.critical(msg="Execution filed in LC_Phi3 execution function:", exc_info=e)
138
+ return None
139
 
140
  def clear_llm(self, unused_model_dict, current_lc):
141
  # If unused_model_dict is not empty
 
147
  # delete files from models directory except of current_lc
148
  os.remove(value)
149
  logger.info(f"Successfully deleted file {value}")
150
+ print(f"Successfully deleted file {value}")
151
  else:
152
  logger.info(f"Unfortunately dictionary empty or None")
153
+ print(f"Unfortunately dictionary {unused_model_dict} empty or None")
154
 
155
  def get_unused(self, current_lc):
156
 
 
162
  return {item: unused_model_file}
163
  else:
164
  return None
165
+
166
+ def model_name(self):
167
+ return self.model_config["model_name"]
168
 
169
  def __str__(self):
170
+ return f"{self.__class__.__name__}_{self.model_name()}"
171
 
172
  def __repr__(self):
173
+ llm_info = f"llm={self.llm}" if hasattr(self, 'llm') else 'llm=not initialized'
174
+ return f"{self.__class__.__name__}({llm_info})"
llm/llm_factory.py DELETED
@@ -1,27 +0,0 @@
1
- from typing import Dict
2
-
3
- from .huggingfacehub.hf_model import HF_Mistaril, HF_TinyLlama
4
- from .llamacpp.lc_model import LC_TinyLlama, LC_Phi3
5
-
6
-
7
- class LLM_Factory:
8
-
9
- # trigger = {"model_type": "execution_type"} -> {"hf": "small"}
10
- @staticmethod
11
- def create_llm(prompt_entity: str, prompt_id: int, trigger: Dict[str, str]):
12
-
13
- print(trigger)
14
-
15
- for key, value in trigger.items():
16
- if key == "hf" and value == "effective":
17
- model = HF_Mistaril(prompt_entity=prompt_entity, prompt_id=prompt_id)
18
- elif key == "hf" and value == "small":
19
- model = HF_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
20
- elif key == "lc" and value == "effective":
21
- model = LC_Phi3(prompt_entity=prompt_entity, prompt_id=prompt_id)
22
- elif key == "lc" and value == "small":
23
- model = LC_TinyLlama(prompt_entity=prompt_entity, prompt_id=prompt_id)
24
- else:
25
- model = None
26
-
27
- return model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llm/models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4c9bb1dbaa372f6381a035fa5c02ef087aaa1ff1f843a56a22328114f03fc59
3
- size 1170781568
 
 
 
 
llm/prompts.yaml DELETED
@@ -1,76 +0,0 @@
1
- # https://www.promptingguide.ai/techniques/
2
-
3
- # Add more examples of the template here, id should be an interval from 1 to 100_000
4
- # The description provides a basic overview of the template, the description should include the level of difficulty,
5
- # the name of the prompting method and end with the word "prompt", e.g. "simple short prompt" or "simple zero-shot thought chain prompt
6
- # The prompt is rated from 0 to 10
7
-
8
- prompts:
9
- - id: 1
10
- prompt_template: |
11
- Question: {question}
12
- Answer:
13
- description: simple question without a prompt
14
- rate: 1
15
-
16
- - id: 2
17
- prompt_template: |
18
- Question: {question}
19
- Answer: Write a concise answer on the question with one example if it's possible. CONCISE ANSWER.
20
- description: simple concise prompt
21
- rate: 3
22
-
23
- # useful for solving simple math task
24
- - id: 3
25
- prompt_template: |
26
- Let's think step by step.
27
- Question: {question}
28
- Answer:
29
- description: simple zero-shot chain-of-thoughts prompt
30
- rate: 5
31
-
32
- # another one example for solving simple math task
33
- - id: 4
34
- prompt_template: |
35
- Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls.
36
- Each can has 3 tennis balls. How many tennis balls does he have now?
37
- A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
38
- Question: {question}
39
- Answer:
40
- description: simple few-shot chain-of-thoughts prompt
41
- rate: 5
42
-
43
- - id: 5
44
- prompt_template: |
45
- Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done,
46
- there will be 21 trees. How many trees did the grove workers plant today?
47
- A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted.
48
- So, they must have planted 21 - 15 = 6 trees. The answer is 6.
49
- Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
50
- A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.
51
- Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
52
- A: Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74
53
- chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.
54
- Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops
55
- did Jason give to Denny?
56
- A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of
57
- lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.
58
- Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does
59
- he have now?
60
- A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so
61
- in total he has 7 + 2 = 9 toys. The answer is 9.
62
- Q: There were nine computers in the server room. Five more computers were installed each day, from
63
- monday to thursday. How many computers are now in the server room?
64
- A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 =
65
- 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers.
66
- The answer is 29.
67
- Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many
68
- golf balls did he have at the end of wednesday?
69
- A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On
70
- Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.
71
- Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
72
- A: She bought 5 bagels for $3 each. This means she spent $15. She has $8 left.
73
- Question: {question}
74
- Answer:
75
- description: simple self-consistency prompt
76
- rate: 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llm/utils/chat.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import logging
3
+
4
+ from llm.apimodels.gemini_model import Gemini
5
+ from llm.apimodels.hf_model import HF_Mistaril, HF_TinyLlama
6
+ from llm.llamacpp.lc_model import LC_Phi3, LC_TinyLlama
7
+
8
+ from typing import Optional, Any
9
+
10
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
11
+ from langchain.chains import ConversationChain
12
+
13
logger = logging.getLogger(__name__)
# INFO is the effective level: the import-time message below and later
# error records both have to reach the log file. (An earlier WARNING level
# was set and immediately overridden, so it has been dropped.)
logger.setLevel(logging.INFO)

# Log files follow the template "logs/chelsea_{module_name}_{entity}.log".
# NOTE: the path is relative to the working directory and the "logs"
# directory must already exist; FileHandler does not create it.
file_handler = logging.FileHandler("logs/chelsea_llm_chat.log")

formatted = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
file_handler.setFormatter(formatted)

logger.addHandler(file_handler)
logger.info("Getting information from chat module")
25
+
26
def prettify(raw_text: str) -> str:
    """
    Clean a raw model answer for display.

    Removes every "**" marker and trims leading/trailing whitespace.

    Args:
        raw_text: The text as returned by the model.

    Returns:
        The cleaned text.
    """
    return raw_text.replace("**", "").strip()
29
+
30
+ # option + command + F -> replace
31
+
32
+ memory: ConversationBufferWindowMemory = ConversationBufferWindowMemory(k=3, ai_prefix="Chelsea")
33
+
34
+ DELAY: int = 300 # 5 minutes
35
+
36
def has_failed(conversation, prompt) -> Optional[str]:
    """
    Run one prediction and report failure as None.

    Args:
        conversation: Object exposing `predict(input=...)`.
        prompt: The prompt passed to `predict`.

    Returns:
        The prettified response on success. None when anything in the attempt
        raises; the error is logged and printed before returning.
    """
    try:
        raw_answer = conversation.predict(input=prompt)
        print(f"response: {raw_answer}")
        return prettify(raw_text=raw_answer)
    except Exception as e:
        logger.error(msg="Error during prediction with conversation in has_failed function", exc_info=e)
        print(f"Error during prediction with conversation in has_failed function: {e}")
    return None
57
+
58
+
59
def has_delay(conversation, prompt) -> Optional[str]:
    """
    Run one prediction and reject it when it took longer than DELAY seconds.

    The duration is measured around the `predict` call; the call itself is
    not interrupted, the answer is only discarded afterwards.

    Args:
        conversation: Object exposing `predict(input=...)`.
        prompt: The prompt passed to `predict`.

    Returns:
        The prettified response when the prediction finished within DELAY.
        None when it took longer, or when anything in the attempt raised
        (the error is logged and printed).
    """
    started_at = time.perf_counter()  # taken right before the prediction starts
    try:
        raw_answer = conversation.predict(input=prompt)
        elapsed = time.perf_counter() - started_at

        # Only answers that arrived within the allowed time are kept.
        return prettify(raw_text=raw_answer) if elapsed <= DELAY else None
    except Exception as e:
        logger.error(msg="Error during prediction with conversation in has_delay function", exc_info=e)
        print(f"Error during prediction with conversation in has_delay function: {e}")
        return None
86
+
87
+
88
class Conversation:
    """
    Chat front-end that falls back through a fixed list of LLM model classes.

    The index of the model currently in use is kept on the instance, so a model
    that has been abandoned is not tried again until every model has failed.
    """

    def __init__(self):
        """
        Set up the fallback order and start with the first model.

        The order is Gemini, HF_Mistaril, HF_TinyLlama, LC_Phi3, LC_TinyLlama.
        """
        self.model_classes = [Gemini, HF_Mistaril, HF_TinyLlama, LC_Phi3, LC_TinyLlama]
        self.current_model_index = 0

    def _current_model_name(self) -> str:
        """Return the name of the model class currently selected, or "none"."""
        if 0 <= self.current_model_index < len(self.model_classes):
            model_class = self.model_classes[self.current_model_index]
            return getattr(model_class, "__name__", repr(model_class))
        return "none"

    def _get_conversation(self) -> Any:
        """
        Create a ConversationChain for the currently selected model class.

        Returns:
            The ConversationChain, or None when the model or the chain could
            not be created (the error is logged and printed).
        """
        try:
            current_model_class = self.model_classes[self.current_model_index]
            print("current model class is: ", current_model_class)
            return ConversationChain(llm=current_model_class().execution(), memory=memory, return_final_only=True)
        except Exception as e:
            logger.error(msg="Error during conversation chain in get_conversation function", exc_info=e)
            print(f"Error during conversation chain in get_conversation function: {e}")
            return None

    def chatting(self, prompt: str) -> str:
        """
        Answer the prompt with the first model that produces a response.

        Starting from the currently selected model, each model gets a first
        attempt (`has_failed`) and, if that fails, a second timed attempt
        (`has_delay`). When both fail the next model is selected.

        Args:
            prompt(str): The prompt to be used for prediction.

        Returns:
            str: The prettified response, or a fixed failure message when all
            models failed.

        Raises:
            ValueError: If the prompt is None or an empty string.
        """
        if prompt is None or prompt == "":
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError(f"Prompt must be string not None or empty string: {prompt}")

        while self.current_model_index < len(self.model_classes):
            conversation = self._get_conversation()
            if conversation is None:
                # The chain could not be built; predicting on None can only
                # fail, so move straight on to the next model.
                self.current_model_index += 1
                continue

            result = has_failed(conversation=conversation, prompt=prompt)
            if result is not None:
                return result
            print(f"chat - chatting result : {result}")

            result = has_delay(conversation=conversation, prompt=prompt)
            if result is None:
                self.current_model_index += 1  # Switch to next model after failure or delay
                continue

            return result

        # Start over from the first model on the next call; otherwise the
        # "try again" below could never succeed.
        self.current_model_index = 0
        return "All models failed conversation. Please, try again"

    def __str__(self) -> str:
        return f"current model: {self._current_model_name()}"

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(current_model={self._current_model_name()}, "
            f"current_model_index={self.current_model_index})"
        )
149
+
llm/{config.py → utils/config.py} RENAMED
@@ -1,14 +1,14 @@
1
  config = {
2
  "HF_Mistrail": {
3
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
4
- "temperature": 0.1,
5
  "max_new_tokens": 1024,
6
  "top_k": 5,
7
  "load_in_8bit": True
8
  },
9
  "HF_TinyLlama": {
10
  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
11
- "temperature": 0.1,
12
  "max_new_tokens": 1024,
13
  "top_k": 5,
14
  "top_p":0.95,
@@ -18,7 +18,7 @@ config = {
18
  "LC_TinyLlama-1.1B-Chat-v1.0-GGUF": {
19
  "model_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
20
  "model_name": "tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin",
21
- "temperature": 0.4,
22
  "max_tokens": 868,
23
  "top_p": 0.8,
24
  "top_k": 5,
@@ -26,7 +26,7 @@ config = {
26
  "LC_Phi-3-mini-4k-instruct-gguf": {
27
  "model_url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
28
  "model_name": "Phi-3-mini-4k-instruct-gguf.bin",
29
- "temperature": 0.4,
30
  "max_tokens": 868,
31
  "top_p": 0.8,
32
  "top_k": 5,
 
1
  config = {
2
  "HF_Mistrail": {
3
  "model": "mistralai/Mistral-7B-Instruct-v0.2",
4
+ "temperature": 0.5,
5
  "max_new_tokens": 1024,
6
  "top_k": 5,
7
  "load_in_8bit": True
8
  },
9
  "HF_TinyLlama": {
10
  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
11
+ "temperature": 0.5,
12
  "max_new_tokens": 1024,
13
  "top_k": 5,
14
  "top_p":0.95,
 
18
  "LC_TinyLlama-1.1B-Chat-v1.0-GGUF": {
19
  "model_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
20
  "model_name": "tinyllama-1.1b-chat-v1.0.Q8_0.gguf.bin",
21
+ "temperature": 0.5,
22
  "max_tokens": 868,
23
  "top_p": 0.8,
24
  "top_k": 5,
 
26
  "LC_Phi-3-mini-4k-instruct-gguf": {
27
  "model_url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
28
  "model_name": "Phi-3-mini-4k-instruct-gguf.bin",
29
+ "temperature": 0.5,
30
  "max_tokens": 868,
31
  "top_p": 0.8,
32
  "top_k": 5,
llm/{hf_interface.py → utils/hf_interface.py} RENAMED
@@ -1,8 +1,8 @@
1
  from abc import ABC, abstractmethod
2
-
3
 
4
  class HFInterface(ABC):
5
  @abstractmethod
6
- def execution(self):
7
- """Method execution LLM model based on HuggingFace or Langchain"""
8
  pass
 
1
  from abc import ABC, abstractmethod
2
+ from typing import Any, Optional
3
 
4
  class HFInterface(ABC):
5
  @abstractmethod
6
+ def execution(self) -> Optional[Any]:
7
+ """Method execution LLM model based on HuggingFace or others"""
8
  pass
llm/{lc_interface.py → utils/lc_interface.py} RENAMED
File without changes
llm/utils/toggle.py DELETED
File without changes
logs/chelsea_llm_chat.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 2024-07-30 19:01:46,091 - INFO - Getting information from chat module
logs/chelsea_llm_gemini.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 2024-07-30 19:01:45,915 - INFO - Getting information from apimodel module
logs/chelsea_llm_huggingfacehub.log CHANGED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ 2024-07-30 19:01:45,937 - INFO - Getting information from apimodel module
logs/chelsea_llm_llamacpp.log CHANGED
@@ -0,0 +1 @@
 
 
1
+
requirements.txt CHANGED
@@ -15,19 +15,23 @@ openai
15
  ffprobe
16
 
17
  # related to huggingface
18
- torch~=2.2.2
19
  transformers~=4.40.1
20
  accelerate
21
  sentence-transformers
22
  bitsandbytes
23
  einops
24
 
 
 
 
25
  # related to langchain
26
  langchain~=0.1.17
 
27
  pypdf
28
  chromadb
29
 
30
  # related to audio
31
  librosa==0.10.1
32
  soundfile~=0.12.1
33
- numpy~=1.26.4
 
15
  ffprobe
16
 
17
  # related to huggingface
18
+ torch~=2.2.0
19
  transformers~=4.40.1
20
  accelerate
21
  sentence-transformers
22
  bitsandbytes
23
  einops
24
 
25
+ #gemini-langchain
26
+ langchain_google_genai
27
+
28
  # related to langchain
29
  langchain~=0.1.17
30
+ langchain-community
31
  pypdf
32
  chromadb
33
 
34
  # related to audio
35
  librosa==0.10.1
36
  soundfile~=0.12.1
37
+ numpy~=1.24.4
todo.txt DELETED
@@ -1,7 +0,0 @@
1
- ToDo
2
-
3
- Переписати модуль команди оновивши патеерн на Factory, додати інші команди. Придумати як з ними взаємодіяти
4
- - Π”ΠΎΠ΄Π°Ρ‚ΠΈ очищСння для lc
5
- Додати ще моделі до hf та зробити пенальті
6
- Оновити промпти
7
- Придумати можливість зміни моделі
 
 
 
 
 
 
 
 
{command → utils}/__init__.py RENAMED
File without changes
utils/documentation.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ TEMPLATE = """
4
+ A complete list of commands that are designed to facilitate the use of the voice assistant Chelsea.
5
+ The complete list consists of no more than 100 commands written in a txt file.
6
+ The list of commands will be updated as the assistant is developed.
7
+ The first version of the program (Arctic Monkeys) contains a total of 2 commands.
8
+
9
+ The list of commands and their use.
10
+ """
11
+
12
+ class Documentation():
13
+ def execution(self):
14
+ st.write(f"Documentation:\n\n{TEMPLATE}")
utils/keywords.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+ keywords: Dict[str, List[str]] = {
4
+ "math": [
5
+ "math", "mathematica", "algebra", "geometry", "calculus", "trigonometry", "arithmetic",
6
+ "statistics", "probability", "equation", "theorem", "proof",
7
+ "function", "integral", "derivative", "matrix", "vector",
8
+ "series", "sequence", "logarithm", "exponent", "ratio",
9
+ "proportion", "mean", "median", "mode", "variance",
10
+ "standard deviation", "normal distribution", "binomial distribution", "polynomial", "quadratic",
11
+ "linear", "nonlinear", "differential equation", "graph", "plot",
12
+ "coordinate", "axis", "slope", "intercept", "angle",
13
+ "sin", "cos", "tan", "cot", "sec", "sine", "cosine", "tangent", "cotangent", "secant", "cosecant",
14
+ "csc", "hypotenuse", "adjacent", "opposite", "prime number",
15
+ "composite number", "factor", "multiple", "divisor", "fraction",
16
+ "decimal", "percent", "permutation", "combination", "set",
17
+ "subset", "union", "intersection", "probability distribution", "random variable",
18
+ "symmetric", "asymmetric", "function", "domain", "range",
19
+ "parabola", "hyperbola", "ellipse", "conic sections", "scalar",
20
+ "dot product", "cross product", "eigenvalue", "eigenvector", "identity matrix",
21
+ "determinant", "transpose", "inverse matrix", "system of equations", "linear transformation",
22
+ "orthogonal", "projection", "angle bisector", "midpoint", "distance formula",
23
+ "circumference", "area", "volume", "surface area", "unit circle",
24
+ "radian", "degree", "sector", "arc", "chord",
25
+ "geometric sequence", "arithmetic sequence", "infinity", "limit", "continuity",
26
+ "convergence", "divergence", "power series", "taylor series", "maclaurin series",
27
+ "partial fraction", "improper integral", "line integral", "surface integral", "triple integral"
28
+ ],
29
+ "physics": [
30
+ "physics", "quantum mechanics", "relativity", "thermodynamics", "electromagnetism", "classical mechanics",
31
+ "particle physics", "nuclear physics", "optics", "wave", "particle",
32
+ "energy", "force", "motion", "velocity", "acceleration",
33
+ "mass", "weight", "gravity", "magnetism", "electricity",
34
+ "current", "voltage", "resistance", "circuit", "magnetic field",
35
+ "electric field", "photon", "electron", "proton", "neutron",
36
+ "atom", "molecule", "nucleus", "radioactivity", "fission",
37
+ "fusion", "black hole", "big bang", "entropy", "enthalpy",
38
+ "conduction", "convection", "radiation", "wave-particle duality", "superposition",
39
+ "entanglement", "heisenberg uncertainty principle", "schrodinger's cat", "string theory", "standard model",
40
+ "higgs boson", "dark matter", "dark energy", "gravitational waves", "relativistic effects",
41
+ "time dilation", "length contraction", "inertia", "momentum", "angular momentum",
42
+ "torque", "centripetal force", "centrifugal force", "kinetic energy", "potential energy",
43
+ "work", "power", "conservation laws", "frame of reference", "wave function",
44
+ "quantum state", "wave equation", "phase", "frequency", "wavelength",
45
+ "amplitude", "interference", "diffraction", "polarization", "reflection",
46
+ "refraction",
47
+ "plasma", "boson", "fermion", "lepton", "quark",
48
+ "gluons", "neutrino", "antimatter", "tachyon", "string",
49
+ "brane", "multiverse", "spacetime", "singularity", "event horizon",
50
+ "hawking radiation", "cosmology", "cosmic microwave background", "redshift", "blueshift",
51
+ "doppler effect", "hubble's law", "expansion of the universe", "cosmic inflation", "dark flow",
52
+ "supernova", "neutron star", "pulsar", "quasar", "gamma-ray burst",
53
+ "solar wind", "magnetosphere", "aurora", "flux", "field lines",
54
+ "ampere's law", "faraday's law", "gauss's law", "lorentz force", "coulomb's law",
55
+ "kepler's laws", "newton's laws", "planck's constant", "speed of light", "universal constant",
56
+ "atomic mass", "atomic number", "isotope", "half-life", "binding energy",
57
+ "solid state physics", "semiconductor", "superconductor", "hall effect", "ferromagnetism"
58
+ ],
59
+ "history": [
60
+ "history", "revolution", "war", "empire", "kingdom", "dynasty",
61
+ "monarchy", "republic", "democracy", "constitution", "treaty",
62
+ "colonization", "independence", "civilization", "ancient", "medieval",
63
+ "modern", "industrialization", "renaissance", "enlightenment", "feudalism",
64
+ "imperialism", "nationalism", "communism", "socialism", "capitalism",
65
+ "fascism", "dictatorship", "cold war", "world war", "reformation",
66
+ "crusades", "expedition", "exploration", "discovery", "trade",
67
+ "silk road", "spice trade", "slavery", "abolition", "emancipation",
68
+ "civil rights", "suffrage", "holocaust", "genocide", "revolutionary war",
69
+ "civil war", "world war i", "world war ii", "great depression", "cold war",
70
+ "cold war era", "space race", "arms race", "nuclear proliferation", "decolonization",
71
+ "indian independence", "american revolution", "french revolution", "russian revolution", "chinese revolution",
72
+ "cuban revolution", "vietnam war", "korean war", "gulf war", "iraq war",
73
+ "afghanistan war", "middle ages", "renaissance period", "industrial revolution", "scientific revolution",
74
+ "age of exploration", "age of enlightenment", "victorian era", "edwardian era", "progressive era",
75
+ "roaring twenties", "great depression era", "post-war era", "cold war period", "digital age",
76
+ "information age", "modern era",
77
+ "ancient egypt", "ancient greece", "roman empire", "byzantine empire", "ottoman empire",
78
+ "ming dynasty", "qing dynasty", "han dynasty", "gupta empire", "mauryan empire",
79
+ "aztec empire", "inca empire", "maya civilization", "mesopotamia", "babylon",
80
+ "persian empire", "carthage", "vikings", "mongol empire", "medieval europe",
81
+ "feudal japan", "samurai", "shogunate", "mughal empire", "age of reason",
82
+ "scientific enlightenment", "american civil rights movement", "women's suffrage", "the great migration", "civil disobedience",
83
+ "nonviolent resistance", "apartheid", "mandela", "gandhi", "martin luther king jr.",
84
+ "malcolm x", "che guevara", "mao zedong", "stalin", "hitler",
85
+ "churchill", "fdr", "truman", "kennedy", "roosevelt",
86
+ "cold war espionage", "berlin wall", "iron curtain", "nato", "warsaw pact",
87
+ "marshall plan", "watergate", "cuban missile crisis", "yugoslav wars", "bosnian genocide",
88
+ "rwanda genocide", "darfur conflict", "arab spring", "syrian civil war", "russo-ukrainian war",
89
+ "brexit", "european union", "united nations", "nato", "world trade organization",
90
+ "nafta", "trans-pacific partnership", "climate change", "global warming", "paris agreement"
91
+ ],
92
+ "technology": [
93
+ "artificial intelligence", "software", "hardware", "programming", "coding", "algorithm",
94
+ "machine learning", "artificial intelligence", "ai", "neural networks", "deep learning",
95
+ "data science", "big data", "cloud computing", "internet of things", "iot",
96
+ "cybersecurity", "encryption", "blockchain", "cryptocurrency", "bitcoin",
97
+ "ethereum", "smart contract", "virtual reality", "vr", "augmented reality",
98
+ "ar", "robotics", "automation", "3d printing", "biotechnology",
99
+ "genetic engineering", "nanotechnology", "quantum computing", "quantum technology", "5g",
100
+ "wireless communication", "network", "database", "sql", "nosql",
101
+ "web development", "frontend", "backend", "full stack", "html",
102
+ "css", "javascript", "react", "angular", "vue",
103
+ "node.js", "python", "java", "c++", "c#",
104
+ "ruby", "swift", "kotlin", "mobile development", "ios",
105
+ "android", "app development", "user interface", "ui", "user experience",
106
+ "ux", "responsive design", "devops", "agile", "scrum",
107
+ "kanban", "version control", "git", "github", "continuous integration",
108
+ "ci", "continuous deployment", "cd", "containerization", "docker",
109
+ "kubernetes", "microservices", "serverless", "cloud infrastructure", "aws",
110
+ "azure", "google cloud", "gcp", "digital transformation", "edge computing",
111
+ "fog computing", "smart home", "smart devices", "wearables", "fitness trackers",
112
+ "health tech", "medtech", "fintech", "edtech", "proptech",
113
+ "natural language processing", "nlp", "speech recognition", "chatbots", "virtual assistants",
114
+ "augmented reality", "mixed reality", "extended reality", "er", "haptic technology",
115
+ "3d modeling", "computer graphics", "game development", "simulation", "digital twins",
116
+ "smart cities", "connected vehicles", "autonomous vehicles", "self-driving cars", "electric vehicles",
117
+ "ev", "renewable energy", "solar power", "wind power", "hydroelectric power",
118
+ "smart grid", "energy storage", "battery technology", "wearable tech", "smartwatch",
119
+ "fitness tracker", "smart glasses", "e-learning", "online education", "moocs",
120
+ "massive open online courses", "online collaboration", "telemedicine", "remote work", "digital workspace",
121
+ "cryptography", "secure communications", "quantum encryption", "privacy", "data protection",
122
+ "regtech", "insurtech", "agritech", "contech", "govtech",
123
+ "martech", "hrtech", "legaltech", "real estate technology", "property management systems",
124
+ "virtual real estate", "3d printing construction", "sustainable technology", "cleantech", "green technology",
125
+ "environmental technology", "recycling technology", "waste management technology", "water purification technology", "air purification technology",
126
+ "carbon capture", "carbon footprint reduction", "renewable materials", "biodegradable materials", "smart packaging",
127
+ "advanced manufacturing", "industry 4.0", "internet of behaviors", "behavioral data", "personalization",
128
+ "customer experience", "cx", "user journey", "touchpoints", "interaction design",
129
+ "service design", "design thinking", "innovation management", "creative technology", "disruptive technology"
130
+ ],
131
+ "life": [
132
+ "health", "wellness", "fitness", "nutrition", "diet",
133
+ "exercise", "mental health", "stress", "anxiety", "depression",
134
+ "mindfulness", "meditation", "yoga", "self-care", "hygiene",
135
+ "lifestyle", "work-life balance", "relationships", "friendship", "family",
136
+ "parenting", "education", "career", "job", "profession",
137
+ "hobbies", "interests", "travel", "adventure", "vacation",
138
+ "culture", "arts", "music", "film", "literature",
139
+ "reading", "writing", "creativity", "crafts", "diy",
140
+ "cooking", "recipes", "baking", "gardening", "sustainability",
141
+ "environment", "eco-friendly", "green living", "minimalism", "decluttering",
142
+ "finance", "budgeting", "saving", "investing", "retirement",
143
+ "insurance", "real estate", "housing", "mortgage", "renting",
144
+ "transportation", "driving", "public transit", "biking", "walking",
145
+ "pets", "animals", "dog", "cat", "pet care",
146
+ "community", "volunteering", "charity", "philanthropy", "social issues",
147
+ "politics", "government", "laws", "rights", "justice",
148
+ "equality", "diversity", "inclusion", "personal development", "self-improvement",
149
+ "goal setting", "productivity", "time management", "motivation", "inspiration",
150
+ "spirituality", "religion", "faith", "beliefs", "values",
151
+ "ethics", "morality", "philosophy", "wisdom", "knowledge",
152
+ "learning", "education system", "school", "college", "university",
153
+ "workplace", "entrepreneurship", "startups", "leadership", "management",
154
+ "mentorship", "networking", "communication skills", "public speaking", "negotiation",
155
+ "conflict resolution", "teamwork", "collaboration", "project management", "organizational skills",
156
+ "problem-solving", "critical thinking", "decision making", "emotional intelligence", "self-awareness",
157
+ "resilience", "adaptability", "creativity", "innovation", "lifelong learning",
158
+ "personal growth", "mindset", "habits", "routine", "discipline",
159
+ "focus", "concentration", "clarity", "vision", "values",
160
+ "purpose", "fulfillment", "happiness", "gratitude", "optimism",
161
+ "positive thinking", "self-esteem", "self-confidence", "body image", "self-acceptance",
162
+ "stress management", "relaxation", "rest", "sleep", "dreams",
163
+ "lucid dreaming", "mental clarity", "cognitive function", "memory", "learning styles",
164
+ "study techniques", "academic success", "career planning", "professional development", "workplace culture",
165
+ "employee engagement", "job satisfaction", "work environment", "remote work", "telecommuting",
166
+ "flexible work", "gig economy", "side hustle", "financial independence", "early retirement",
167
+ "fire movement", "minimalism", "simple living", "downshifting", "voluntary simplicity",
168
+ "sustainable living", "zero waste", "plastic-free", "veganism", "vegetarianism",
169
+ "plant-based diet", "organic food", "local food", "farm-to-table", "slow food",
170
+ "mindful eating", "intuitive eating", "diet culture", "body positivity", "weight management",
171
+ "fitness goals", "workout routines", "exercise science", "sports nutrition", "athletic performance",
172
+ "injury prevention", "rehabilitation", "physiotherapy", "chiropractic care", "alternative medicine",
173
+ "holistic health", "integrative medicine", "traditional medicine", "herbal medicine", "acupuncture",
174
+ "massage therapy", "aromatherapy", "sound therapy", "energy healing", "reiki",
175
+ "chakra balancing", "spiritual healing", "faith healing", "meditative practices", "breathwork",
176
+ "pranayama", "tai chi", "qigong", "martial arts", "self-defense",
177
+ "dance", "movement therapy", "art therapy", "music therapy", "drama therapy",
178
+ "play therapy", "animal-assisted therapy", "equine therapy", "nature therapy", "ecotherapy",
179
+ "forest bathing", "wild swimming", "outdoor activities", "hiking", "camping",
180
+ "backpacking", "mountaineering", "rock climbing", "bouldering", "caving",
181
+ "kayaking", "canoeing", "rafting", "sailing", "boating",
182
+ "fishing", "birdwatching", "wildlife photography", "stargazing", "astronomy",
183
+ "geocaching", "orienteering", "survival skills", "bushcraft", "foraging",
184
+ "homesteading", "permaculture", "urban gardening", "community gardening", "allotment gardening",
185
+ "container gardening", "vertical gardening", "indoor gardening", "houseplants", "succulents",
186
+ "bonsai", "orchids", "roses", "tulips", "wildflowers",
187
+ "herbs", "vegetables", "fruits", "berries", "nut trees",
188
+ "shade gardening", "water gardening", "xeriscaping", "landscape design", "garden design",
189
+ "garden maintenance", "pruning", "composting", "soil health", "organic gardening",
190
+ "pest control", "natural fertilizers", "greenhouse gardening", "aquaponics", "hydroponics",
191
+ "aquaculture", "sustainable agriculture", "regenerative agriculture", "carbon farming", "climate-smart agriculture",
192
+ "urban farming", "city farming", "vertical farming", "rooftop farming", "indoor farming",
193
+ "community supported agriculture", "csa", "farmers markets", "local food systems", "food sovereignty",
194
+ "food security", "food justice", "food deserts", "food waste", "food recovery",
195
+ "gleaning", "food rescue", "food banks", "food pantries", "community kitchens",
196
+ "soup kitchens", "meal programs", "nutrition education", "food policy", "agricultural policy",
197
+ "food industry", "food science", "food technology", "food safety", "food regulation",
198
+ "dietary guidelines", "nutrition research", "public health", "global health", "health equity",
199
+ "social determinants of health", "healthcare access", "universal healthcare", "healthcare policy", "health insurance",
200
+ "primary care", "preventive care", "chronic disease management", "mental health care", "substance abuse treatment",
201
+ "addiction recovery", "rehabilitation services", "disability services", "elder care", "geriatric care",
202
+ "end-of-life care", "palliative care", "hospice care", "patient advocacy", "health literacy",
203
+ "patient empowerment", "shared decision making", "informed consent", "medical ethics", "bioethics",
204
+ "genetic counseling", "reproductive health", "maternal health", "child health", "adolescent health",
205
+ "men's health", "women's health", "lgbtq+ health", "sexual health", "sex education",
206
+ "family planning", "birth control", "fertility", "infertility", "adoption",
207
+ "surrogacy", "pregnancy", "prenatal care", "postpartum care", "breastfeeding",
208
+ "newborn care", "pediatric care", "immunizations", "vaccinations", "infectious diseases",
209
+ "pandemics", "epidemics", "public health response", "emergency preparedness", "disaster response",
210
+ "humanitarian aid", "global development", "international relations", "diplomacy", "peacekeeping",
211
+ "conflict resolution", "human rights", "civil liberties", "social justice", "advocacy",
212
+ "activism", "community organizing", "grassroots movements", "social movements", "political activism",
213
+ "environmental activism", "climate activism", "sustainable development", "social entrepreneurship", "impact investing",
214
+ "corporate social responsibility", "csr", "ethical business", "fair trade", "sustainable fashion",
215
+ "slow fashion", "circular economy", "zero waste lifestyle", "plastic-free living", "minimal waste",
216
+ "eco-friendly products", "green products", "sustainable brands", "ethical consumerism", "conscious consumerism",
217
+ "mindful living", "simple living", "voluntary simplicity", "intentional living", "purpose-driven life",
218
+ "values-driven life", "authenticity", "integrity", "vulnerability", "empathy",
219
+ "compassion", "kindness", "gratitude", "generosity", "service",
220
+ "community service", "volunteerism", "philanthropy", "charitable giving", "social impact",
221
+ "civic engagement", "democratic participation", "voting", "elections", "campaigns",
222
+ "political engagement", "policy advocacy", "government accountability", "transparency", "good governance",
223
+ "public accountability", "citizen oversight", "public participation", "community involvement", "collective action",
224
+ "solidarity", "social cohesion", "community resilience", "disaster resilience", "climate resilience",
225
+ "ecosystem resilience", "environmental stewardship", "conservation", "biodiversity", "wildlife protection",
226
+ "habitat restoration", "ecosystem services", "natural resources", "sustainable resource management", "renewable resources",
227
+ "non-renewable resources", "energy conservation", "water conservation", "soil conservation", "forest conservation",
228
+ "marine conservation", "sustainable fisheries", "sustainable forestry", "sustainable agriculture", "sustainable tourism",
229
+ "eco-tourism", "nature-based tourism", "cultural tourism", "heritage tourism", "community-based tourism",
230
+ "responsible tourism", "ethical tourism", "regenerative tourism", "adventure tourism", "wildlife tourism",
231
+ "urban tourism", "rural tourism", "agritourism", "gastrotourism", "culinary tourism",
232
+ "food tourism", "wine tourism", "beer tourism", "coffee tourism", "chocolate tourism",
233
+ "craft tourism", "artisan tourism", "handicraft tourism", "souvenir tourism", "local tourism",
234
+ "staycations", "domestic tourism", "short-term rentals", "vacation rentals", "holiday homes",
235
+ "vacation homes", "second homes", "vacation planning", "travel planning", "itinerary planning",
236
+ "travel tips", "packing tips", "travel hacks", "budget travel", "luxury travel",
237
+ "solo travel", "group travel", "family travel", "pet-friendly travel", "accessible travel",
238
+ "sustainable travel", "green travel", "slow travel", "responsible travel", "ethical travel",
239
+ "off-the-beaten-path travel", "hidden gems", "bucket list", "once-in-a-lifetime trips", "once-in-a-lifetime experiences",
240
+ "adventure travel", "extreme travel", "extreme sports", "extreme adventures", "thrill-seeking",
241
+ "adrenaline junkie", "challenge", "personal challenge", "physical challenge", "mental challenge",
242
+ "growth mindset", "fixed mindset", "open-mindedness", "curiosity", "exploration",
243
+ "discovery", "innovation", "creativity", "invention", "problem-solving",
244
+ "critical thinking", "strategic thinking", "analytical thinking", "logical thinking", "scientific thinking",
245
+ "philosophical thinking", "ethical thinking", "creative thinking", "design thinking", "systems thinking",
246
+ "complexity", "ambiguity", "uncertainty", "paradox", "dilemma",
247
+ "contradiction", "tension", "balance", "harmony", "equilibrium",
248
+ "equanimity", "tranquility", "serenity", "calm", "peace",
249
+ "inner peace", "outer peace", "global peace", "world peace", "peacebuilding",
250
+ "conflict prevention", "conflict resolution", "mediation", "negotiation", "dialogue",
251
+ "understanding", "tolerance", "acceptance", "inclusion", "diversity",
252
+ "multiculturalism", "pluralism", "intercultural dialogue", "interfaith dialogue", "cross-cultural communication",
253
+ "cross-cultural understanding", "interpersonal communication", "interpersonal skills", "relationship building", "relationship management",
254
+ "relationship maintenance", "relationship development", "relationship repair", "relationship enhancement", "relationship success",
255
+ "relationship satisfaction", "relationship happiness", "relationship fulfillment", "relationship growth", "relationship dynamics",
256
+ "family dynamics", "family relationships", "parent-child relationships", "sibling relationships", "extended family",
257
+ "family systems", "family therapy", "family counseling", "marriage", "marriage counseling",
258
+ "divorce", "separation", "co-parenting", "blended families", "stepfamilies",
259
+ "adoption", "foster care", "child welfare", "child protection", "child development",
260
+ "childhood", "adolescence", "adolescent development", "youth development", "youth programs",
261
+ "youth leadership", "youth empowerment", "youth engagement", "youth advocacy", "youth participation",
262
+ "youth voice", "youth rights", "youth justice", "youth crime", "youth violence",
263
+ "youth gangs", "youth homelessness", "youth mental health", "youth substance abuse", "youth addiction",
264
+ "youth education", "youth employment", "youth entrepreneurship", "youth innovation", "youth creativity",
265
+ "youth sports", "youth arts", "youth culture", "youth identity", "youth diversity",
266
+ "youth inclusion", "youth equity", "youth social justice", "youth environmental justice", "youth climate action",
267
+ "youth activism", "youth advocacy", "youth leadership", "youth participation", "youth empowerment",
268
+ "youth engagement", "youth organizing", "youth mobilization", "youth networks", "youth movements",
269
+ "youth campaigns", "youth initiatives", "youth projects", "youth programs", "youth services",
270
+ "youth organizations", "youth groups", "youth clubs", "youth associations", "youth councils",
271
+ "youth committees", "youth forums", "youth dialogues", "youth workshops", "youth conferences",
272
+ "youth summits", "youth assemblies", "youth festivals", "youth events", "youth activities",
273
+ "youth education", "youth training", "youth development", "youth mentoring", "youth coaching",
274
+ "youth support", "youth advocacy", "youth empowerment", "youth engagement", "youth participation",
275
+ "youth leadership", "youth entrepreneurship", "youth innovation", "youth creativity", "youth development",
276
+ "youth programs", "youth services", "youth organizations", "youth groups", "youth clubs",
277
+ "youth associations", "youth councils", "youth committees", "youth forums", "youth dialogues",
278
+ "youth workshops", "youth conferences"
279
+ ],
280
+ "riddles": [
281
+ "riddle", "puzzle", "brain teaser", "what am I", "guess",
282
+ "mystery", "conundrum", "enigma", "paradox", "trick question",
283
+ "wordplay", "challenge", "logic puzzle", "mind bender",
284
+ "cryptic", "clue", "riddle me this", "solve", "answer", "question"
285
+ ]
286
+ }
utils/prompt_toggle.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import yaml
4
+
5
+ from typing import Any, Dict, List
6
+ from langchain.prompts import PromptTemplate
7
+
8
+ cwd: str = os.getcwd()
9
+ propmt_file_path: str = os.path.join(cwd, "utils/prompts.yaml")
10
+
11
+ # Load prompts from yaml
12
+ def load_prompts():
13
+ try:
14
+ with open(propmt_file_path, "r") as f:
15
+ return yaml.safe_load(f)['prompts']
16
+ except Exception as e:
17
+ print(f"Reading prompts file has failed {e}")
18
+
19
+ # Preprocess text and keywords
20
+ def __preprocess_text(text: str) -> List[Any]:
21
+ return re.findall(r'\b\w+\b', text.lower())
22
+
23
+ def __preprocess_keywords(keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
24
+ preprocessed_keywords = {}
25
+ for category, kw_list in keywords.items():
26
+ preprocessed_keywords[category] = set(kw.lower() for kw in kw_list)
27
+ return preprocessed_keywords
28
+
29
+ # Check for keywords in input text
30
+ def __check_for_keywords(text: str, keywords: Dict[str, List[str]]) -> Dict[str, List[str]]:
31
+ preprocessed_keywords = __preprocess_keywords(keywords)
32
+ matched_keywords = {category: [] for category in keywords}
33
+ words = __preprocess_text(text)
34
+
35
+ for word in words:
36
+ for category, kw_set in preprocessed_keywords.items():
37
+ if word in kw_set:
38
+ matched_keywords[category].append(word)
39
+
40
+ matched_keywords = {category: list(set(matches)) for category, matches in matched_keywords.items() if matches}
41
+
42
+ return matched_keywords
43
+
44
+ # Select the most appropriate prompt based on matched keywords
45
+ def select_prompt(input_text: str, prompts: Any, keywords: Dict[str, List[str]]) -> str:
46
+ matched_keywords = __check_for_keywords(input_text, keywords)
47
+ matched_categories = list(matched_keywords.keys())
48
+
49
+ # Default to the highest rated common prompt if no specific category is matched
50
+ selected_prompt = max((p for p in prompts if 'common' in p['purpose']), key=lambda p: p['rate'], default=None)
51
+
52
+ for category in matched_categories:
53
+ category_prompts = [p for p in prompts if category in p['purpose']]
54
+ if category_prompts:
55
+ selected_prompt = max(category_prompts, key=lambda p: p['rate'], default=selected_prompt)
56
+
57
+ prompt_template = PromptTemplate(template=selected_prompt['prompt_template'], input_variables=['entity'])
58
+ prompt = prompt_template.format(entity=input_text)
59
+ return prompt
utils/prompts.yaml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.promptingguide.ai/techniques/
2
+
3
+ # Add more examples of the template here; id should be an integer in the range from 0 to 99_999
4
+ # The description provides a basic overview of the template, the description should include the level of difficulty,
5
+ # the name of the prompting method and end with the word "prompt", e.g. "short prompt" or "zero-shot thought chain prompt".
6
+ # The prompt is rated from 0 to 10
7
+ # the 'common' purpose means that the prompt fits all situations
8
+
9
+ prompts:
10
+ - id: 0
11
+ prompt_template: |
12
+ Instruction: You are a voice assistant who enjoys to help people called Chelsea
13
+ Question: {entity}
14
+ Answer:
15
+ description: main prompt
16
+ rate: 8
17
+ purpose: ['common']
18
+
19
+ - id: 1
20
+ prompt_template: |
21
+ Instruction: just give a response
22
+ Question: {entity}
23
+ Answer:
24
+ description: a simple prompt
25
+ rate: 1
26
+ purpose: ['common']
27
+
28
+ - id: 2
29
+ prompt_template: |
30
+ Instruction: Write a concise answer on the question with one example if it's possible. CONCISE ANSWER.
31
+ Question: {entity}
32
+ Answer:
33
+ description: concise prompt
34
+ rate: 3
35
+ purpose: ['common']
36
+
37
+ # useful for solving simple math tasks
38
+ - id: 3
39
+ prompt_template: |
40
+ Instruction: Let's think step by step.
41
+ Question: {entity}
42
+ Answer:
43
+ description: zero-shot chain-of-thoughts prompt
44
+ rate: 4
45
+ purpose: ['math', 'physics', 'technology']
46
+
47
+ # another example for solving simple math tasks
48
+ - id: 4
49
+ prompt_template: |
50
+ Instruction:
51
+ Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls.
52
+ Each can has 3 tennis balls. How many tennis balls does he have now?
53
+ A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
54
+ Question: {entity}
55
+ Answer:
56
+ description: few-shot chain-of-thoughts prompt
57
+ rate: 4
58
+ purpose: ['riddles']
59
+
60
+ - id: 5
61
+ prompt_template: |
62
+ Instruction:
63
+ Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done,
64
+ there will be 21 trees. How many trees did the grove workers plant today?
65
+ A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted.
66
+ So, they must have planted 21 - 15 = 6 trees. The answer is 6.
67
+ Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
68
+ A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.
69
+ Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
70
+ A: Leah had 32 chocolates and Leah`s sister had 42. That means there were originally 32 + 42 = 74
71
+ chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.
72
+ Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops
73
+ did Jason give to Denny?
74
+ A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of
75
+ lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.
76
+ Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does
77
+ he have now?
78
+ A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so
79
+ in total he has 7 + 2 = 9 toys. The answer is 9.
80
+ Q: There were nine computers in the server room. Five more computers were installed each day, from
81
+ monday to thursday. How many computers are now in the server room?
82
+ A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 =
83
+ 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers.
84
+ The answer is 29.
85
+ Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many
86
+ golf balls did he have at the end of wednesday?
87
+ A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On
88
+ Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.
89
+ Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
90
+ A: She bought 5 bagels for $3 each. This means she spent $15. She has $8 left.
91
+ Question: {entity}
92
+ Answer:
93
+ description: self-consistency prompt
94
+ rate: 6
95
+ purpose: ['riddles']