File size: 4,487 Bytes
d022aab 7c29ee2 1581bbf 7378fc8 e294914 6aa156e d5436e0 f8dfb0f d63135e e294914 7378fc8 e294914 7378fc8 6aa156e d4f3eaa e294914 d022aab 7378fc8 c8abf3f d022aab 7c29ee2 7378fc8 7c29ee2 58c9786 7378fc8 6aa156e 58c9786 7378fc8 91b59ba 7378fc8 6aa156e 663b2d1 6aa156e 7801155 b67ebed 58c9786 b67ebed d4f3eaa 7378fc8 ff23c04 91b59ba 7378fc8 91b59ba 5a785fa e294914 9ee8906 3e2a726 5a785fa 3e2a726 cf08317 8bd4cb3 e294914 5a785fa 8bd4cb3 5a785fa 3e2a726 7378fc8 e294914 7378fc8 cf08317 7378fc8 8bd4cb3 e294914 0f596d3 e294914 5a785fa e294914 8bd4cb3 5a785fa 8bd4cb3 c8abf3f 8bd4cb3 5a785fa 4affef3 5a785fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# standard library (psutil below is a third-party package)
import re
import psutil
import time
import random
# streamlit
import streamlit as st
import streamlit.components.v1 as components
# components from other authors
from streamlit_mic_recorder import mic_recorder
# core modules
from audio_processing.A2T import A2T
from audio_processing.T2A import T2A
from llm.utils.chat import Conversation
from vlm.vlm import VLM
# utils modules
from utils.keywords import keywords
from utils.prompt_toggle import select_prompt, load_prompts
from utils.image_caption import ImageCaption
from utils.documentation import html_content
from utils.payment import html_doge_wallet
from utils.statement_evaluation_command import get_response
# Shared module-level objects, created when this script is executed and
# reused by the handler functions below.
prompts = load_prompts()  # prompt collection handed to select_prompt() in switching()
chat = Conversation()  # chat backend; switching() calls chat.chatting() in its default branch
t2a = T2A()  # T2A instance; speaking() calls its autoplay() with the cleaned response
vlm = VLM()  # model object passed to ic.send2ai() for "show me your image" requests
ic = ImageCaption()  # image helper; switching() uses load_image() and send2ai()
text_dict = {}  # holds the last transcription under the 'text' key (written by get_text(), read by main())
def remove_labels_with_regex(text: str):
    """Strip a leading speaker label from every line of *text*.

    A label is ``Human:``, ``AI:`` or ``Chelsea:`` at the very start of a
    line, together with any whitespace that directly follows it.

    Args:
        text: The text to clean.

    Returns:
        The text with every line-leading label removed.
    """
    label_prefix = re.compile(r'^(Human:|AI:|Chelsea:)\s*', flags=re.MULTILINE)
    return label_prefix.sub('', text)
def exctrator(sentence, phrase):
    """Return the text that follows *phrase* inside *sentence*.

    The sentence is split on every occurrence of *phrase*; the piece right
    after the first occurrence (i.e. up to the next occurrence, if any) is
    returned with surrounding whitespace stripped.

    Args:
        sentence: The text to search.
        phrase: The marker to look for.

    Returns:
        The stripped piece after the first occurrence of *phrase*, or an
        empty string when *phrase* does not occur in *sentence*.
    """
    if phrase not in sentence:
        return ""
    pieces = sentence.split(phrase)
    return pieces[1].strip()
def switching(text):
    """Route an utterance to the handler selected by its trigger phrase.

    Trigger phrases are matched case-insensitively anywhere in *text*:

    * "show me your image"    - load an image via ``ic`` and, if one is
      available, call ``ic.send2ai`` with the VLM and the extracted prompt;
      otherwise show a Streamlit warning.
    * "show me documentation" - render the documentation HTML component.
    * "pay the ghost"         - render the wallet HTML component.
    * "long live liberty"     - pass an evaluation prompt built from the
      extracted statement to ``get_response``.
    * anything else           - chat via ``chat.chatting``, using the prompt
      chosen by ``select_prompt`` or the raw text when none is chosen.

    Args:
        text: The user's utterance.

    Returns:
        The handler's result, or None for branches that only render UI
        (documentation, wallet, or a missing image).
    """
    # Lower-case once; the triggers are plain literals, so a substring test
    # is equivalent to the former case-insensitive regex search.
    lowered = text.lower()
    result = None
    # Pre-bound so the debug print at the end never hits an unbound name on
    # branches that do not build a prompt.
    prompt = None

    if "show me your image" in lowered:
        prompt = exctrator(lowered, phrase="show me your image")
        # Load the image
        uploaded_image = ic.load_image()
        if uploaded_image is not None:
            # An image is available, so run the processing
            result = ic.send2ai(model=vlm, prompt=prompt)
        else:
            # No image has been uploaded yet, so show a warning
            st.warning("No image uploaded yet. Please upload an image to continue.")
    elif "show me documentation" in lowered:
        components.html(html_content, height=800, scrolling=True)
    elif "pay the ghost" in lowered:
        components.html(html_doge_wallet, height=600, scrolling=False)
    elif "long live liberty" in lowered:
        exctracted_statement = exctrator(lowered, phrase="long live liberty")
        print(f"exctracted_statement: {exctracted_statement}")
        prompt = f"Evaluate how far the statement from libertarian ideology 1 to 10 and why?\n\n{exctracted_statement}"
        print(f"Liberty prompt: {prompt}")
        result = get_response(prompt)
    else:
        prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
        result = chat.chatting(prompt=prompt if prompt is not None else text)

    print(f"Prompt:\n{prompt}")
    return result
def get_text():
    """Record audio with the mic widget and transcribe it.

    On success the transcription is also stored in ``text_dict['text']`` so
    that ``main`` can fall back to it later.

    Returns:
        The text returned by ``A2T.predict()``, or None when the widget has
        produced no recording or an error occurred while transcribing.
    """
    try:
        mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=False, use_container_width=True)
        # No recording available: return explicitly instead of relying on a
        # swallowed TypeError from subscripting an empty result.
        if not mic:
            return None
        start_time = time.perf_counter()
        a2t = A2T(mic["bytes"])
        text = a2t.predict()
        print(f"Text from A2T:\n{text}")
        execution_time = time.perf_counter() - start_time
        print(f"App.py -> get_text() -> time of execution A2T -> {execution_time}s")
        text_dict['text'] = text
        return text
    except Exception as e:
        # Top-level boundary of the recording step: log and report "no text".
        print(f"An error occurred in get_text function, reason is: {e}")
        return None  # return None in case of an error
def speaking(text):
    """Generate, voice and display the response to *text*.

    Empty or whitespace-only input is ignored. Otherwise the text is routed
    through ``switching``; a non-None result has its speaker labels removed,
    is played through ``t2a.autoplay`` and, if non-empty, shown in the page
    next to the user's input.

    Args:
        text: The user's utterance; may be None.

    Returns:
        None. Errors are caught and printed.
    """
    try:
        if text and text.strip():
            print(f"Checking for execution this part {random.randint(0, 5)}")
            output = switching(text)
            if output is None:
                # UI-only branches of switching() produce nothing to clean or
                # speak; stop here rather than failing inside re.sub(None).
                return
            response = remove_labels_with_regex(text=output)
            start_time_t2a = time.perf_counter()
            t2a.autoplay(response)
            execution_time_t2a = time.perf_counter() - start_time_t2a
            print(f"App.py -> speaking() -> time of execution T2A -> {execution_time_t2a}s")
            print(ic.pil_image)
            if response:
                st.markdown(f"Your input: {text}")
                st.markdown(f"Chelsea response: {response}")
    except Exception as e:
        # Top-level boundary of the response step: log and keep the app alive.
        print(f"An error occurred in speaking function, reason is: {e}")
def main():
    """Run one pass of the app: obtain the user's text and respond to it.

    When ``get_text`` yields None, the transcription stored earlier in
    ``text_dict['text']`` (if any) is used instead.
    """
    text = get_text()
    if text is None:
        # Fall back to the stored transcription; stays None when none exists.
        text = text_dict.get('text')
    print(f"Text dict: {text_dict}")
    print(f"Print text: s{text}s")
    speaking(text)
    print(f"Checking for execution main func {random.randint(0, 10)}")
# Run the app when this file is executed as the main module.
if __name__ == "__main__":
    main()