File size: 3,542 Bytes
e294914 cab263c d022aab 7c29ee2 1581bbf 7378fc8 e294914 d5436e0 f8dfb0f d63135e e294914 7378fc8 e294914 7378fc8 e294914 d022aab 7378fc8 66a2c12 d022aab 7c29ee2 7378fc8 7c29ee2 7378fc8 80a60df 7378fc8 91b59ba 7378fc8 91b59ba 7378fc8 91b59ba 7c29ee2 5a785fa e294914 3e2a726 5a785fa 3e2a726 cf08317 e294914 5a785fa 3e2a726 7378fc8 e294914 7378fc8 cf08317 7378fc8 3e2a726 e294914 0f596d3 e294914 5a785fa e294914 cf08317 5a785fa cf08317 66a2c12 5a785fa 4affef3 5a785fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# version - ArcticMonkeys:30.07.24
# python core libraries
import re
import psutil
import time
import random
# streamlit
import streamlit as st
# components from other authors
from streamlit_mic_recorder import mic_recorder
# core modules
from audio_processing.A2T import A2T
from audio_processing.T2A import T2A
from llm.utils.chat import Conversation
from vlm.vlm import VLM
# utils modules
from utils.keywords import keywords
from utils.prompt_toggle import select_prompt, load_prompts
from utils.image_caption import ImageCaption
# Shared module-level objects used by the functions below.
prompts = load_prompts()  # passed to select_prompt() as `prompts=` in switching()
chat = Conversation()  # switching() calls chat.chatting() for non-image requests
t2a = T2A()  # speaking() calls t2a.autoplay() with the cleaned response
vlm = VLM()  # passed as `model=` to ic.send2ai() in switching()
ic = ImageCaption()  # switching() calls ic.load_image() / ic.send2ai()
# NOTE(review): main() assigns a *local* variable named layer_text (there is no
# `global` statement), so this module-level value is never updated — confirm intent.
layer_text = ""
def remove_labels_with_regex(text: str):
    """Strip a leading speaker label from every line of *text*.

    A label is ``Human:``, ``AI:`` or ``Chelsea:`` located at the very start
    of a line, together with any whitespace that directly follows it.

    Args:
        text: Text that may contain speaker labels.

    Returns:
        The text with those labels removed; everything else is unchanged.
    """
    # MULTILINE makes ``^`` match at the start of each line, not only at the
    # start of the whole string.
    label_at_line_start = re.compile(r'^(Human:|AI:|Chelsea:)\s*', re.MULTILINE)
    return label_at_line_start.sub('', text)
def exctrator(sentence, phrase="show me your image"):
    """Return the text that follows the first occurrence of *phrase*.

    Args:
        sentence: Text to search. The match is case-sensitive, so callers
            wanting a case-insensitive match must normalise the text first.
        phrase: Trigger phrase that marks where the wanted text begins.

    Returns:
        Everything after the first occurrence of *phrase*, stripped of
        surrounding whitespace, or ``""`` when *phrase* is empty or does not
        occur in *sentence*.
    """
    if not phrase:
        # An empty separator cannot be searched for (str.partition would
        # raise ValueError), so there is nothing to extract.
        return ""
    # partition() cuts only at the first occurrence, so a later repeat of the
    # phrase stays in the result instead of truncating it.
    _, found, remainder = sentence.partition(phrase)
    return remainder.strip() if found else ""
def switching(text):
    """Route a recognised request to the image model or to the chat model.

    If the lower-cased text contains "show me your image", the words after
    that phrase are used as the prompt for ``ic.send2ai``; otherwise a prompt
    is chosen with ``select_prompt`` and sent to ``chat.chatting``.

    Args:
        text: The recognised request, or ``None`` when nothing was recognised.

    Returns:
        The value returned by ``ic.send2ai`` or ``chat.chatting``; ``None``
        when *text* is ``None`` or when an image was requested but none has
        been uploaded yet.
    """
    if text is None:
        # Previously a truthy error string was stored as the "match", which
        # sent a None request into the image branch and crashed on
        # text.lower(). Bail out before touching any model instead.
        print("Error because your voice request is None")
        return None

    lowered = text.lower()
    command = re.search("show me your image", lowered, re.IGNORECASE)
    result = None
    if command:
        prompt = exctrator(lowered)
        # Load the image
        uploaded_image = ic.load_image()
        if uploaded_image is not None:
            # An image has been uploaded, so process it
            result = ic.send2ai(model=vlm, prompt=prompt)
        else:
            # No image has been uploaded yet, so show a warning
            st.warning("No image uploaded yet. Please upload an image to continue.")
    else:
        prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
        # Fall back to the raw request when no prompt was selected
        result = chat.chatting(prompt=prompt if prompt is not None else text)
    print(f"Prompt:\n{prompt}")
    return result
def get_text():
    """Record audio with the microphone widget and transcribe it.

    Returns:
        The value returned by ``A2T.predict()`` for the recording, or ``None``
        when no recording is available or an error occurred.
    """
    try:
        mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
        if not mic:
            # The widget yields nothing until a recording has been made (and,
            # with just_once=True, again on later reruns). Indexing it would
            # raise TypeError, so report "no text" without logging an error.
            return None
        start_time = time.perf_counter()
        a2t = A2T(mic["bytes"])
        text = a2t.predict()
        print(f"Text from A2T:\n{text}")
        execution_time = time.perf_counter() - start_time
        print(f"App.py -> get_text() -> time of execution A2T -> {execution_time}s")
        return text
    except Exception as e:
        # Best-effort boundary: log the failure and report "no text".
        print(f"An error occurred in get_text function, reasone is: {e}")
        return None
def speaking(text):
    """Produce a reply for *text*, play it as audio and show it on the page.

    Nothing is done when *text* is ``None``, empty or whitespace-only, or when
    ``switching()`` yields no reply (for example an image was requested but
    none has been uploaded).

    Args:
        text: The recognised user request.
    """
    try:
        if not text or not text.strip():
            return
        print(f"Checking for execution this part {random.randint(0, 5)}")
        output = switching(text)
        if output is None:
            # No reply to clean up or speak; passing None on would raise
            # TypeError inside the label-stripping regex.
            return
        response = remove_labels_with_regex(text=output)
        start_time_t2a = time.perf_counter()
        t2a.autoplay(response)
        execution_time_t2a = time.perf_counter() - start_time_t2a
        print(f"App.py -> speaking() -> time of execution T2A -> {execution_time_t2a}s")
        print(ic.pil_image)
        if response:
            st.markdown(f"Your input: {text}")
            st.markdown(f"Chelsea response: {response}")
    except Exception as e:
        # Best-effort boundary: log the failure instead of crashing the page.
        print(f"An error occurred in speaking function, reasone is: {e}")
def main():
    """Run one pass of the app: transcribe a recording, then answer it."""
    text = get_text()
    # This is a function-local name: with no ``global`` statement the
    # module-level ``layer_text`` is left untouched.
    layer_text = text
    print(f"Print text: s{text}s")
    print(f"Print text: s{layer_text}s")
    speaking(text)
    print(f"Checking for execution main func {random.randint(0, 10)}")
# Script entry point.
if __name__ == "__main__":
    main()