Spaces:

CineAI
/

Chelsea

Sleeping

App Files Files Community

CineAI committed on Oct 6

Commit

7378fc8

•

1 Parent(s): 91b59ba

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -27

app.py CHANGED Viewed

@@ -3,6 +3,8 @@
 # python core libraries
 import re
 import psutil
 # streamlit
 import streamlit as st
 # components from other authors
@@ -11,66 +13,81 @@ from streamlit_mic_recorder import mic_recorder
 from audio_processing.A2T import A2T
 from audio_processing.T2A import T2A
 from llm.utils.chat import Conversation
 # utils modules
 from utils.keywords import keywords
 from utils.prompt_toggle import select_prompt, load_prompts
-from utils.documentation import Documentation
-# TODO:
-#     * Add a way in utils to use different prompts -> Done
-#     * Add "who is in the photo" recognition, as done on HF -> agent
-#     * Add the ability to solve math problems from drawings -> agent
-#     * Add the ability to create/edit documents (pdf, docx) -> agent
 # Module-level objects used by main() below.
 prompts = load_prompts()
-doc = Documentation()
 chat = Conversation()
 t2a = T2A()
 def remove_labels_with_regex(text: str):
     """Drop repeated lines, then strip leading speaker labels from each line.

     The text is first passed through ``remove_duplicates``. Afterwards a
     ``Human:``, ``AI:`` or ``Chelsea:`` prefix at the start of any line is
     removed together with the whitespace that follows it.
     """
     label_prefix = re.compile(r'^(Human:|AI:|Chelsea:)\s*', flags=re.MULTILINE)
     return label_prefix.sub('', remove_duplicates(text))
def remove_duplicates(text):
    """Return *text* with repeated lines removed, keeping first occurrences.

    Lines are compared exactly (including whitespace) and are split and
    re-joined on ``'\\n'`` only, so the order of the surviving lines is the
    order in which they first appeared.

    Args:
        text: Newline-separated string.

    Returns:
        The de-duplicated string.
    """
    # dict keys keep insertion order, so fromkeys() de-duplicates in one pass
    # without the separate "seen" set and result list.
    return '\n'.join(dict.fromkeys(text.split('\n')))
 def main():
     """Record speech, send it to the chat model and play back the reply.

     The recording is transcribed with ``A2T``, a prompt is picked with
     ``select_prompt`` (the raw transcription is used when none is picked),
     the chat output is cleaned with ``remove_labels_with_regex``, handed to
     ``t2a.autoplay`` and, when non-empty, rendered with ``st.markdown``.
     Any exception is printed and followed by ``doc.execution()``.
     """
     try:
         recording = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
         if recording is None:
             return
         text = A2T(recording["bytes"]).predict()
         prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
         print(f"Prompt:\n{prompt}")
         chat_input = text if prompt is None else prompt
         response = remove_labels_with_regex(text=chat.chatting(prompt=chat_input))
         t2a.autoplay(response)
         if response:
             st.markdown(f"Your input: {text}")
             st.markdown(f"Chelsea response: {response}")
             # Both names are cleared before the trailing debug print.
             prompt = response = None
         print(f"Prompt: {prompt}, response: {response}")
     except Exception as error:
         print(f"An error occurred in main finction, reasone is: {error}")
         doc.execution()
 if __name__ == "__main__":
     print(f"Total Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")

 # python core libraries
 import re
 import psutil
+import time
+import random
 # streamlit
 import streamlit as st
 # components from other authors
 from audio_processing.A2T import A2T
 from audio_processing.T2A import T2A
 from llm.utils.chat import Conversation
+from vlm.vlm import VLM
 # utils modules
 from utils.keywords import keywords
 from utils.prompt_toggle import select_prompt, load_prompts
+from utils.documentation import create_hand_gesture_doc
+from utils.image_caption import ImageCaption
 # Module-level objects used by switching() and main() below.
 prompts = load_prompts()
 chat = Conversation()
 t2a = T2A()
+vlm = VLM()  # passed as `model` to ic.send2ai() in switching()
+ic = ImageCaption()  # provides load_image(), send2ai() and the pil_image attribute read in main()
 def remove_labels_with_regex(text: str):
     """Strip leading speaker labels from every line of *text*.

     A ``Human:``, ``AI:`` or ``Chelsea:`` prefix at the start of any line is
     removed together with the whitespace that follows it.

     Args:
         text: Raw model output. ``None`` or an empty string is accepted.

     Returns:
         The cleaned text, or an empty string when there is nothing to clean.
     """
     # switching() returns None when no reply was produced; re.sub() would
     # raise TypeError on that, so treat it as "no text".
     if not text:
         return ""
     pattern = r'^(Human:|AI:|Chelsea:)\s*'
     return re.sub(pattern, '', text, flags=re.MULTILINE)
def exctrator(sentence, phrase="show me your image"):
    """Return the text that follows the first occurrence of *phrase*.

    The match is case-sensitive. Everything after the first occurrence is
    returned, even if *phrase* appears again later in *sentence*.

    Args:
        sentence: Text to search.
        phrase: Marker whose trailing text is wanted.

    Returns:
        The stripped remainder of *sentence* after *phrase*, or an empty
        string when *phrase* does not occur.
    """
    # partition() keeps the whole tail; split(phrase)[1] would cut it off at a
    # second occurrence of the phrase.
    _, matched, remainder = sentence.partition(phrase)
    return remainder.strip() if matched else ""
def switching(text):
    """Route a transcribed utterance to the image model or to the text chat.

    When *text* contains "show me your image" (in any letter case), the words
    after that phrase, lower-cased, become the prompt for ``ic.send2ai``.
    Otherwise a prompt is chosen with ``select_prompt`` and sent to
    ``chat.chatting``; the raw *text* is sent when no prompt is chosen.

    Args:
        text: Transcribed user utterance.

    Returns:
        The model output, or ``None`` when the image command was given but
        ``ic.load_image()`` returned ``None`` (a Streamlit warning is shown
        in that case).
    """
    command_phrase = "show me your image"
    lowered = text.lower()
    result = None
    # The phrase is a plain literal and the text is already lower-cased, so a
    # substring test is enough.
    if command_phrase in lowered:
        prompt = exctrator(lowered, phrase=command_phrase)
        # Load the image; only its presence matters here.
        if ic.load_image() is not None:
            # An image is available, so run it through the vision model.
            result = ic.send2ai(model=vlm, prompt=prompt)
        else:
            # No image has been uploaded yet, so tell the user.
            st.warning("No image uploaded yet. Please upload an image to continue.")
    else:
        prompt = select_prompt(input_text=text, prompts=prompts, keywords=keywords)
        result = chat.chatting(prompt=prompt if prompt is not None else text)
    print(f"Prompt:\n{prompt}")
    return result
 def main():
     """Record speech, get a reply via ``switching`` and speak and render it.

     The recording is transcribed with ``A2T``, routed through ``switching``,
     cleaned with ``remove_labels_with_regex``, played with ``t2a.autoplay``
     and rendered with Streamlit. ``ic.pil_image`` is shown under the reply
     when it is set. Timings of the A2T and T2A stages are printed.

     Nothing is spoken or rendered when no recording is available or when no
     reply was produced. Any exception is caught and printed.
     """
     try:
         mic = mic_recorder(start_prompt="Record", stop_prompt="Stop", just_once=True, use_container_width=True)
         if mic is None:
             return
         start_time = time.perf_counter()
         a2t = A2T(mic["bytes"])
         text = a2t.predict()
         print(f"Text from A2T:\n{text}")
         execution_time = time.perf_counter() - start_time
         print(f"App.py -> main() -> time of execution A2T -> {execution_time}s")
         output = switching(text)
         if not output:
             # switching() returns None after warning about a missing image;
             # passing that on would raise TypeError inside re.sub().
             return
         response = remove_labels_with_regex(text=output)
         if not response:
             # Nothing left after label stripping, so there is nothing to speak or show.
             return
         start_time_t2a = time.perf_counter()
         t2a.autoplay(response)
         execution_time_t2a = time.perf_counter() - start_time_t2a
         print(f"App.py -> main() -> time of execution T2A -> {execution_time_t2a}s")
         print(ic.pil_image)
         st.markdown(f"Your input: {text}")
         st.markdown(f"Chelsea response: {response}")
         if ic.pil_image is not None:
             # NOTE(review): newer Streamlit releases deprecate use_column_width in
             # favour of use_container_width; confirm against the pinned version.
             st.image(ic.pil_image, caption="Uploaded Image", use_column_width=True)
     except Exception as e:
         print(f"An error occurred in main finction, reasone is: {e}")
 if __name__ == "__main__":
     print(f"Total Memory: {psutil.virtual_memory().total / (1024**3):.2f} GB")