Update app.py
app.py CHANGED
@@ -17,11 +17,9 @@ model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
 
 processor = LlavaProcessor.from_pretrained(model_id)
 
-model = LlavaForConditionalGeneration.from_pretrained(model_id)
+model = LlavaForConditionalGeneration.from_pretrained(model_id, low_cpu_mem_usage=True)
 model.to("cpu")
 
-def replace_video_with_images(text, frames):
-    return text.replace("<video>", "<image>" * frames)
 
 def sample_frames(video_file) :
     try:
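The added low_cpu_mem_usage=True tells from_pretrained to stage the checkpoint weights instead of first materializing a randomly initialized copy, which matters on a CPU-only Space. Note that sample_frames is only visible at its signature here; its body lies outside these hunks. For orientation, a typical OpenCV-based helper of this kind might look like the sketch below (hypothetical: the function name, frame count, and stride are assumptions, not the Space's actual code).

import cv2
from PIL import Image

def sample_frames_sketch(video_file, num_frames=12):
    # Evenly subsample up to num_frames frames from the clip.
    video = cv2.VideoCapture(video_file)
    total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    interval = max(total // num_frames, 1)
    frames = []
    for i in range(total):
        ret, frame = video.read()
        if not ret:
            continue
        if i % interval == 0:
            # OpenCV decodes to BGR; PIL (and the LLaVA processor) expect RGB.
            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    video.release()
    return frames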
@@ -90,51 +88,26 @@ def respond(message, history):
     vqa = ""
 
     user_prompt = message
-    message_text = message["text"]
-
     # Handle image processing
-    if message["files"]:
+    if message["files"]:
+        image = user_prompt["files"][-1]
         txt = user_prompt["text"]
         img = user_prompt["files"]
-
-        if len(message["files"]) == 1:
-            image = [message["files"][0]]
-        elif len(message["files"]) > 1:
-            image = [msg for msg in message["files"]]
 
         video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
         image_extensions = Image.registered_extensions()
         image_extensions = tuple([ex for ex, f in image_extensions.items()])
-
-        if image[0].endswith(video_extensions):
-
-
-
-            image_tokens = "<image>" * int(len(image))
-            prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
-        elif image[0].endswith(image_extensions):
-            gr.Info("Analyzing image")
-            image = Image.open(image[0]).convert("RGB")
-            prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
-
-        elif len(image) > 1:
-            image_list = []
-
-            for img in image:
-                if img.endswith(image_extensions):
-                    gr.Info("Analyzing image")
-                    img = Image.open(img).convert("RGB")
-                    image_list.append(img)
-
-                elif img.endswith(video_extensions):
-                    gr.Info(f"Analyzing video")
-                    frames = sample_frames(img)
-                    for frame in frames:
-                        image_list.append(frame)
-
-            image_tokens = "<image>" * len(image_list)
+
+        if image.endswith(video_extensions):
+            gr.Info(f"Analyzing {video_extensions} file")
+            image = sample_frames(image)
+            image_tokens = "<image>" * int(len(image))
             prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
-
+
+        elif image.endswith(image_extensions):
+            gr.Info("Analyzing image")
+            image = Image.open(image).convert("RGB")
+            prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
 
     inputs = processor(prompt, image, return_tensors="pt")
     streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
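The rewrite collapses the old multi-file branches into a single path that only looks at the most recent upload (user_prompt["files"][-1]): a video is expanded into one <image> placeholder per sampled frame, a still image into a single placeholder. A self-contained sketch of that routing (function and constant names are mine; note the original extension tuple also lists "wav", an audio format, kept verbatim here):

from PIL import Image

VIDEO_EXTENSIONS = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
IMAGE_EXTENSIONS = tuple(Image.registered_extensions())  # keys like ".png", so endswith() works

def route_upload(path, text, sample_frames):
    # Mirror of the new branch: video -> list of frames, image -> one PIL image.
    if path.endswith(VIDEO_EXTENSIONS):
        images = sample_frames(path)
        image_tokens = "<image>" * len(images)  # one placeholder per frame
    elif path.endswith(IMAGE_EXTENSIONS):
        images = Image.open(path).convert("RGB")
        image_tokens = "<image>"
    else:
        raise ValueError(f"unsupported upload: {path}")
    prompt = f"<|im_start|>user {image_tokens}\n{text}<|im_end|><|im_start|>assistant"
    return prompt, images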
@@ -143,6 +116,7 @@ def respond(message, history):
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
+    gr.Info("Generating output")
 
     buffer = ""
     for new_text in streamer:
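The Thread plus TextIteratorStreamer pair is the standard transformers idiom for token-by-token streaming: generation runs on a worker thread while the streamer hands decoded chunks to the foreground loop. A minimal sketch (the max_new_tokens value is illustrative; generation_kwargs in app.py is built outside these hunks):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, max_new_tokens=512):
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    buffer = ""
    for new_text in streamer:  # blocks until the next decoded chunk arrives
        buffer += new_text
        yield buffer           # Gradio redraws the partial answer on every yield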
@@ -158,6 +132,7 @@ def respond(message, history):
         {"type": "function", "function": {"name": "image_qna", "description": "Answer question asked by user related to image", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Question by user"}}, "required": ["query"]}}},
     ]
 
+    message_text = message["text"]
     func_caller.append({"role": "user", "content": f'[SYSTEM]You are a helpful assistant. You have access to the following functions: \n {str(functions_metadata)}\n\nTo use these functions respond with:\n<functioncall> {{ "name": "function_name", "arguments": {{ "arg_1": "value_1", "arg_1": "value_1", ... }} }} </functioncall> [USER] {message} {vqa}'})
 
     response = client_gemma.chat_completion(func_caller, max_tokens=150)
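The [SYSTEM] prompt asks the model to wrap tool calls in <functioncall> tags with a JSON payload; the code that reads json_data back out lies outside these hunks. One plausible way to recover it (a sketch, not the Space's actual parser):

import json
import re

def parse_functioncall(reply):
    # Grab the first <functioncall>...</functioncall> block and decode its JSON body.
    match = re.search(r"<functioncall>\s*(\{.*\})\s*</functioncall>", reply, re.DOTALL)
    return json.loads(match.group(1)) if match else None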
@@ -199,6 +174,7 @@ def respond(message, history):
             query = query.replace(" ", "%20")
             image = f"![](https://image.pollinations.ai/prompt/{query}?seed={seed})"
             yield image
+            time.sleep(5)
             gr.Info("We are going to Update Our Image Generation Engine to more powerful ones in Next Update. ThankYou")
         elif json_data["name"] == "image_qna":
             messages = f"<|start_header_id|>system\nYou are OpenGPT 4o mini a helpful assistant made by KingNish. You are provide with both images and captions and Your task is to answer of user with help of caption provided. Answer in human style and show emotions.<|end_header_id|>"
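The added time.sleep(5) simply keeps the follow-up toast on screen after the image markdown has been yielded. For comparison, urllib.parse.quote would escape more than bare spaces in the pollinations.ai URL; a sketch of the same branch (how app.py actually derives seed is not shown in this diff):

import random
from urllib.parse import quote

def image_markdown(query):
    seed = random.randint(0, 999999)  # assumption: the seed source is not visible here
    # quote() escapes spaces and other unsafe characters, not just " " -> "%20"
    return f"![](https://image.pollinations.ai/prompt/{quote(query)}?seed={seed})"

Yielding this markdown string lets the Gradio chat render the generated image inline.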