Spaces:

metek7
/

instagram-short-summarizing

Runtime error

App Files Files Community

metek7 committed on Oct 8

Commit

a84dbbf

•

1 Parent(s): 508d056

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
-pip install googletrans
 import spaces
 import gradio as gr
 import subprocess
-from googletrans import Translator
 # Gerekli kütüphanelerin kurulumu
 subprocess.run(
@@ -11,7 +9,7 @@ subprocess.run(
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
 )
-subprocess.run("pip install googletrans==3.1.0a0", shell=True)
 import torch
 from llava.model.builder import load_pretrained_model
@@ -24,7 +22,8 @@ from decord import VideoReader, cpu
 import numpy as np
 # Çevirmen nesnesi oluştur
-translator = Translator()
 title = "# 🙋🏻‍♂️🌟Tonic'in 🌋📹LLaVA-Video'suna Hoş Geldiniz!"
 description1 = """**🌋📹LLaVA-Video-7B-Qwen2**, 🌋📹LLaVA-Video-178K veri seti ve LLaVA-OneVision veri seti üzerinde eğitilmiş 7B parametreli bir modeldir. [**Qwen2 dil modeline dayanmaktadır**](https://huggingface.co/collections/Qwen/qwen2-6659360b33528ced941e557f) ve 32K tokene kadar bağlam penceresini destekler. Model, görüntüleri, çoklu görüntüleri ve videoları işleyebilir ve bunlarla etkileşime girebilir, video analizi için özel optimizasyonlara sahiptir.
@@ -84,40 +83,43 @@ print("Model başarıyla yüklendi!")
 @spaces.GPU
 def process_video(video_path, question):
-    max_frames_num = 64
-    video, frame_time, video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
-    video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].to(device).bfloat16()
-    video = [video]
-    conv_template = "qwen_1_5"
-    time_instruction = f"Video {video_time:.2f} saniye sürmektedir ve {len(video[0])} kare uniform olarak örneklenmiştir. Bu kareler {frame_time} konumlarında bulunmaktadır. Lütfen bu videoyla ilgili aşağıdaki soruları cevaplayın."
-    # Soruyu İngilizce'ye çevir
-    question_en = translator.translate(question, dest='en').text
-    full_question = DEFAULT_IMAGE_TOKEN + f"{time_instruction}\n{question_en}"
-    conv = copy.deepcopy(conv_templates[conv_template])
-    conv.append_message(conv.roles[0], full_question)
-    conv.append_message(conv.roles[1], None)
-    prompt_question = conv.get_prompt()
-    input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
-    with torch.no_grad():
-        output = model.generate(
-            input_ids,
-            images=video,
-            modalities=["video"],
-            do_sample=False,
-            temperature=0,
-            max_new_tokens=4096,
-        )
-    response = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()
-    # Cevabı Türkçe'ye çevir
-    response_tr = translator.translate(response, dest='tr').text
-    return response_tr
 def gradio_interface(video_file, question):
     if video_file is None:

 import spaces
 import gradio as gr
 import subprocess
+from deep_translator import GoogleTranslator
 # Gerekli kütüphanelerin kurulumu
 subprocess.run(
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
 )
+subprocess.run("pip install deep_translator", shell=True)
 import torch
 from llava.model.builder import load_pretrained_model
 import numpy as np
 # Çevirmen nesnesi oluştur
+translator = GoogleTranslator(source='tr', target='en')
+translator_reverse = GoogleTranslator(source='en', target='tr')
 title = "# 🙋🏻‍♂️🌟Tonic'in 🌋📹LLaVA-Video'suna Hoş Geldiniz!"
 description1 = """**🌋📹LLaVA-Video-7B-Qwen2**, 🌋📹LLaVA-Video-178K veri seti ve LLaVA-OneVision veri seti üzerinde eğitilmiş 7B parametreli bir modeldir. [**Qwen2 dil modeline dayanmaktadır**](https://huggingface.co/collections/Qwen/qwen2-6659360b33528ced941e557f) ve 32K tokene kadar bağlam penceresini destekler. Model, görüntüleri, çoklu görüntüleri ve videoları işleyebilir ve bunlarla etkileşime girebilir, video analizi için özel optimizasyonlara sahiptir.
 @spaces.GPU
 def process_video(video_path, question):
+    try:
+        max_frames_num = 64
+        video, frame_time, video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
+        video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].to(device).bfloat16()
+        video = [video]
+        conv_template = "qwen_1_5"
+        time_instruction = f"Video {video_time:.2f} saniye sürmektedir ve {len(video[0])} kare uniform olarak örneklenmiştir. Bu kareler {frame_time} konumlarında bulunmaktadır. Lütfen bu videoyla ilgili aşağıdaki soruları cevaplayın."
+        # Soruyu İngilizce'ye çevir
+        question_en = translator.translate(question)
+        full_question = DEFAULT_IMAGE_TOKEN + f"{time_instruction}\n{question_en}"
+        conv = copy.deepcopy(conv_templates[conv_template])
+        conv.append_message(conv.roles[0], full_question)
+        conv.append_message(conv.roles[1], None)
+        prompt_question = conv.get_prompt()
+        input_ids = tokenizer_image_token(prompt_question, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
+        with torch.no_grad():
+            output = model.generate(
+                input_ids,
+                images=video,
+                modalities=["video"],
+                do_sample=False,
+                temperature=0,
+                max_new_tokens=4096,
+            )
+        response = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()
+        # Cevabı Türkçe'ye çevir
+        response_tr = translator_reverse.translate(response)
+        return response_tr
+    except Exception as e:
+        return f"Bir hata oluştu: {str(e)}"
 def gradio_interface(video_file, question):
     if video_file is None: