Spaces:

Mahmoudmody777
/

smvideo

Sleeping

App Files Files Community

Mahmoudmody777 committed on Nov 8, 2024

Commit

3661e51

verified ·

1 Parent(s): 6e25931

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -53

app.py CHANGED Viewed

@@ -1,21 +1,15 @@
 import gradio as gr
 import whisper
-from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip, ColorClip
-from moviepy.config import change_settings
 import tempfile
 import os
 from pathlib import Path
 import numpy as np
-from PIL import ImageFont
 import arabic_reshaper
 from bidi.algorithm import get_display
-import soundfile as sf
 from pydub import AudioSegment
-# Configure MoviePy to use ImageMagick
-if os.name == 'nt':  # Windows only
-    change_settings({"IMAGEMAGICK_BINARY": "magick"})
 # Load the Whisper model
 model = whisper.load_model("base")
@@ -25,8 +19,42 @@ def prepare_arabic_text(text):
     bidi_text = get_display(reshaped_text)
     return bidi_text
 def convert_audio_to_wav(audio_path):
-    """تحويل الملف الصوتي إلى صيغة WAV إذا لم يكن كذلك"""
     audio_path = Path(audio_path)
     if audio_path.suffix.lower() != '.wav':
         wav_path = audio_path.with_suffix('.wav')
@@ -35,20 +63,6 @@ def convert_audio_to_wav(audio_path):
         return str(wav_path)
     return str(audio_path)
-def create_text_frames(transcription, duration, fps=24):
-    """Split a transcription into the text chunks shown on successive frames.
-
-    Args:
-        transcription: Full transcribed text; split on whitespace into words.
-        duration: Clip length; multiplied by ``fps`` to get the frame count.
-        fps: Frames per second used to derive the frame count.
-
-    Returns:
-        A list of strings, each holding ``words_per_frame`` consecutive words
-        (the last chunk may be shorter). Chunks containing characters in the
-        U+0600-U+06FF range are passed through ``prepare_arabic_text``.
-        An empty transcription yields an empty list.
-    """
-    words = transcription.split()
-    # int(duration * fps) is 0 for a clip shorter than one frame; clamp to 1
-    # so the integer division below cannot raise ZeroDivisionError.
-    total_frames = max(1, int(duration * fps))
-    # Never drop below one word per chunk, otherwise range() gets a zero step.
-    words_per_frame = max(1, len(words) // total_frames)
-    frames = []
-    for i in range(0, len(words), words_per_frame):
-        frame_words = ' '.join(words[i:i + words_per_frame])
-        # Only Arabic-script chunks need reshaping / bidi reordering.
-        if any('\u0600' <= c <= '\u06FF' for c in frame_words):
-            frame_words = prepare_arabic_text(frame_words)
-        frames.append(frame_words)
-    return frames
 def create_video_with_text(audio_path, transcription):
     """إنشاء فيديو مع نص متزامن"""
     try:
@@ -59,39 +73,32 @@ def create_video_with_text(audio_path, transcription):
         audio_clip = AudioFileClip(wav_path)
         duration = audio_clip.duration
-        # إنشاء إطارات النص
-        frames = create_text_frames(transcription, duration)
-        # إنشاء خلفية سوداء
-        background = ColorClip(size=(720, 480), color=(0, 0, 0))
-        # إنشاء مقاطع النص
-        text_clips = []
-        frame_duration = duration / len(frames)
-        for i, frame_text in enumerate(frames):
-            try:
-                txt_clip = TextClip(
-                    frame_text,
-                    fontsize=30,
-                    color='white',
-                    bg_color='black',
-                    size=(720, 480),
-                    method='caption',
-                    font='Arial-Regular'  # استخدام اسم خط كامل
-                ).set_start(i * frame_duration).set_duration(frame_duration)
-                text_clips.append(txt_clip)
-            except Exception as e:
-                print(f"Error creating text clip for frame {i}: {str(e)}")
-                continue
-        if not text_clips:
-            raise Exception("لم يتم إنشاء أي مقاطع نصية")
         # دمج جميع المقاطع
-        video = CompositeVideoClip(
-            [background.set_duration(duration)] + text_clips
-        )
         video = video.set_audio(audio_clip)
         # حفظ الفيديو
@@ -109,7 +116,7 @@ def create_video_with_text(audio_path, transcription):
         return video_path
     except Exception as e:
-        print(f"Error in create_video_with_text: {str(e)}")
         raise
 def process_audio(audio_path):
@@ -122,14 +129,19 @@ def process_audio(audio_path):
             raise FileNotFoundError("الملف غير موجود")
         # تحويل الصوت إلى نص
         result = model.transcribe(audio_path)
         transcription = result["text"]
         # إنشاء الفيديو
         video_path = create_video_with_text(audio_path, transcription)
         return video_path, transcription
     except Exception as e:
         return None, f"حدث خطأ أثناء المعالجة: {str(e)}"
 # إنشاء واجهة Gradio

 import gradio as gr
 import whisper
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip
 import tempfile
 import os
 from pathlib import Path
 import numpy as np
+from PIL import Image, ImageDraw, ImageFont
 import arabic_reshaper
 from bidi.algorithm import get_display
 from pydub import AudioSegment
 # Load the Whisper model
 model = whisper.load_model("base")
     bidi_text = get_display(reshaped_text)
     return bidi_text
+def create_text_image(text, size=(720, 480), font_size=30):
+    """Render ``text`` in white, centered on a black RGB image.
+
+    Args:
+        text: The string to draw. If it contains characters in the
+            U+0600-U+06FF range it is first passed through
+            ``prepare_arabic_text``.
+        size: ``(width, height)`` of the image in pixels.
+        font_size: Size passed to the TrueType font loader.
+
+    Returns:
+        The drawn image converted with ``np.array``.
+    """
+    # Start from a new image with a black background.
+    img = Image.new('RGB', size, 'black')
+    draw = ImageDraw.Draw(img)
+    # Try the known font files in order. Only "font could not be loaded"
+    # errors are caught, so KeyboardInterrupt / SystemExit still propagate
+    # (the previous bare ``except:`` swallowed them).
+    font = None
+    for font_path in (
+        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+        "arial.ttf",
+    ):
+        try:
+            font = ImageFont.truetype(font_path, font_size)
+            break
+        except (OSError, ImportError):
+            continue
+    if font is None:
+        # Fall back to the default font when no candidate could be loaded.
+        font = ImageFont.load_default()
+    # Prepare Arabic text before measuring so the bbox matches what is drawn.
+    if any('\u0600' <= c <= '\u06FF' for c in text):
+        text = prepare_arabic_text(text)
+    # Measure the text and compute the top-left corner that centers it.
+    text_bbox = draw.textbbox((0, 0), text, font=font)
+    text_width = text_bbox[2] - text_bbox[0]
+    text_height = text_bbox[3] - text_bbox[1]
+    x = (size[0] - text_width) // 2
+    y = (size[1] - text_height) // 2
+    # Draw the text.
+    draw.text((x, y), text, font=font, fill='white')
+    return np.array(img)
 def convert_audio_to_wav(audio_path):
+    """تحويل الملف الصوتي إلى صيغة WAV"""
     audio_path = Path(audio_path)
     if audio_path.suffix.lower() != '.wav':
         wav_path = audio_path.with_suffix('.wav')
         return str(wav_path)
     return str(audio_path)
 def create_video_with_text(audio_path, transcription):
     """إنشاء فيديو مع نص متزامن"""
     try:
         audio_clip = AudioFileClip(wav_path)
         duration = audio_clip.duration
+        # تقسيم النص إلى أجزاء
+        words = transcription.split()
+        total_frames = int(duration * 24)  # 24 FPS
+        words_per_frame = max(1, len(words) // total_frames)
+        # إنشاء قائمة المقاطع
+        clips = []
+        current_time = 0
+        frame_duration = duration / (len(words) / words_per_frame)
+        for i in range(0, len(words), words_per_frame):
+            # تجميع الكلمات لهذا الإطار
+            frame_words = ' '.join(words[i:i + words_per_frame])
+            # إنشاء صورة للنص
+            text_image = create_text_image(frame_words)
+            # تحويل الصورة إلى مقطع
+            text_clip = ImageClip(text_image).set_duration(frame_duration)
+            # إضافة المقطع مع توقيته
+            clips.append(text_clip.set_start(current_time))
+            current_time += frame_duration
         # دمج جميع المقاطع
+        video = CompositeVideoClip(clips, size=(720, 480))
         video = video.set_audio(audio_clip)
         # حفظ الفيديو
         return video_path
     except Exception as e:
+        print(f"خطأ في create_video_with_text: {str(e)}")
         raise
 def process_audio(audio_path):
             raise FileNotFoundError("الملف غير موجود")
         # تحويل الصوت إلى نص
+        print("جاري تحويل الصوت إلى نص...")
         result = model.transcribe(audio_path)
         transcription = result["text"]
+        print("تم استخراج النص بنجاح")
         # إنشاء الفيديو
+        print("جاري إنشاء الفيديو...")
         video_path = create_video_with_text(audio_path, transcription)
+        print("تم إنشاء الفيديو بنجاح")
         return video_path, transcription
     except Exception as e:
+        print(f"خطأ في process_audio: {str(e)}")
         return None, f"حدث خطأ أثناء المعالجة: {str(e)}"
 # إنشاء واجهة Gradio