Spaces:

Mahmoudmody777
/

smvideo

Sleeping

App Files Community

Mahmoudmody777 committed on Nov 8, 2024

Commit

0c44bc5

verified ·

1 Parent(s): a3fce17

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -72

app.py CHANGED Viewed

@@ -9,61 +9,122 @@ from PIL import Image, ImageDraw, ImageFont
 import arabic_reshaper
 from bidi.algorithm import get_display
 from pydub import AudioSegment
-# تحميل نموذج Whisper
-model = whisper.load_model("base")
-def prepare_arabic_text(text):
     """تحضير النص العربي للعرض بشكل صحيح"""
-    reshaped_text = arabic_reshaper.reshape(text)
-    bidi_text = get_display(reshaped_text)
-    return bidi_text
-def create_text_image(text, size=(720, 480), font_size=30):
     """إنشاء صورة تحتوي على نص"""
-    # إنشاء صورة جديدة مع خلفية سوداء
-    img = Image.new('RGB', size, 'black')
     draw = ImageDraw.Draw(img)
     try:
-        # محاولة تحميل الخط
-        font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size)
-    except:
-        try:
-            # محاولة بديلة لتحميل الخط
-            font = ImageFont.truetype("arial.ttf", font_size)
-        except:
-            # استخدام الخط الافتراضي إذا فشلت المحاولات السابقة
-            font = ImageFont.load_default()
-    # تحضير النص وحساب أبعاده
     if any('\u0600' <= c <= '\u06FF' for c in text):
-        text = prepare_arabic_text(text)
-    # حساب موقع النص في وسط الصورة
     text_bbox = draw.textbbox((0, 0), text, font=font)
     text_width = text_bbox[2] - text_bbox[0]
     text_height = text_bbox[3] - text_bbox[1]
-    x = (size[0] - text_width) // 2
-    y = (size[1] - text_height) // 2
     # رسم النص
-    draw.text((x, y), text, font=font, fill='white')
     return np.array(img)
-def convert_audio_to_wav(audio_path):
-    """تحويل الملف الصوتي إلى صيغة WAV"""
-    audio_path = Path(audio_path)
-    if audio_path.suffix.lower() != '.wav':
-        wav_path = audio_path.with_suffix('.wav')
-        audio = AudioSegment.from_file(str(audio_path))
-        audio.export(str(wav_path), format='wav')
-        return str(wav_path)
-    return str(audio_path)
-def create_video_with_text(audio_path, transcription):
     """إنشاء فيديو مع نص متزامن"""
     try:
         # تحويل الملف الصوتي إلى WAV
@@ -73,29 +134,33 @@ def create_video_with_text(audio_path, transcription):
         audio_clip = AudioFileClip(wav_path)
         duration = audio_clip.duration
-        # تقسيم النص إلى أجزاء
-        words = transcription.split()
-        total_frames = int(duration * 24)  # 24 FPS
-        words_per_frame = max(1, len(words) // total_frames)
         # إنشاء قائمة المقاطع
         clips = []
         current_time = 0
-        frame_duration = duration / (len(words) / words_per_frame)
-        for i in range(0, len(words), words_per_frame):
-            # تجميع الكلمات لهذا الإطار
-            frame_words = ' '.join(words[i:i + words_per_frame])
             # إنشاء صورة للنص
-            text_image = create_text_image(frame_words)
             # تحويل الصورة إلى مقطع
-            text_clip = ImageClip(text_image).set_duration(frame_duration)
             # إضافة المقطع مع توقيته
             clips.append(text_clip.set_start(current_time))
-            current_time += frame_duration
         # دمج جميع المقاطع
         video = CompositeVideoClip(clips, size=(720, 480))
@@ -119,43 +184,43 @@ def create_video_with_text(audio_path, transcription):
         print(f"خطأ في create_video_with_text: {str(e)}")
         raise
-def process_audio(audio_path):
-    """معالجة الملف الصوتي وإنشاء الفيديو"""
-    try:
-        if not isinstance(audio_path, str):
-            raise ValueError("يجب أن يكون المدخل مسار ملف صوتي")
-        if not os.path.exists(audio_path):
-            raise FileNotFoundError("الملف غير موجود")
-        # تحويل الصوت إلى نص
-        print("جاري تحويل الصوت إلى نص...")
-        result = model.transcribe(audio_path)
-        transcription = result["text"]
-        print("تم استخراج النص بنجاح")
-        # إنشاء الفيديو
-        print("جاري إنشاء الفيديو...")
-        video_path = create_video_with_text(audio_path, transcription)
-        print("تم إنشاء الفيديو بنجاح")
-        return video_path, transcription
-    except Exception as e:
-        print(f"خطأ في process_audio: {str(e)}")
-        return None, f"حدث خطأ أثناء المعالجة: {str(e)}"
 # إنشاء واجهة Gradio
 iface = gr.Interface(
     fn=process_audio,
     inputs=[
-        gr.Audio(type="filepath", label="قم بتحميل ملف صوتي (MP3 أو WAV)")
     ],
     outputs=[
         gr.Video(label="الفيديو المنشأ"),
         gr.Textbox(label="النص المستخرج")
     ],
     title="محول الصوت إلى فيديو مع النص",
-    description="قم بتحميل ملف صوتي لإنشاء فيديو مع نص متزامن. يدعم اللغتين العربية والإنجليزية.",
     examples=[],
     cache_examples=False
 )

 import arabic_reshaper
 from bidi.algorithm import get_display
 from pydub import AudioSegment
+import glob
+# Load the Whisper model - the "medium" model is used for better accuracy on Arabic
+model = whisper.load_model("medium")
def get_available_fonts():
    """Return the sorted file names of the fonts stored in the ``fonts`` folder.

    The folder is looked up relative to the current working directory and is
    created when it does not exist, in which case an empty list is returned.

    Returns:
        list[str]: Base names (no directory part) of every regular, non-hidden
        file whose extension is ``.ttf`` or ``.otf`` in any letter case. The
        list is sorted case-insensitively so that its first element -- used
        as the default choice by callers -- is the same on every run.
    """
    fonts_dir = "fonts"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(fonts_dir, exist_ok=True)

    font_names = [
        name
        for name in os.listdir(fonts_dir)
        # Hidden files were never matched by the previous "*.ext" globs.
        if not name.startswith(".")
        # Compare the lowered extension once instead of globbing per spelling;
        # this also accepts mixed-case extensions and never yields duplicates.
        and os.path.splitext(name)[1].lower() in (".ttf", ".otf")
        # A directory named like a font is not a usable font file.
        and os.path.isfile(os.path.join(fonts_dir, name))
    ]
    # The raw name is the tie-breaker so names differing only by case keep a
    # deterministic order.
    return sorted(font_names, key=lambda name: (name.lower(), name))
def prepare_arabic_text(text, font_path=None):
    """Prepare Arabic text so that it is displayed correctly when drawn.

    The text is first reshaped with ``arabic_reshaper.reshape`` and the result
    is then passed through ``get_display``.

    Args:
        text: The string to prepare.
        font_path: Not used by this function. It is optional so that callers
            may omit it, while callers that already pass it keep working.

    Returns:
        The prepared string, or ``text`` unchanged when preparation raises.
    """
    try:
        reshaped_text = arabic_reshaper.reshape(text)
        return get_display(reshaped_text)
    except Exception as e:
        # Deliberate best effort: unprepared text is still better than
        # aborting the whole rendering job, so report and fall back.
        print(f"خطأ في معالجة النص العربي: {str(e)}")
        return text
def create_text_image(text, size=(720, 480), font_path="fonts/bein-normal.ttf",
                      font_size=30, text_color='white', background_color='black',
                      background_image=None, text_position='center'):
    """Create an image containing ``text`` and return it as a NumPy array.

    Args:
        text: The text to draw. If it contains any character in the range
            U+0600..U+06FF it is first passed through ``prepare_arabic_text``.
        size: ``(width, height)`` of the produced image.
        font_path: Path of the font file to load. When loading fails the
            error is printed and Pillow's default font is used instead.
        font_size: Size passed to ``ImageFont.truetype``.
        text_color: Fill colour of the text.
        background_color: Fill colour of the image when no background image
            is given.
        background_image: Optional path of an image that is converted to RGB
            and resized to ``size`` to serve as the background.
        text_position: ``'top'``, ``'center'`` or ``'bottom'``. The text is
            always centred horizontally. Any other value is treated as
            ``'center'`` instead of failing on unset coordinates.

    Returns:
        numpy.ndarray: The rendered RGB image.
    """
    if background_image:
        # The context manager releases the file handle; convert() and
        # resize() return new images, so the result outlives the file.
        with Image.open(background_image) as source:
            img = source.convert('RGB').resize(size)
    else:
        img = Image.new('RGB', size, background_color)

    # Drawing in 'RGBA' mode on an RGB image blends fills by their alpha.
    # With the default mode the alpha of the box fill below is ignored and
    # the box comes out fully opaque.
    draw = ImageDraw.Draw(img, 'RGBA')

    try:
        font = ImageFont.truetype(font_path, font_size)
    except Exception as e:
        print(f"خطأ في تحميل الخط {font_path}: {str(e)}")
        font = ImageFont.load_default()

    if any('\u0600' <= c <= '\u06FF' for c in text):
        text = prepare_arabic_text(text, font_path)

    # Bounding box of the text as it would be drawn at the origin. Its
    # left/top edges are generally not zero.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    # (x, y) is the top-left corner of the visible text area.
    edge_margin = 20
    x = (size[0] - text_width) // 2
    if text_position == 'bottom':
        y = size[1] - text_height - edge_margin
    elif text_position == 'top':
        y = edge_margin
    else:
        y = (size[1] - text_height) // 2

    # Semi-transparent box behind the text.
    padding = 10
    background_box = [
        x - padding,
        y - padding,
        x + text_width + padding,
        y + text_height + padding,
    ]
    draw.rectangle(background_box, fill=(0, 0, 0, 128))

    # Shift the anchor by the bbox origin so the glyphs land exactly inside
    # the box computed above.
    draw.text((x - left, y - top), text, font=font, fill=text_color)
    return np.array(img)
def process_audio(audio_path, font_name="bein-normal.ttf", background_image=None,
                  text_position='center', editable_text=None):
    """Process an audio file and create the video that shows its text.

    Args:
        audio_path: Path of the audio file; must be a string naming an
            existing file.
        font_name: File name of the font inside the ``fonts`` folder. A
            missing value (``None`` or empty) falls back to the default
            ``"bein-normal.ttf"``.
        background_image: Optional background image path, forwarded to
            ``create_video_with_text``.
        text_position: Text position, forwarded to ``create_video_with_text``.
        editable_text: Optional replacement text. When it contains anything
            other than whitespace it is used (stripped) as the transcription
            and the audio is not transcribed at all.

    Returns:
        tuple: ``(video_path, transcription)`` on success, or
        ``(None, error_message)`` when any step raises.
    """
    try:
        if not isinstance(audio_path, str):
            raise ValueError("يجب أن يكون المدخل مسار ملف صوتي")
        if not os.path.exists(audio_path):
            raise FileNotFoundError("الملف غير موجود")

        user_text = editable_text.strip() if isinstance(editable_text, str) else ""
        if user_text:
            # The caller supplied the text, so running the speech model
            # would only produce a result that is thrown away.
            transcription = user_text
        else:
            print("جاري تحويل الصوت إلى نص...")
            # The transcription language is fixed to Arabic.
            result = model.transcribe(audio_path, language='ar')
            transcription = result["text"]
            print("تم استخراج النص بنجاح")

        print("جاري إنشاء الفيديو...")
        # `font_name` is None when the font selector has nothing selected;
        # os.path.join would raise TypeError on it.
        font_path = os.path.join("fonts", font_name or "bein-normal.ttf")
        video_path = create_video_with_text(audio_path, transcription, font_path,
                                            background_image, text_position)
        print("تم إنشاء الفيديو بنجاح")
        return video_path, transcription
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in the
        # text output instead of raising.
        print(f"خطأ في process_audio: {str(e)}")
        return None, f"حدث خطأ أثناء المعالجة: {str(e)}"
+def create_video_with_text(audio_path, transcription, font_path, background_image=None,
+                         text_position='center'):
     """إنشاء فيديو مع نص متزامن"""
     try:
         # تحويل الملف الصوتي إلى WAV
         audio_clip = AudioFileClip(wav_path)
         duration = audio_clip.duration
+        # تقسيم النص إلى جمل
+        sentences = [s.strip() for s in transcription.split('.') if s.strip()]
+        if not sentences:
+            sentences = [transcription]
+        # حساب مدة كل جملة
+        sentence_duration = duration / len(sentences)
         # إنشاء قائمة المقاطع
         clips = []
         current_time = 0
+        for sentence in sentences:
             # إنشاء صورة للنص
+            text_image = create_text_image(
+                sentence,
+                font_path=font_path,
+                background_image=background_image,
+                text_position=text_position
+            )
             # تحويل الصورة إلى مقطع
+            text_clip = ImageClip(text_image).set_duration(sentence_duration)
             # إضافة المقطع مع توقيته
             clips.append(text_clip.set_start(current_time))
+            current_time += sentence_duration
         # دمج جميع المقاطع
         video = CompositeVideoClip(clips, size=(720, 480))
         print(f"خطأ في create_video_with_text: {str(e)}")
         raise
def convert_audio_to_wav(audio_path):
    """Return the path of a WAV version of ``audio_path``.

    A path whose extension is already ``.wav`` (in any letter case) is
    returned as-is, without touching the file. Otherwise the audio is loaded
    with ``AudioSegment.from_file`` and exported in WAV format to the same
    location with the extension replaced by ``.wav``.

    Args:
        audio_path: Path of the source audio file.

    Returns:
        str: Path of the WAV file.
    """
    source = Path(audio_path)
    if source.suffix.lower() == '.wav':
        return str(source)

    target = source.with_suffix('.wav')
    AudioSegment.from_file(str(source)).export(str(target), format='wav')
    return str(target)
 # Build the Gradio interface
 # The input components are listed in the same order as the parameters of
 # process_audio: audio file, font name, background image, text position,
 # and the optional replacement text.
+available_fonts = get_available_fonts()
 iface = gr.Interface(
     fn=process_audio,
     inputs=[
+        gr.Audio(type="filepath", label="قم بتحميل ملف صوتي (MP3 أو WAV)"),
+        gr.Dropdown(choices=available_fonts, value=available_fonts[0] if available_fonts else None,
+                   label="اختر الخط"),
+        gr.Image(label="صورة الخلفية (اختياري)", type="filepath"),
+        gr.Radio(["top", "center", "bottom"], value="center",
+                label="موقع النص", info="اختر موقع النص في الفيديو"),
+        gr.Textbox(label="تعديل النص (اختياري)", placeholder="اترك فارغاً لاستخدام النص المستخرج تلقائياً")
     ],
     outputs=[
         gr.Video(label="الفيديو المنشأ"),
         gr.Textbox(label="النص المستخرج")
     ],
     title="محول الصوت إلى فيديو مع النص",
+    description="""
+    قم بتحميل ملف صوتي لإنشاء فيديو مع نص متزامن.
+    - يدعم اللغة العربية بشكل كامل
+    - يمكنك اختيار الخط المناسب
+    - يمكنك إضافة صورة خلفية
+    - يمكنك تحديد موقع النص
+    - يمكنك تعديل النص المستخرج
+    """,
     examples=[],
     cache_examples=False
 )