import gradio as gr
from typing import Dict

from moviepy.editor import VideoFileClip, concatenate_videoclips

class VideoCreator:
    """Build a single narrated video out of a collection of scenes.

    For every scene the creator synthesises a voice-over from the scene's
    ``"Summary"`` text, generates a background image from its
    ``"Illustration"`` text, renders both into a waveform video with
    ``gr.make_waveform`` and finally concatenates the per-scene videos.

    All intermediate and output files are written with fixed relative names
    (see the class constants below), so they land in the current working
    directory and are overwritten on each use.
    """

    # The audio and image files are reused for every scene. That is only safe
    # because each scene is rendered to video before the next one starts.
    # NOTE(review): this also relies on ``gr.make_waveform`` returning a
    # distinct output path per call - confirm against the Gradio version used.
    _AUDIO_PATH = "output.wav"
    _IMAGE_PATH = "img.png"
    _OUTPUT_PATH = "final_video.mp4"

    def __init__(self,
                 tts_pipeline,
                 image_pipeline) -> None:
        """Store the pipelines used to generate audio and images.

        Args:
            tts_pipeline: Object exposing
                ``tts_to_file(text=..., file_path=...)``.
            image_pipeline: Callable taking a text description and returning
                an object whose ``images[0]`` has a ``save(path)`` method.
        """
        self.tts_pipeline = tts_pipeline
        self.image_pipeline = image_pipeline

    def create_video(self, scenes: Dict) -> str:
        """Render every scene and merge the results into one video.

        Args:
            scenes: Mapping of scene identifiers to scene dicts. Each scene
                dict must provide the keys ``"Summary"`` and
                ``"Illustration"``. Scenes are rendered in the mapping's
                iteration order.

        Returns:
            Path of the merged video file.

        Raises:
            ValueError: If ``scenes`` is empty.
        """
        if not scenes:
            # Fail early with a clear message instead of failing later
            # inside moviepy when asked to concatenate zero clips.
            raise ValueError("scenes must contain at least one scene")

        videos_dict = {
            index: self._create_video_from_scene(scene)
            for index, scene in enumerate(scenes.values())
        }
        return self._merge_videos(videos_dict)

    def _create_video_from_scene(self, scene: Dict) -> str:
        """Render one scene (voice-over plus background image) to a video.

        Args:
            scene: Dict with the keys ``"Summary"`` (text to speak) and
                ``"Illustration"`` (text describing the background image).

        Returns:
            Whatever ``gr.make_waveform`` returns for the generated audio and
            image; it is later passed to ``VideoFileClip`` as a video file.
        """
        audio_file = self._get_audio_from_text(scene["Summary"])
        bg_image = self._get_bg_image_from_description(scene["Illustration"])
        return gr.make_waveform(audio=audio_file, bg_image=bg_image)

    def _get_audio_from_text(self, voice_over: str) -> str:
        """Ask the TTS pipeline to write ``voice_over`` to the audio file.

        Returns:
            Path of the audio file the pipeline was asked to write.
        """
        self.tts_pipeline.tts_to_file(text=voice_over,
                                      file_path=self._AUDIO_PATH)
        return self._AUDIO_PATH

    def _get_bg_image_from_description(self, img_desc: str) -> str:
        """Generate an image for ``img_desc`` and save it to the image file.

        Only the first image of the pipeline result is used.

        Returns:
            Path the image was saved to.
        """
        images = self.image_pipeline(img_desc)
        print("Image generated!")
        images.images[0].save(self._IMAGE_PATH)
        return self._IMAGE_PATH

    def _merge_videos(self, videos_dict: Dict) -> str:
        """Concatenate the given videos and write the result to disk.

        Args:
            videos_dict: Mapping of sortable keys (scene indices) to video
                file paths. Videos are concatenated in ascending key order.

        Returns:
            Path of the merged video file.

        Raises:
            ValueError: If ``videos_dict`` is empty.
        """
        if not videos_dict:
            raise ValueError("videos_dict must contain at least one video")

        clips = []
        final_video = None
        try:
            # Open inside the ``try`` so that clips opened before a failing
            # one are still closed.
            for key in sorted(videos_dict):
                clips.append(VideoFileClip(videos_dict[key]))
            final_video = concatenate_videoclips(clips)
            final_video.write_videofile(self._OUTPUT_PATH)
        finally:
            # Close the clips explicitly rather than leaving their file
            # handles and readers open after the merge.
            if final_video is not None:
                final_video.close()
            for clip in clips:
                clip.close()
        return self._OUTPUT_PATH