from dotenv import load_dotenv, find_dotenv
from transformers import pipeline
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate

# Load environment variables (e.g. HUGGINGFACEHUB_API_TOKEN) from a .env file.
load_dotenv(find_dotenv())


def img2txt(image_path):
    """
    Convert an image to a text caption using a Hugging Face pipeline.

    Args:
        image_path (str): Path to the image.

    Returns:
        str: The caption generated from the image.
    """
    # Image-captioning pipeline (BLIP base).
    itt = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base"
    )
    text = itt(image_path)[0]["generated_text"]
    print(text)
    return text


def generate_story(scenario, repo_id="mistralai/Mistral-7B-Instruct-v0.2"):
    """
    Generate a children's story from an image caption using a language model.

    Args:
        scenario (str): The scenario extracted from the image.
        repo_id (str): Hugging Face repository ID of the language model.

    Returns:
        str: The story generated from the scenario.
    """
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        temperature=0.5,
        streaming=True
    )

    prompt_template = """
    You are a kids story writer. Provide a coherent story for kids using this
    simple instruction: {scenario}.
    The story should have a clear beginning, middle, and end.
    The story should be interesting and engaging for kids.
    The story should be maximum 200 words long.
    Do not include any adult or polemic content.
    Story:
    """
    prompt = PromptTemplate.from_template(prompt_template)

    # Pipe the prompt into the LLM and run the chain on the scenario.
    chain = prompt | llm
    return chain.invoke(input={"scenario": scenario})


if __name__ == "__main__":
    my_story = generate_story(img2txt("image.jpg"))
    print(my_story)
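
# Usage sketch (hypothetical, not part of the original script): the story chain
# can also be exercised without an image by passing a caption-like string
# directly, e.g.
#
#     print(generate_story("a dog surfing a wave at sunset"))
#
# The example scenario is an assumption; any instruct-style model served by the
# Hugging Face Inference API can be substituted via the repo_id argument.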