from dotenv import load_dotenv, find_dotenv
from transformers import pipeline
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate

# Load environment variables (e.g. HUGGINGFACEHUB_API_TOKEN) from a .env file.
load_dotenv(find_dotenv())


def img2txt(image_path):
    """
    Convert an image to a text caption using a Hugging Face pipeline.

    Args:
        image_path (str): Path to the image.

    Returns:
        str: The caption generated from the image.
    """
    # Image-captioning pipeline (BLIP base).
    itt = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base"
    )
    text = itt(image_path)[0]["generated_text"]
    print(text)
    return text


def generate_story(scenario, repo_id="mistralai/Mistral-7B-Instruct-v0.2"):
    """
    Generate a children's story from an image caption using a language model.

    Args:
        scenario (str): The scenario extracted from the image.
        repo_id (str): Hugging Face repository ID of the language model.

    Returns:
        str: The story generated from the scenario.
    """
    llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        temperature=0.5,
        streaming=True
    )

    prompt_template = """
    You are a kids story writer. Provide a coherent story for kids using this
    simple instruction: {scenario}.
    The story should have a clear beginning, middle, and end.
    The story should be interesting and engaging for kids.
    The story should be maximum 200 words long.
    Do not include any adult or polemic content.
    Story:
    """
    prompt = PromptTemplate.from_template(prompt_template)

    # Pipe the prompt into the LLM and run the chain on the scenario.
    chain = prompt | llm
    return chain.invoke(input={"scenario": scenario})


if __name__ == "__main__":
    my_story = generate_story(img2txt("image.jpg"))
    print(my_story)
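
# Usage sketch (hypothetical, not part of the original script): the story chain
# can also be exercised without an image by passing a caption-like string
# directly, e.g.
#
#     print(generate_story("a dog surfing a wave at sunset"))
#
# The example scenario is an assumption; any instruct-style model served by the
# Hugging Face Inference API can be substituted via the repo_id argument.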