from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import PyPDFLoader from llama_cpp import Llama def load_llm_model(): try: llm = Llama( model_path="Llama-3.2-1B-Instruct-Q8_0.gguf", n_gpu_layers = -1, n_ctx=100000, n_batch=4096, ) print("LLM model loaded successfully") return llm except Exception as e: print(f"Error loading LLM model: {e}") raise def get_text_from_pdf(file): loader = PyPDFLoader(file) pages = loader.load_and_split() text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50) texts = text_splitter.split_documents(pages) final_text = "" for text in texts: if text.page_content.startswith("REFERENCES"): break else: final_text = final_text + text.page_content research_paper = "" for text in final_text: if text.startswith(("REFERENCES", "REFERENCESREFERENCES", "REFERENCESREFERENCESREFERENCES")): break else: research_paper = research_paper + text return research_paper[:10000] def generate_prompt(research_paper): prompt = f''' As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n Importantly your output must use language \\"English\\"" ''' return prompt def generate_mindmap_structure(llm, prompt): response = llm.create_chat_completion( messages = [ {'role':'system', 'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'}, {'role':'user', 'content': prompt} ], temperature=0.7, top_k=200, top_p=3.0, ) mindmap_data = response['choices'][0]['message']['content'] return mindmap_data def generate_markdown(llm, file): final_text = get_text_from_pdf(file) prompt = generate_prompt(final_text) mindmap_markdown = generate_mindmap_structure(llm, prompt) if "**" in mindmap_markdown: mindmap_markdown = mindmap_markdown.replace("- **", "### ") mindmap_markdown = mindmap_markdown.replace("**", "") else: pass return mindmap_markdown def sanitize_markdown(llm, mindmap_markdown): prompt = f''' As an experienced coder and programmer, help me convert the text \\"{mindmap_markdown}\\" into a well-formatted markdown. Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nDo not include anything in the response, that is not the part of mindmap." ''' sanitized_markdown = generate_mindmap_structure(llm, prompt) return sanitized_markdown