# MindMap / generate_markdown.py
# raannakasturi's picture
# Update generate_markdown.py
# 084b8b9 verified
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from llama_cpp import Llama
def load_llm_model(
    model_path="/home/user/app/Llama-3.2-1B-Instruct-Q8_0.gguf",
    n_gpu_layers=-1,
    n_ctx=100000,
    n_batch=4096,
):
    """Create and return the ``Llama`` model instance used for generation.

    All arguments are forwarded unchanged to the ``Llama`` constructor; the
    defaults reproduce the values that were previously hard-coded, so calling
    ``load_llm_model()`` with no arguments behaves as before.

    Args:
        model_path: Location of the GGUF model file.
        n_gpu_layers: Forwarded as ``n_gpu_layers``.
        n_ctx: Forwarded as ``n_ctx``.
        n_batch: Forwarded as ``n_batch``.

    Returns:
        The constructed ``Llama`` object.

    Raises:
        Exception: Whatever the ``Llama`` constructor raised; the error is
            printed and then re-raised unchanged.
    """
    # Only the constructor call is guarded, so the error message is never
    # attributed to anything other than model loading.
    try:
        llm = Llama(
            model_path=model_path,
            n_gpu_layers=n_gpu_layers,
            n_ctx=n_ctx,
            n_batch=n_batch,
        )
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise
    print("LLM model loaded successfully")
    return llm
def get_text_from_pdf(file, max_chars=10000):
    """Return the leading text of a PDF, stopping at the reference list.

    The document is loaded with ``PyPDFLoader``, re-split into 250-character
    chunks (50 characters of overlap), and the chunk texts are concatenated
    in order. Concatenation stops at the first chunk whose text starts with
    ``"REFERENCES"``.

    Args:
        file: PDF location, handed directly to ``PyPDFLoader``.
        max_chars: Upper bound on the length of the returned text. Defaults
            to 10000, the limit that was previously hard-coded.

    Returns:
        The concatenated chunk text truncated to ``max_chars`` characters.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # NOTE(review): chunks are created with overlap, so text shared between
    # neighbouring chunks may be repeated in the joined result -- confirm
    # whether that duplication is intended.
    parts = []
    collected = 0
    for chunk in chunks:
        content = chunk.page_content
        if content.startswith("REFERENCES"):
            break
        parts.append(content)
        collected += len(content)
        # Everything past max_chars is sliced away below, so there is no
        # point in accumulating more text than that.
        if collected >= max_chars:
            break

    # The former second pass walked the joined string one character at a
    # time; a single character can never start with "REFERENCES", so it only
    # copied the string (quadratically). It is dropped with no change in
    # output.
    return "".join(parts)[:max_chars]
def generate_prompt(research_paper):
    """Build the user prompt asking the model for a mind-map in Markdown.

    The earlier version wrote ``\\n`` and ``\\"`` inside a non-raw f-string,
    which produced literal backslash sequences in the prompt (and left one
    unmatched trailing quote). The template is now emitted with real newlines
    and plain double quotes; the wording itself is unchanged.

    Args:
        research_paper: Text of the paper, interpolated verbatim as the topic.

    Returns:
        The prompt string, beginning and ending with a newline.
    """
    # Literal placeholders such as "{Title}" are what the model is shown.
    template = "\n".join((
        "# {Title}",
        "## {Subtitle01}",
        "- {Emoji01} Bulletpoint01",
        "- {Emoji02} Bulletpoint02",
        "## {Subtitle02}",
        "- {Emoji03} Bulletpoint03",
        "- {Emoji04} Bulletpoint04",
    ))
    return (
        "\n"
        "As a text script expert, please help me to write a short text script "
        f'with the topic "{research_paper}".'
        "Your output should only and strictly use the following template:\n"
        f"{template}\n\n"
        "Summarize the giving topic to generate a mind map (as many subtitles "
        "as possible, with a minimum of three subtitles) structure markdown.\n"
        " Do not include anything in the response, that is not the part of mindmap.\n"
        ' Importantly your output must use language "English"\n'
    )
def generate_mindmap_structure(llm, prompt):
    """Send ``prompt`` to the model as a chat turn and return the reply text.

    Args:
        llm: Object exposing ``create_chat_completion(messages=..., ...)``.
        prompt: Content of the user message.

    Returns:
        The ``content`` field of the first choice's message in the response.
    """
    system_message = {
        'role': 'system',
        'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.',
    }
    user_message = {'role': 'user', 'content': prompt}

    # NOTE(review): top_p is normally a probability in (0, 1]; 3.0 is passed
    # through unchanged here -- confirm the intended value.
    sampling = {'temperature': 0.7, 'top_k': 200, 'top_p': 3.0}

    completion = llm.create_chat_completion(
        messages=[system_message, user_message],
        **sampling,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def generate_markdown(llm, file):
    """Produce mind-map Markdown for the PDF at ``file``.

    Extracts the PDF text, builds the prompt, queries the model, and then
    strips bold markers from the reply.

    Args:
        llm: Model object passed on to ``generate_mindmap_structure``.
        file: PDF location passed on to ``get_text_from_pdf``.

    Returns:
        The model's Markdown with ``"- **"`` rewritten to ``"### "`` and any
        remaining ``"**"`` removed.
    """
    paper_text = get_text_from_pdf(file)
    markdown = generate_mindmap_structure(llm, generate_prompt(paper_text))

    # Nothing to clean up when the reply contains no bold markers.
    if "**" not in markdown:
        return markdown

    # Bold bullets become third-level headings first, then leftover bold
    # markers are removed; the order matters.
    return markdown.replace("- **", "### ").replace("**", "")
def sanitize_markdown(llm, mindmap_markdown):
    """Ask the model to reformat ``mindmap_markdown`` into the mind-map template.

    The earlier prompt wrote ``\\n`` and ``\\"`` inside a non-raw f-string,
    so the model received literal backslash sequences (plus one unmatched
    trailing quote). The prompt now carries real newlines and plain quotes;
    the wording itself is unchanged.

    Args:
        llm: Model object passed on to ``generate_mindmap_structure``.
        mindmap_markdown: Text to be reformatted, interpolated verbatim.

    Returns:
        The reply text returned by ``generate_mindmap_structure``.
    """
    # Literal placeholders such as "{Title}" are what the model is shown.
    template = "\n".join((
        "# {Title}",
        "## {Subtitle01}",
        "- {Emoji01} Bulletpoint01",
        "- {Emoji02} Bulletpoint02",
        "## {Subtitle02}",
        "- {Emoji03} Bulletpoint03",
        "- {Emoji04} Bulletpoint04",
    ))
    prompt = (
        "\n"
        "As an experienced coder and programmer, help me convert the text "
        f'"{mindmap_markdown}" into a well-formatted markdown. '
        "Your output should only and strictly use the following template:\n"
        f"{template}\n\n"
        "Do not include anything in the response, that is not the part of mindmap.\n"
    )
    return generate_mindmap_structure(llm, prompt)