Spaces:

raannakasturi
/

MindMap

Runtime error

App Files Files Community

raannakasturi committed 11 days ago

Commit

084b8b9

•

1 Parent(s): 048803a

Update generate_markdown.py

Browse files

Files changed (1) hide show

generate_markdown.py +74 -74

generate_markdown.py CHANGED Viewed

@@ -1,75 +1,75 @@
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import PyPDFLoader
-from llama_cpp import Llama
-def load_llm_model():
-    try:
-        llm = Llama(
-            model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
-            n_gpu_layers = -1,
-            n_ctx=100000,
-            n_batch=4096,
-        )
-        print("LLM model loaded successfully")
-        return llm
-    except Exception as e:
-        print(f"Error loading LLM model: {e}")
-        raise
-def get_text_from_pdf(file):
-    loader = PyPDFLoader(file)
-    pages = loader.load_and_split()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
-    texts = text_splitter.split_documents(pages)
-    final_text = ""
-    for text in texts:
-        if text.page_content.startswith("REFERENCES"):
-            break
-        else:
-            final_text = final_text + text.page_content
-    research_paper = ""
-    for text in final_text:
-        if text.startswith(("REFERENCES", "REFERENCESREFERENCES", "REFERENCESREFERENCESREFERENCES")):
-            break
-        else:
-            research_paper = research_paper + text
-    return research_paper[:10000]
-def generate_prompt(research_paper):
-    prompt = f'''
-    As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n  Importantly your output must use language \\"English\\""
-    '''
-    return prompt
-def generate_mindmap_structure(llm, prompt):
-    response = llm.create_chat_completion(
-        messages = [
-            {'role':'system',
-            'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'},
-            {'role':'user',
-            'content': prompt}
-        ],
-        temperature=0.7,
-        top_k=200,
-        top_p=3.0,
-    )
-    mindmap_data = response['choices'][0]['message']['content']
-    return mindmap_data
-def generate_markdown(llm, file):
-    final_text = get_text_from_pdf(file)
-    prompt = generate_prompt(final_text)
-    mindmap_markdown = generate_mindmap_structure(llm, prompt)
-    if "**" in mindmap_markdown:
-        mindmap_markdown = mindmap_markdown.replace("- **", "### ")
-        mindmap_markdown = mindmap_markdown.replace("**", "")
-    else:
-        pass
-    return mindmap_markdown
-def sanitize_markdown(llm, mindmap_markdown):
-    prompt = f'''
-    As an experienced coder and programmer, help me convert the text \\"{mindmap_markdown}\\" into a well-formatted markdown. Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nDo not include anything in the response, that is not the part of mindmap."
-    '''
-    sanitized_markdown = generate_mindmap_structure(llm, prompt)
     return sanitized_markdown

+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+from llama_cpp import Llama
+def load_llm_model(
+    model_path="/home/user/app/Llama-3.2-1B-Instruct-Q8_0.gguf",
+    n_ctx=100000,
+    n_batch=4096,
+):
+    """Load the GGUF model through llama.cpp and return the ``Llama`` object.
+
+    Args:
+        model_path: Location of the GGUF weights file. The default is the
+            absolute path this function previously hard-coded.
+        n_ctx: Context size handed to ``Llama``.
+        n_batch: Batch size handed to ``Llama``.
+
+    Returns:
+        The constructed ``Llama`` instance.
+
+    Raises:
+        Exception: Anything raised while constructing ``Llama``; the error is
+            printed and then re-raised unchanged.
+    """
+    try:
+        # Only the constructor is guarded so the error message stays accurate.
+        llm = Llama(
+            model_path=model_path,
+            n_gpu_layers=-1,
+            n_ctx=n_ctx,
+            n_batch=n_batch,
+        )
+    except Exception as e:
+        print(f"Error loading LLM model: {e}")
+        raise
+    print("LLM model loaded successfully")
+    return llm
+def get_text_from_pdf(file):
+    """Return the text of a PDF up to its references, capped at 10000 characters.
+
+    The PDF is loaded with ``PyPDFLoader``, re-split into 250-character chunks
+    with a 50-character overlap, and the chunks are concatenated in order until
+    one of them starts with ``"REFERENCES"``.
+
+    Args:
+        file: PDF location, passed straight to ``PyPDFLoader``.
+
+    Returns:
+        The concatenated chunk text truncated to its first 10000 characters.
+    """
+    loader = PyPDFLoader(file)
+    pages = loader.load_and_split()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
+    texts = text_splitter.split_documents(pages)
+    # NOTE(review): chunks are created with chunk_overlap=50, so joining them
+    # presumably repeats the overlapping text -- confirm this is intended.
+    kept_chunks = []
+    for text in texts:
+        if text.page_content.startswith("REFERENCES"):
+            break
+        kept_chunks.append(text.page_content)
+    # No per-character scan for "REFERENCES" afterwards: a single character can
+    # never start with that word, so such a scan could only copy the string.
+    return "".join(kept_chunks)[:10000]
+def generate_prompt(research_paper):
+    """Embed ``research_paper`` in the fixed mind-map instruction prompt.
+
+    Args:
+        research_paper: Text interpolated as the topic of the prompt.
+
+    Returns:
+        The prompt string. The template's escaped sequences are emitted as a
+        literal backslash followed by ``n`` or a double quote, not as real
+        line breaks; the only real newlines are the ones opening and closing
+        the triple-quoted literal.
+    """
+    return f'''
+    As a text script expert, please help me to write a short text script with the topic \\"{research_paper}\\".Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nSummarize the giving topic to generate a mind map (as many subtitles as possible, with a minimum of three subtitles) structure markdown.\\n Do not include anything in the response, that is not the part of mindmap.\\n  Importantly your output must use language \\"English\\""
+    '''
+def generate_mindmap_structure(llm, prompt):
+    """Send ``prompt`` to the chat model and return the text of its first reply.
+
+    Args:
+        llm: Object exposing ``create_chat_completion``.
+        prompt: Content of the user message.
+
+    Returns:
+        ``response['choices'][0]['message']['content']`` of the completion.
+    """
+    response = llm.create_chat_completion(
+        messages=[
+            {'role': 'system',
+             'content': 'You are a helpful research assistant for generating well-formatted mindmaps in MarkDown format from scientific research papers.'},
+            {'role': 'user',
+             'content': prompt},
+        ],
+        temperature=0.7,
+        top_k=200,
+        # top_p is a cumulative-probability cutoff, so 1.0 is its largest
+        # meaningful value; the earlier 3.0 was out of range.
+        top_p=1.0,
+    )
+    return response['choices'][0]['message']['content']
+def generate_markdown(llm, file):
+    """Generate mind-map markdown for the PDF at ``file`` using ``llm``.
+
+    Args:
+        llm: Model object forwarded to ``generate_mindmap_structure``.
+        file: PDF location forwarded to ``get_text_from_pdf``.
+
+    Returns:
+        The model's markdown with every ``"- **"`` turned into ``"### "`` and
+        all remaining ``"**"`` markers removed.
+    """
+    paper_text = get_text_from_pdf(file)
+    raw_markdown = generate_mindmap_structure(llm, generate_prompt(paper_text))
+    # Both replacements leave the text untouched when it contains no "**",
+    # so no membership check is needed first.
+    return raw_markdown.replace("- **", "### ").replace("**", "")
+def sanitize_markdown(llm, mindmap_markdown):
+    """Ask the model to reformat ``mindmap_markdown`` into the mind-map template.
+
+    Args:
+        llm: Model object forwarded to ``generate_mindmap_structure``.
+        mindmap_markdown: Text interpolated into the clean-up prompt.
+
+    Returns:
+        Whatever ``generate_mindmap_structure`` returns for the clean-up prompt.
+    """
+    prompt = f'''
+    As an experienced coder and programmer, help me convert the text \\"{mindmap_markdown}\\" into a well-formatted markdown. Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nDo not include anything in the response, that is not the part of mindmap."
+    '''
+    sanitized_markdown = generate_mindmap_structure(llm, prompt)
     return sanitized_markdown