Spaces:

raannakasturi
/

MindMap

Runtime error

App Files Files Community

raannakasturi committed on Nov 15, 2024

Commit

a1e6c4a

verified ·

1 Parent(s): e785663

Upload 3 files

Browse files

Files changed (3) hide show

app.py +90 -60
generate_markdown.py +16 -5
generate_mindmap.py +48 -43

app.py CHANGED Viewed

@@ -1,60 +1,90 @@
-import os
-import sys
-from generate_markdown import load_llm_model, generate_markdown
-from generate_mindmap import generate_mindmap_svg
-import gradio as gr
-import subprocess
-def generate(file):
-    print(f"Generating mindmap for {file}")
-    summary = "This is a summary of the research paper"
-    mindmap_markdown = generate_markdown(llm, file)
-    mindmap_svg = generate_mindmap_svg(mindmap_markdown)
-    print("Mindmap generated successfully")
-    return summary, mindmap_markdown, mindmap_svg
-theme = gr.themes.Soft(
-    primary_hue="purple",
-    secondary_hue="cyan",
-    neutral_hue="slate",
-    font=[gr.themes.GoogleFont('Syne'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins'), gr.themes.GoogleFont('poppins')],
-)
-with gr.Blocks(theme=theme, title="Binary Biology") as app:
-    file = gr.File(file_count='single', label='Upload Research Paper PDF file', file_types=['.pdf'])
-    summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
-    markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
-    graphical_mindmap = gr.Image(label='Graphical Mindmap', interactive=False, show_download_button=True, format='svg')
-    submit = gr.Button(value='Submit')
-    submit.click(generate,
-                inputs=[file],
-                outputs=[summary, markdown_mindmap, graphical_mindmap],
-                scroll_to_output=True,
-                show_progress=True,
-                queue=True,
-    )
-if __name__ == "__main__":
-    try:
-        env = os.environ.copy()
-        env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
-        cmd = ["pip", "install", "llama-cpp-python"]
-        subprocess.run(cmd, env=env)
-    except:
-        cmd = ["pip", "install", "llama-cpp-python"]
-        subprocess.run(cmd)
-    try:
-        try:
-            subprocess.run(['apt', 'install', '-y', 'graphviz'])
-            print("Graphviz installed successfully")
-        except:
-            subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'])
-            print("Graphviz installed successfully using sudo")
-    except:
-            print("Graphviz installation failed")
-            sys.exit(1)
-    print("Graphviz loaded successfully")
-    llm = load_llm_model()
-    print("Model loaded successfully")
-    app.queue(default_concurrency_limit=10).launch(show_error=True)

+import os
+import sys
+from generate_markdown import load_llm_model, generate_markdown, sanitize_markdown
+from generate_mindmap import generate_mindmap
+import gradio as gr
+import subprocess
+def generate(file):
+    print(f"Generating mindmap for {file.name}")
+    unformatted_markdown = True
+    summary = "This is a summary of the research paper"
+    mindmap_markdown = generate_markdown(llm, file)
+    print('mindmap_markdown:', mindmap_markdown)
+    while unformatted_markdown:
+        if mindmap_markdown.startswith("#") and '-' in mindmap_markdown:
+            unformatted_markdown = False
+            mindmap_svg, mindmap_pdf = generate_mindmap(mindmap_markdown)
+        else:
+            unformatted_markdown = True
+            mindmap_markdown = sanitize_markdown(llm, mindmap_markdown)
+    print("Mindmap generated successfully")
+    return summary, mindmap_markdown, mindmap_svg, mindmap_svg, mindmap_pdf, mindmap_pdf
+theme = gr.themes.Soft(
+    primary_hue="purple",
+    secondary_hue="cyan",
+    neutral_hue="slate",
+    font=[gr.themes.GoogleFont('Syne'), gr.themes.GoogleFont('Poppins')],
+)
+with gr.Blocks(theme=theme, title="Binary Biology") as app:
+    with gr.Row():
+        file = gr.File(file_count='single', label='Upload Research Paper PDF file', file_types=['.pdf'])
+        with gr.Column():
+            submit = gr.Button(value='Submit')
+            clear = gr.ClearButton(value='Clear')
+    with gr.Row():
+        summary = gr.TextArea(label='Summary', lines=5, interactive=False, show_copy_button=True)
+        markdown_mindmap = gr.Textbox(label='Mindmap', lines=5, interactive=False, show_copy_button=True)
+    with gr.Row():
+        with gr.Column():
+            svg_mindmap = gr.File(label='Graphical SVG Mindmap', interactive=False)
+            download_svg_mindmap = gr.File(label='Download SVG Mindmap', interactive=False)
+        with gr.Column():
+            pdf_mindmap = gr.File(label='Graphical PDF Mindmap', interactive=False)
+            download_pdf_mindmap = gr.File(label='Download PDF Mindmap', interactive=False)
+    submit.click(
+        generate,
+        inputs=[file],
+        outputs=[summary, markdown_mindmap, svg_mindmap, download_svg_mindmap, pdf_mindmap, download_pdf_mindmap],
+        scroll_to_output=True,
+        show_progress='minimal',
+        queue=True,
+    )
+    clear.click(
+        lambda: (None, None, None, None, None, None),
+        inputs=[file],
+        outputs=[summary, markdown_mindmap, svg_mindmap, download_svg_mindmap, pdf_mindmap, download_pdf_mindmap]
+    )
+if __name__ == "__main__":
+    try:
+        env = os.environ.copy()
+        env["CMAKE_ARGS"] = "-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
+        subprocess.run(["pip", "install", "llama-cpp-python"], env=env)
+    except Exception as e:
+        print(f"Failed to install llama-cpp-python: {e}")
+    try:
+        subprocess.run(['apt', 'install', '-y', 'graphviz'])
+        print("Graphviz installed successfully")
+    except Exception:
+        try:
+            subprocess.run(['sudo', 'apt', 'install', '-y', 'graphviz'])
+            print("Graphviz installed successfully using sudo")
+        except:
+            print("Graphviz installation failed")
+            sys.exit(1)
+    print("Graphviz loaded successfully")
+    llm = load_llm_model()
+    print("Model loaded successfully")
+    app.queue(default_concurrency_limit=1).launch(show_error=True)

generate_markdown.py CHANGED Viewed

@@ -6,10 +6,9 @@ def load_llm_model():
     try:
         llm = Llama(
             model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
-            # n_gpu_layers = 40,
-            n_ctx=130000,
-            n_batch=1024,
-            # main_gpu=0
         )
         print("LLM model loaded successfully")
         return llm
@@ -34,7 +33,7 @@ def get_text_from_pdf(file):
             break
         else:
             research_paper = research_paper + text
-    return research_paper[:100000]
 def generate_prompt(research_paper):
     prompt = f'''
@@ -61,4 +60,16 @@ def generate_markdown(llm, file):
     final_text = get_text_from_pdf(file)
     prompt = generate_prompt(final_text)
     mindmap_markdown = generate_mindmap_structure(llm, prompt)
     return mindmap_markdown

     try:
         llm = Llama(
             model_path="Llama-3.2-1B-Instruct-Q8_0.gguf",
+            n_gpu_layers = -1,
+            n_ctx=100000,
+            n_batch=4096,
         )
         print("LLM model loaded successfully")
         return llm
             break
         else:
             research_paper = research_paper + text
+    return research_paper[:10000]
 def generate_prompt(research_paper):
     prompt = f'''
     final_text = get_text_from_pdf(file)
     prompt = generate_prompt(final_text)
     mindmap_markdown = generate_mindmap_structure(llm, prompt)
+    if "**" in mindmap_markdown:
+        mindmap_markdown = mindmap_markdown.replace("- **", "### ")
+        mindmap_markdown = mindmap_markdown.replace("**", "")
+    else:
+        pass
     return mindmap_markdown
+def sanitize_markdown(llm, mindmap_markdown):
+    prompt = f'''
+    As an experienced coder and programmer, help me convert the text \\"{mindmap_markdown}\\" into a well-formatted markdown. Your output should only and strictly use the following template:\\n# {{Title}}\\n## {{Subtitle01}}\\n- {{Emoji01}} Bulletpoint01\\n- {{Emoji02}} Bulletpoint02\\n## {{Subtitle02}}\\n- {{Emoji03}} Bulletpoint03\\n- {{Emoji04}} Bulletpoint04\\n\\nDo not include anything in the response, that is not the part of mindmap."
+    '''
+    sanitized_markdown = generate_mindmap_structure(llm, prompt)
+    return sanitized_markdown

generate_mindmap.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from graphviz import Digraph
 import re
 import random
@@ -89,6 +90,11 @@ def add_nodes_to_graph(graph, node, parent_id=None, font_size=9, parent_color=No
         # Assign a random color to each child node (no inheritance from parent)
         add_nodes_to_graph(graph, child, node_id, font_size=max(8, font_size - 1), parent_color=parent_color)
 def generate_mindmap_svg(md_text):
     mindmap_dict = parse_markdown_to_dict(md_text)
     root_title = mindmap_dict.get('title', 'Mindmap')
@@ -102,63 +108,62 @@ def generate_mindmap_svg(md_text):
     # Replace %3 with the sanitized filename in the SVG content
     svg_content = svg_content.replace("%3", root_title)
     # Save the modified SVG content to a file
-    with open(f'{output_filename}.svg', 'w') as f:
         f.write(svg_content)
-    return f"{output_filename}"
 # md = '''
-# Here is a mind map summarizing the topic of combining machine learning (ML) and computational chemistry (CompChem) for predictive insights into chemical systems:
-# **I. Introduction**
-# * Machine learning (ML) poised to transform chemical sciences
-# * Combining ML and CompChem for predictive insights
-# **II. Computational Chemistry (CompChem)**
-# * Computational quantum chemistry (CQChem)
-# * Methods for generating data sets (e.g., wavefunction theory, correlated wavefunction methods, density functional theory)
-# * Representations of systems (e.g., simple, complex, ambiguous)
-# **III. Wavefunction Theory Methods**
-# * Nonrelativistic time-independent Schrödinger equation
-# * Electronic Schrödinger equation
-# * Hartree-Fock (HF) approach
-# * Correlated wavefunction methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
-# **IV. Density Functional Theory (DFT)**
-# * Kinetic energy (KE-) or orbital-free (OF-) DFT
-# * Exchange-correlation functional (EC)
-# * Kohn-Sham (KS-) DFT
-# * Semiempirical methods (e.g., extended Hückel theory, neglect of diatomic differential overlap)
-# **V. Semiempirical Methods**
-# * Extended Hückel theory
-# * Neglect of diatomic differential overlap
-# * Semiempirical bond-order potentials (BOPs)
-# * Semiempirical nuclear quantum effects (NQEs)
-# **VI. Response Properties**
-# * Nuclear forces (e.g., F = -Π)
-# * Hessian calculations (e.g., second derivative of energy with respect to nuclear positions)
-# * Energy conserving forces (e.g., dipole moments)
-# **VII. Applications of ML in CompChem**
-# * Predicting molecular and material properties
-# * Predicting chemical reactions and processes
-# * Predicting materials properties (e.g., conductivity, optical properties)
-# * Predicting drug design and development
-# **VIII. Future Directions**
-# * Developing more accurate ML models for CompChem
-# * Improving the transferability of ML models between different systems
-# * Using ML to accelerate and improve the discovery of new materials and compounds
-# '''
 # generate_mindmap_svg(md)

 from graphviz import Digraph
+from cairosvg import svg2pdf
 import re
 import random
         # Assign a random color to each child node (no inheritance from parent)
         add_nodes_to_graph(graph, child, node_id, font_size=max(8, font_size - 1), parent_color=parent_color)
+def generate_mindmap_pdf(svg_file):
+    pdf_file = svg_file.replace(".svg", ".pdf")
+    svg2pdf(file_obj=open(svg_file, "rb"), write_to=pdf_file)
+    return pdf_file
 def generate_mindmap_svg(md_text):
     mindmap_dict = parse_markdown_to_dict(md_text)
     root_title = mindmap_dict.get('title', 'Mindmap')
     # Replace %3 with the sanitized filename in the SVG content
     svg_content = svg_content.replace("%3", root_title)
     # Save the modified SVG content to a file
+    with open(output_filename, 'w') as f:
         f.write(svg_content)
+    return output_filename
+def generate_mindmap(md_text):
+    mindmap_svg = generate_mindmap_svg(md_text)
+    mindmap_pdf = generate_mindmap_pdf(mindmap_svg)
+    return mindmap_svg, mindmap_pdf
 # md = '''
+# # Nucleic Acids Research: Updates to SAbDab and SAbDab-nano
+# ## Introduction
+# - **Antibodies in the Age of Biotherapeutics**
+#   - Antibodies are fundamental components of the immune system
+#   - Represent the largest class of biotherapeutics
+#   - Ability to bind to antigen targets with high affinity and specificity makes them promising candidates for development of therapeutic antibodies against various targets, including cancer, virus, and other diseases.
+# ## Updates to Data Annotation
+# - **Search Interface**
+#   - SAbDab can now be searched via a Flask app served by a fast SQL backend
+#   - Structures can be searched based on experimental method, resolution, species, type of antigen, presence of affinity values, and presence of amino acid residues at specific sequence positions defined using the Chothia numbering scheme.
+# - **Auxiliary Databases**
+#   - Thera-SAbDab and CoV-AbDab databases contain antibody sequence information linking to relevant entries in SAbDab
+#   - These databases contain antibody sequence information, linking to relevant entries in SAbDab where structures of the antibody in question (CoV-AbDab) or structures with at least 95% sequence identity (Thera-SAbDab) exist.
+# ## Updates to Data Access
+# - **Search Features**
+#   - New search features were added to improve the ability to create task-specific nanobody and antibody datasets
+#   - Free-text keyword query can be performed over certain annotation fields (antigen, species, publication, and structure title)
+# - **Download Options**
+#   - Structures matching search queries can be downloaded in bulk as a zipped archive and a summary .csv file containing annotation data
+#   - Individual structures can be accessed via the structure viewer interface
+# ## Updates to SAbDab-nano
+# - **Development of SAbDab-nano**
+#   - SAbDab-nano is a subset of SAbDab containing nanobody structures
+#   - Entries for which at least one antibody has a heavy chain variable domain, but no light chain variable domain are added to SAbDab-nano
+# - **Composition and Growth**
+#   - SAbDab-nano contains 823 structures (492 nanobodies) with non-redundant CDR sequences
+#   - Average 3.8 structures per week are added to SAbDab-nano over the first 36 weeks of 2021
+# ## Comparison to Other Nanobody Resources
+# - **Other Databases**
+#   - There are other databases compiling nanobody data, but no other resource provides nanobody structures in a continuously updated and comprehensively annotated format
+#   - Several databases compiling nanobody sequences (but not structures) from a variety of data sources
+# ## Conclusion
+# - **SAbDab and SAbDab-nano**
+#   - SAbDab continues to be updated weekly and represents the most thoroughly annotated antibody structure database from which researchers can quickly create custom datasets
+#   - Searching SAbDab is now more powerful and faster, with new connections to auxiliary databases that catalogue therapeutic and antigen-specific antibodies
+#   - SAbDab and SAbDab-nano can be accessed freely online under a CC-BY 4.0 license at opig.stats.ox.ac.uk/webapps/newsabdab/ or at opig.stats.ox.ac.uk/webapps/newsabdab/nano respectively'''
 # generate_mindmap_svg(md)