Spaces:

pszemraj
/

summarize-long-text

Running on CPU Upgrade

App Files Files Community

Peter committed on May 23, 2022

Commit

c0a9b19

•

1 Parent(s): b2df366

:truck: move functions

Browse files

Files changed (2) hide show

app.py +2 -39
utils.py +49 -0

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import nltk
 from cleantext import clean
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
 _here = Path(__file__).parent
@@ -18,27 +19,6 @@ transformers.logging.set_verbosity_error()
 logging.basicConfig()
-def truncate_word_count(text, max_words=512):
-    """
-    truncate_word_count - a helper function for the gradio module
-    Parameters
-    ----------
-    text : str, required, the text to be processed
-    max_words : int, optional, the maximum number of words, default=512
-    Returns
-    -------
-    dict, the text and whether it was truncated
-    """
-    # split on whitespace with regex
-    words = re.split(r"\s+", text)
-    processed = {}
-    if len(words) > max_words:
-        processed["was_truncated"] = True
-        processed["truncated_text"] = " ".join(words[:max_words])
-    else:
-        processed["was_truncated"] = False
-        processed["truncated_text"] = text
-    return processed
 def proc_submission(
@@ -117,23 +97,6 @@ def proc_submission(
     return html
-def load_examples(examples_dir="examples"):
-    """
-    load_examples - a helper function for the gradio module to load examples
-    Returns:
-        list of str, the examples
-    """
-    src = _here / examples_dir
-    src.mkdir(exist_ok=True)
-    examples = [f for f in src.glob("*.txt")]
-    # load the examples into a list
-    text_examples = []
-    for example in examples:
-        with open(example, "r") as f:
-            text = f.read()
-            text_examples.append([text, "large", 2, 512, 0.7, 3.5, 3])
-    return text_examples
 if __name__ == "__main__":
@@ -183,6 +146,6 @@ if __name__ == "__main__":
         title=title,
         description=description,
         article="The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial.",
-        examples=load_examples(),
         cache_examples=True,
     ).launch()

 from cleantext import clean
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
+from utils import load_examples, truncate_word_count
 _here = Path(__file__).parent
 logging.basicConfig()
 def proc_submission(
     return html
 if __name__ == "__main__":
         title=title,
         description=description,
         article="The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial.",
+        examples=load_examples(_here / "examples"),
         cache_examples=True,
     ).launch()

utils.py ADDED Viewed

	@@ -0,0 +1,49 @@

+"""
+    utils.py - Utility functions for the project.
+"""
+from pathlib import Path
+import re
+def truncate_word_count(text, max_words=512):
+    """
+    truncate_word_count - a helper function for the gradio module
+    Splits the text on runs of whitespace and keeps at most max_words of the resulting pieces.
+    Parameters
+    ----------
+    text : str, required, the text to be processed
+    max_words : int, optional, the maximum number of words, default=512
+    Returns
+    -------
+    dict with two keys:
+        "was_truncated" : bool, True when the split produced more than max_words pieces
+        "truncated_text" : str, the first max_words pieces joined by single spaces when
+            truncated, otherwise the original text returned unchanged
+    """
+    # split on whitespace with regex
+    # NOTE: leading or trailing whitespace produces empty-string pieces, and those count toward max_words
+    words = re.split(r"\s+", text)
+    processed = {}
+    if len(words) > max_words:
+        processed["was_truncated"] = True
+        # rejoining with single spaces collapses the original whitespace (newlines, tabs)
+        processed["truncated_text"] = " ".join(words[:max_words])
+    else:
+        processed["was_truncated"] = False
+        # short input is passed through untouched, original whitespace included
+        processed["truncated_text"] = text
+    return processed
+def load_examples(src):
+    """
+    load_examples - a helper function for the gradio module to load examples
+    Parameters
+    ----------
+    src : str or Path, the directory searched (non-recursively) for *.txt example files.
+        It is created if missing; its parent must already exist because mkdir is
+        called without parents=True.
+    Returns:
+        list of list, one row per *.txt file found: the file's full text followed by
+        the fixed values "large", 2, 512, 0.7, 3.5, 3
+    """
+    src = Path(src)
+    # create the directory when absent so the glob below simply finds nothing
+    src.mkdir(exist_ok=True)
+    examples = [f for f in src.glob("*.txt")]
+    # load the examples into a list
+    text_examples = []
+    for example in examples:
+        # NOTE(review): no encoding is given, so the platform default is used - confirm the example files are compatible
+        with open(example, "r") as f:
+            text = f.read()
+            # NOTE(review): the six values after the text presumably line up with the gradio interface inputs - confirm against app.py
+            text_examples.append([text, "large", 2, 512, 0.7, 3.5, 3])
+    return text_examples