Upload 2 files
- main.py
- requirements.txt

main.py is a Streamlit app that summarizes Arabic text with the malmarjeh/gpt2 model (tokenized with aubmindlab/aragpt2-base and preprocessed with arabert); requirements.txt pins the dependencies it needs.
main.py
ADDED
import streamlit as st
from transformers import GPT2TokenizerFast, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor

# Load model and tokenizer
model_name = "malmarjeh/gpt2"
tokenizer = GPT2TokenizerFast.from_pretrained("aubmindlab/aragpt2-base")
model = AutoModelForCausalLM.from_pretrained(model_name)
preprocessor = ArabertPreprocessor(model_name=model_name)

# GPT-2 tokenizers ship without a pad token; register one and resize the
# model's embeddings so the new token id maps to a valid embedding row.
tokenizer.add_special_tokens({'pad_token': '<pad>'})
model.resize_token_embeddings(len(tokenizer))

# Streamlit UI
st.title('Arabic Text Summarizer')
text = st.text_area("Paste your Arabic text here:")

if st.button('Summarize'):
    if text:
        # Preprocess the input and wrap it in the prompt format the model expects
        processed_text = preprocessor.preprocess(text)
        formatted_text = '\n النص: ' + processed_text + ' \n الملخص: \n '
        # Pad/truncate the prompt to exactly 150 tokens so the slice below is valid
        tokens = tokenizer.batch_encode_plus([formatted_text], return_tensors='pt',
                                             padding='max_length', truncation=True,
                                             max_length=150)

        # Generate summary with beam search; the repetition penalty curbs loops
        output = model.generate(
            input_ids=tokens['input_ids'],
            attention_mask=tokens['attention_mask'],
            repetition_penalty=2.0,
            num_beams=5,
            max_length=600,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            bos_token_id=tokenizer.bos_token_id,
        )

        # Decode only the tokens generated after the 150-token prompt
        result = tokenizer.decode(output[0][150:], skip_special_tokens=True).strip()
        st.subheader("Original Text")
        st.write(text)
        st.subheader("Summarized Text")
        st.write(result)
    else:
        st.warning("Please enter Arabic text to summarize.")
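Streamlit re-runs the whole script on every widget interaction, so the loading block above executes again on each click unless it is cached. Below is a minimal sketch of how the heavyweight objects could be wrapped with st.cache_resource, assuming the same model names as above; the function name load_resources is illustrative, not part of the original file:

import streamlit as st
from transformers import GPT2TokenizerFast, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor

@st.cache_resource  # cache across Streamlit reruns; load only once per process
def load_resources(model_name: str = "malmarjeh/gpt2"):
    tokenizer = GPT2TokenizerFast.from_pretrained("aubmindlab/aragpt2-base")
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # Same pad-token setup as the main script, done once at load time
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
    model.resize_token_embeddings(len(tokenizer))
    preprocessor = ArabertPreprocessor(model_name=model_name)
    return tokenizer, model, preprocessor

tokenizer, model, preprocessor = load_resources()

st.cache_resource is available in the streamlit==1.27.2 pinned below, so the rest of the script can stay unchanged.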
requirements.txt
ADDED
altair==5.1.2
arabert==1.0.1
attrs==23.1.0
blinker==1.6.3
cachetools==5.3.1
certifi==2023.7.22
charset-normalizer==3.3.0
click==8.1.7
emoji==1.4.2
farasapy==0.0.14
filelock==3.12.4
fsspec==2023.9.2
gitdb==4.0.10
GitPython==3.1.37
huggingface-hub==0.17.3
idna==3.4
importlib-metadata==6.8.0
Jinja2==3.1.2
jsonschema==4.19.1
jsonschema-specifications==2023.7.1
markdown-it-py==3.0.0
MarkupSafe==2.1.3
mdurl==0.1.2
mpmath==1.3.0
networkx==3.1
numpy==1.26.0
packaging==23.2
pandas==2.1.1
Pillow==10.0.1
protobuf==4.24.4
PyArabic==0.6.15
pyarrow==13.0.0
pydeck==0.8.1b0
Pygments==2.16.1
python-dateutil==2.8.2
pytz==2023.3.post1
PyYAML==6.0.1
referencing==0.30.2
regex==2023.10.3
requests==2.31.0
rich==13.6.0
rpds-py==0.10.4
safetensors==0.4.0
sentencepiece==0.1.99
six==1.16.0
smmap==5.0.1
streamlit==1.27.2
sympy==1.12
tenacity==8.2.3
tokenizers==0.14.1
toml==0.10.2
toolz==0.12.0
torch==2.1.0
torchaudio==2.1.0
torchvision==0.16.0
tornado==6.3.3
tqdm==4.66.1
transformers==4.34.0
typing_extensions==4.8.0
tzdata==2023.3
tzlocal==5.1
urllib3==2.0.6
validators==0.22.0
zipp==3.17.0
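With these pins installed (pip install -r requirements.txt), the app can be started locally with streamlit run main.py. The torch==2.1.0 pin is paired with the matching torchvision 0.16.0 and torchaudio 2.1.0 builds, and transformers==4.34.0 expects the pinned tokenizers==0.14.1 series.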