Spaces:
Sleeping
Sleeping
File size: 1,295 Bytes
f51b958 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import streamlit as st
import glob, os, sys; sys.path.append('/src')
#import helper
import preprocessing as pre
import cleaning as clean
def app():
    """Render the policy-document analysis page.

    Sets the sidebar title, then draws a centered page heading and a file
    uploader accepting ``pdf``, ``docx`` and ``txt`` files inside a
    container. When a file has been uploaded, its name is displayed, the
    file is passed to ``pre.load_document`` and that result is passed to
    ``clean.preprocessing``. When nothing has been uploaded, a centered
    placeholder message is shown instead.
    """
    st.sidebar.title('Analyse Policy Document')

    with st.container():
        st.markdown(
            "<h1 style='text-align: center; color: black;'>SDSN X GIZ Policy Tracing</h1>",
            unsafe_allow_html=True,
        )

        uploaded = st.file_uploader('Upload PDF File', type=['pdf', 'docx', 'txt'])

        if uploaded is None:
            # Two blank writes precede the notice (presumably for vertical
            # spacing -- confirm before removing).
            st.write(' ')
            st.write(' ')
            st.markdown(
                "<h3 style='text-align: center; color: black;'>no PDF uploaded ...</h3>",
                unsafe_allow_html=True,
            )
        else:
            st.write("Filename: ", uploaded.name)

            # NOTE: the file is read through pre.load_document; an earlier
            # page-by-page pdfplumber extraction used to live here as
            # commented-out code.
            documents = pre.load_document(uploaded)

            # The three values returned by the preprocessing step are
            # unpacked but not used any further inside this function.
            processed_docs, frame, full_text = clean.preprocessing(documents)
            st.write('... ')
|