changxin committed on
Commit
c4d4284
1 Parent(s): 067b2b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -163
app.py CHANGED
@@ -1,164 +1,150 @@
1
  import streamlit as st
2
- from streamlit.components.v1 import html
3
-
4
- st.header('嵌入网页及图表')
5
-
6
- html('''
7
- <iframe src="https://web.powerva.microsoft.com/environments/Default-51a58d6c-4fcf-4b75-8608-d00bf7f244d5/bots/new_bot_830e155fc862429e89683426b31c9bd5/webchat" height="500" frameborder="1" style="width:100%"></iframe>
8
- ''',height=520)
9
-
10
- html('''
11
- <iframe src="https://d.pbihub.cn/" height="600" frameborder="1" style="width:100%"></iframe>
12
- ''',height=620)
13
-
14
-
15
- html('''
16
- <head><meta charset="utf-8"><title>测试</title></head>
17
- <body>
18
- <div id="main" style="width: 600px;height:400px;"></div>
19
- <script src="http://echarts.baidu.com/build/dist/echarts.js"></script>
20
- <script>
21
- require.config({paths: {echarts: 'http://echarts.baidu.com/build/dist'}});
22
- require(
23
- ['echarts','echarts/chart/bar'],
24
- function (ec) {
25
- var myChart = ec.init(document.getElementById('main'));
26
- var option = {
27
-
28
- title : {
29
-
30
- text: '某地区蒸发量和降水量',
31
-
32
- subtext: '纯属虚构'
33
-
34
- },
35
-
36
- tooltip : {
37
-
38
- trigger: 'axis'
39
-
40
- },
41
-
42
- legend: {
43
-
44
- data:['蒸发量','降水量']
45
-
46
- },
47
-
48
- toolbox: {
49
-
50
- show : true,
51
-
52
- feature : {
53
-
54
- dataView : {show: true, readOnly: false},
55
-
56
- magicType : {show: true, type: ['line', 'bar']},
57
-
58
- restore : {show: true},
59
-
60
- saveAsImage : {show: true}
61
-
62
- }
63
-
64
- },
65
-
66
- calculable : true,
67
-
68
- xAxis : [
69
-
70
- {
71
-
72
- type : 'category',
73
-
74
- data : ['1月','2月','3月','4月','5月','6月','7月','8月','9月','10月','11月','12月']
75
-
76
- }
77
-
78
- ],
79
-
80
- yAxis : [
81
-
82
- {
83
-
84
- type : 'value'
85
-
86
- }
87
-
88
- ],
89
-
90
- series : [
91
-
92
- {
93
-
94
- name:'蒸发量',
95
-
96
- type:'bar',
97
-
98
- data:[2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3],
99
-
100
- markPoint : {
101
-
102
- data : [
103
-
104
- {type : 'max', name: '最大值'},
105
-
106
- {type : 'min', name: '最小值'}
107
-
108
- ]
109
-
110
- },
111
-
112
- markLine : {
113
-
114
- data : [
115
-
116
- {type : 'average', name: '平均值'}
117
-
118
- ]
119
-
120
- }
121
-
122
- },
123
-
124
- {
125
-
126
- name:'降水量',
127
-
128
- type:'bar',
129
-
130
- data:[2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3],
131
-
132
- markPoint : {
133
-
134
- data : [
135
-
136
- {name : '年最高', value : 182.2, xAxis: 7, yAxis: 183},
137
-
138
- {name : '年最低', value : 2.3, xAxis: 11, yAxis: 3}
139
-
140
- ]
141
-
142
- },
143
-
144
- markLine : {
145
-
146
- data : [
147
-
148
- {type : 'average', name : '平均值'}
149
-
150
- ]
151
-
152
- }
153
-
154
- }
155
-
156
- ]
157
- };
158
-
159
- myChart.setOption(option);
160
- }
161
- );
162
- </script>
163
- </body>
164
- ''',height=600)
 
1
  import streamlit as st
2
+ import math
3
+ import re
4
+ import os
5
+ from PyPDF2 import PdfFileReader, PdfFileWriter
6
+ import pandas as pd
7
+ import pdfplumber
8
+ from docx2pdf import convert
9
+ import fitz
10
+ import base64
11
+
12
+ st.header('PDF文件处理工具测试')
13
+
14
+ def fx(x):
15
+ return sum(x,[])
16
+ fns=st.radio('请选择PDF处理类型:',['拆分','合并','读取','在线预览','转换'])
17
+ if fns=='拆分':
18
+ uploaded_file = st.text_input("请输入要处理的pdf文件地址:")
19
+ if uploaded_file !='':
20
+
21
+ pdf_reader = PdfFileReader(uploaded_file)
22
+ n=pdf_reader.getNumPages()
23
+ che=st.radio('选择拆分类型',['按固定页数拆分','截取某几页','删除指定页面'])
24
+ if che=='按固定页数拆分':
25
+ fn=st.number_input('请输入每组拆分的文档页数:',1,n,1)
26
+ stre=st.text_input("请输入拆分后文件存放根目录:")
27
+ zs=math.ceil(n/fn)
28
+ if st.button('开始拆分>>'):
29
+ for page in range(1,zs+1):
30
+ for pn in range(fn*page-fn,fn*page):
31
+ if pn<n:
32
+ pdf_writer = PdfFileWriter()
33
+ pdf_writer.addPage(pdf_reader.getPage(pn))
34
+ with open(stre+'/test-{}.pdf'.format(page), 'wb') as out:
35
+ pdf_writer.write(out)
36
+ elif che=='截取某几页':
37
+ st_en=st.text_input("请输入截取的起止页码,格式为“1-5”或“1,3,5”:")
38
+ stre2=st.text_input("请输入截取后pdf文件存放根目录:")
39
+ if st_en!='':
40
+ tt=[int(x) for x in re.split(r'[-,\s]\s*',st_en)]
41
+ if st.button('开始截取>>'):
42
+ outw=PdfFileWriter()
43
+ for r in (tt if ',' in st_en else range(tt[0]-1,tt[1])):
44
+ outw.addPage(pdf_reader.getPage(r))
45
+ with open(stre2+'/666.pdf', 'wb') as out:
46
+ outw.write(out)
47
+ else:
48
+ st_en2=st.text_input("请输入需要删除的页码,格式为“1-5”或“1,3,5”:")
49
+ stre3=st.text_input("请输入删除指定页面后的pdf文件存放根目录:")
50
+ if st_en2!='':
51
+ tt=[int(x) for x in re.split(r'[-,\s]\s*',st_en2)]
52
+ if st.button('开始删除>>'):
53
+ outw2=PdfFileWriter()
54
+ for r in range(n):
55
+ if r not in (tt if ',' in st_en2 else range(tt[0]-1,tt[1])):
56
+ outw2.addPage(pdf_reader.getPage(r))
57
+ with open(stre3+'/666.pdf', 'wb') as out:
58
+ outw2.write(out)
59
+ elif fns=='合并':
60
+ path = st.text_input("请输入要处理的pdf文件根目录:")
61
+ scn = st.text_input("请填写输出文件地址及文件名")
62
+ if path !='' and scn!='':
63
+ file_list = os.listdir(path)
64
+ if st.button('开始合并>>'):
65
+ file_out = PdfFileWriter()
66
+ for file in file_list:
67
+ docdir = os.path.join(path, file)
68
+ file_read = PdfFileReader(docdir)
69
+ for pageNum in range(file_read.getNumPages()):
70
+ file_out.addPage(file_read.getPage(pageNum))
71
+ with open(scn,'wb') as output:
72
+ file_out.write(output)
73
+
74
+
75
+ elif fns=='读取':
76
+ path3 = st.text_input("请输入要读取的pdf文件地址:")
77
+ if path3 !='':
78
+ ms=st.radio('请选择读取模式:',['指定页码','全部'])
79
+ if ms=='指定页码':
80
+ ymq= st.number_input("请选择要读取的pdf页码:",1,66,1)
81
+ dqlx=st.radio('请选择读取类型',['文本内容','表格内容'])
82
+ with pdfplumber.open(path3) as p:
83
+ page = p.pages[ymq-1]
84
+ if dqlx=='文本内容':
85
+ textdata = page.extract_text()
86
+ st.write(textdata)
87
+ else:
88
+ n_table=st.number_input('请选择读取页面中第几个表格:',1,3,1)
89
+ tables=page.extract_tables()
90
+ datan=tables[n_table-1]
91
+ st.dataframe(pd.DataFrame(datan[1:],columns=datan[0]))
92
+ else:
93
+ dqlx2=st.radio('请选择读取类型',['文本内容','表格内容'])
94
+ with pdfplumber.open(path3) as p:
95
+ if dqlx2=='文本内容':
96
+ sz='\n'.join([page.extract_text() for page in p.pages])
97
+ st.write(sz)
98
+ else:
99
+ st.dataframe(pd.concat([pd.DataFrame(data=y[1:],columns=y[0]) for y in fx([page.extract_tables() for page in p.pages])]))
100
+
101
+ elif fns=='在线预览':
102
+ file = st.file_uploader("请上传PDF")
103
+ if file is not None:
104
+ base64_pdf = base64.b64encode(file.read()).decode('utf-8')
105
+ pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="1000" type="application/pdf">'
106
+ st.markdown(pdf_display, unsafe_allow_html=True)
107
+
108
+ else:
109
+ ms1=st.radio('请选择转换模式:',['word->pdf','ppt->pdf','pdf->jpg/png','jpg/png->pdf'])
110
+ if ms1=='word->pdf':
111
+ path4 = st.text_input("请输入要批量转换的word文件根目录:")
112
+ if path4 !='':
113
+ FileList = map(lambda x: path4 + '\\' + x, os.listdir(path4))
114
+ for file in FileList:
115
+ convert(file, f"{file.split('.')[0]}.pdf")
116
+ st.success('转换成功!')
117
+ elif ms1=='pdf->jpg/png':
118
+ path5 = st.text_input("请输入要转换的pdf文件地址:")
119
+ dir_1=st.text_input("请输入要输出的图片保存根目录:")
120
+ if path5 !='' and dir_1 !='':
121
+ doc = fitz.open(path5)
122
+ for page in doc:
123
+ pix = page.get_pixmap()
124
+ pix.save(dir_1+"/page-%i.png" % page.number)
125
+
126
+ elif ms1=='jpg/png->pdf':
127
+ dir_2=st.text_input("请输入要转换为pdf的图片根目录:")
128
+ path6 = st.text_input("请输入合成的pdf文件存放地址:")
129
+ if path6 !='' and dir_2 !='':
130
+ doc = fitz.open()
131
+ imglist = os.listdir(dir_2)
132
+ for i, f in enumerate(imglist):
133
+ img = fitz.open(os.path.join(dir_2, f))
134
+ rect = img[0].rect
135
+ pdfbytes = img.convert_to_pdf()
136
+ img.close()
137
+ imgPDF = fitz.open("pdf", pdfbytes)
138
+ page = doc.new_page(width = rect.width,height = rect.height)
139
+ page.show_pdf_page(rect, imgPDF, 0)
140
+ doc.save(path6)
141
+ elif ms1=='ppt->pdf':
142
+ dir_3=st.text_input("请输入要转换为pdf的PPT文件地址:")
143
+ path7 = st.text_input("请输入生成的pdf文件存放地址:")
144
+ if path7 !='' and dir_3 !='':
145
+ ppt = fitz.open(dir_3)
146
+ pdfbytes = ppt.convert_to_pdf()
147
+ pdf = fitz.open("pdf", pdfbytes)
148
+ pdf.save(path7)
149
+ else:
150
+ ""