OzoneAsai committed on
Commit
2532716
1 Parent(s): a1040ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -102
app.py CHANGED
@@ -1,119 +1,314 @@
1
  import streamlit as st
2
- import pandas as pd
3
  from transformers import pipeline
4
- from datetime import datetime
 
5
 
6
- # ================================
7
- # Streamlit Page Configuration
8
- # ================================
9
- st.set_page_config(
10
- page_title="🌐 Multi-Language Translator",
11
- layout="centered",
12
- initial_sidebar_state="auto",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  )
14
 
15
- # ================================
16
- # Cache the Translation Pipelines
17
- # ================================
18
  @st.cache_resource
19
- def load_translation_pipelines():
20
  """
21
- Load and cache translation pipelines to avoid reloading on every interaction.
22
  """
23
- enja = pipeline("translation", model="staka/fugumt-en-ja")
24
- jaen = pipeline("translation", model="staka/fugumt-ja-en")
25
- zhja = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-zh-ja")
26
- return {'enja': enja, 'jaen': jaen, 'zhja': zhja}
27
-
28
- # Load the translation models
29
- try:
30
- session_models = load_translation_pipelines()
31
- except Exception as e:
32
- st.error(f"Error loading translation models: {e}")
33
- session_models = {}
34
-
35
- # ================================
36
- # Streamlit Application Layout
37
- # ================================
38
- st.title("🌐 Multi-Language Translator")
39
-
40
- # Initialize session state for CSV creation flag
41
- if 'csv_created' not in st.session_state:
42
- st.session_state.csv_created = False
43
 
44
- # ================================
45
- # User Input Section
46
- # ================================
47
- st.header("🔤 Enter Text to Translate")
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Model selection
50
- model_options = {
51
- 'English to Japanese': 'enja',
52
- 'Japanese to English': 'jaen',
53
- 'Chinese to Japanese': 'zhja'
54
- }
55
- model_display = list(model_options.keys())
56
- model_keys = list(model_options.values())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- selected_model_display = st.selectbox("Select Translation Model", model_display, index=0)
59
- selected_model = model_options[selected_model_display]
60
 
61
- # Text input
62
- text = st.text_area("Input Text", height=150)
63
 
64
- # ================================
65
- # Translation and Output
66
- # ================================
67
- if st.button("🚀 Translate"):
68
- if not text.strip():
69
- st.warning("Please enter text to translate.")
70
- elif selected_model not in session_models:
71
- st.error("Selected translation model is not available.")
72
- else:
73
- with st.spinner("Translating..."):
74
- try:
75
- translator = session_models[selected_model]
76
- translation = translator(text)[0]['translation_text']
77
- st.success("Translation Successful!")
78
- st.subheader("📝 Translation Result")
79
- st.write(translation)
80
 
81
- # Prepare data for CSV
82
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
83
- data = {
84
- 'Timestamp': [timestamp],
85
- 'Model': [selected_model_display],
86
- 'Original Text': [text],
87
- 'Translated Text': [translation]
88
- }
89
- df = pd.DataFrame(data)
90
 
91
- # Save to CSV
92
- csv_file = 'translation_data.csv'
93
- if not st.session_state.csv_created:
94
- df.to_csv(csv_file, mode='w', header=True, index=False)
95
- st.session_state.csv_created = True
96
- else:
97
- df.to_csv(csv_file, mode='a', header=False, index=False)
98
 
99
- st.info(f"Translation saved to `{csv_file}`.")
100
- except Exception as e:
101
- st.error(f"An error occurred during translation: {e}")
 
 
 
 
 
 
102
 
103
- # ================================
104
- # Optional: Download Translation Data
105
- # ================================
106
- if st.button("📥 Download Translation Data"):
107
- try:
108
- df = pd.read_csv('translation_data.csv')
109
- csv = df.to_csv(index=False).encode('utf-8')
110
- st.download_button(
111
- label="Download CSV",
112
- data=csv,
113
- file_name='translation_data.csv',
114
- mime='text/csv',
115
- )
116
- except FileNotFoundError:
117
- st.warning("No translation data available to download.")
118
- except Exception as e:
119
- st.error(f"An error occurred while preparing the download: {e}")
 
1
  import streamlit as st
2
+ import feedparser
3
  from transformers import pipeline
4
+ import requests
5
+ import datetime
6
 
7
# Feed whose entries are listed and translated by this app.
rss_url = "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"

# Page setup: wide layout, with the same headline used as the browser tab
# title and (prefixed with an emoji) as the on-page title.
st.set_page_config(
    page_title="今日のテクノロジーニュース",
    layout="wide",
)
st.title("📡 今日のテクノロジーニュース")
13
+
14
# Catalogue of translation models offered in the sidebar dropdown.
#
# Every entry in `models` is a dict with four keys:
#   name        - Hugging Face Hub repository id, passed as `model=` to the
#                 translation pipeline
#   description - free text shown next to the name in the dropdown
#   src_lang    - source-language code handed to the pipeline
#   tgt_lang    - target-language code handed to the pipeline

# FLORES-200 style codes (the convention used by the NLLB checkpoints).
_EN = "eng_Latn"
_JA = "jpn_Jpan"
# mBART checkpoints use their own "xx_XX" language codes; giving them the
# FLORES-200 codes above makes the tokenizer reject or mis-handle the language.
_MBART_EN = "en_XX"
_MBART_JA = "ja_XX"

# (name, description, src_lang, tgt_lang), in dropdown order.
_MODEL_SPECS = [
    ("facebook/nllb-200-distilled-600M", "Translation • Updated Feb 15 • 322k • 504", _EN, _JA),
    ("facebook/mbart-large-50-many-to-many-mmt", "Translation • Updated Sep 29, 2023 • 646k • 278", _MBART_EN, _MBART_JA),
    ("facebook/nllb-200-3.3B", "Translation • Updated Feb 12, 2023 • 28.9k • 249", _EN, _JA),
    # NOTE(review): MADLAD-400 presumably selects the target language via a
    # text prefix rather than src/tgt codes - confirm before relying on it.
    ("google/madlad400-10b-mt", "Translation • Updated Apr 12 • 1.76k • 84", _EN, _JA),
    ("ken11/mbart-ja-en", "Translation • Updated Oct 13, 2021 • 63 • 3", _MBART_JA, _MBART_EN),
    ("facebook/nllb-200-1.3B", "Translation • Updated Feb 12, 2023 • 14.6k • 44", _EN, _JA),
    ("facebook/nllb-200-distilled-1.3B", "Translation • Updated Feb 12, 2023 • 101k • 98", _EN, _JA),
    ("alirezamsh/small100", "Translation • Updated Jul 23 • 1.85k • 60", _EN, _JA),
    # NOTE(review): the name suggests a quality-estimation metric rather than
    # a translation model - verify that it belongs in this list.
    ("Unbabel/wmt22-cometkiwi-da", "Translation • Updated Oct 13, 2023 • 1 • 24", _EN, _JA),
    ("ychenNLP/nllb-200-3.3B-easyproject", "Translation • Updated Aug 30, 2023 • 73 • 2", _EN, _JA),
    # NOTE(review): the "-ct2" / "ct2fast" repositories below look like
    # CTranslate2 exports, which a `transformers` pipeline presumably cannot
    # load - TODO confirm and prune if so.
    ("JustFrederik/sugoi-v3.3-ja-en-ct2-float16", "Translation • Updated May 10, 2023 • 2", _JA, _EN),
    ("JustFrederik/sugoi-v3.3-ja-en-ct2-int8", "Translation • Updated May 10, 2023 • 22 • 1", _JA, _EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2-float16", "Translation • Updated May 10, 2023 • 13 • 1", _JA, _EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2-int8", "Translation • Updated May 10, 2023", _JA, _EN),
    ("JustFrederik/sugoi-v4-ja-en-ct2", "Translation • Updated May 10, 2023 • 20 • 1", _JA, _EN),
    ("JustFrederik/sugoi-v3.3-ja-en-ct2", "Translation • Updated May 10, 2023", _JA, _EN),
    ("JustFrederik/nllb-200-distilled-600M-ct2-int8", "Translation • Updated May 15, 2023 • 225", _EN, _JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2-int8", "Translation • Updated May 15, 2023 • 74 • 1", _EN, _JA),
    ("JustFrederik/nllb-200-1.3B-ct2-int8", "Translation • Updated May 15, 2023 • 12", _EN, _JA),
    ("JustFrederik/nllb-200-1.3B-ct2-float16", "Translation • Updated May 15, 2023 • 6", _EN, _JA),
    ("JustFrederik/nllb-200-1.3B-ct2", "Translation • Updated May 15, 2023 • 14", _EN, _JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2", "Translation • Updated May 15, 2023 • 3", _EN, _JA),
    ("JustFrederik/nllb-200-distilled-1.3B-ct2-float16", "Translation • Updated May 15, 2023 • 7 • 1", _EN, _JA),
    ("JustFrederik/nllb-200-distilled-600M-ct2", "Translation • Updated May 15, 2023 • 4", _EN, _JA),
    ("JustFrederik/nllb-200-distilled-600M-ct2-float16", "Translation • Updated May 15, 2023 • 8", _EN, _JA),
    ("JustFrederik/nllb-200-3.3B-ct2-float16", "Translation • Updated May 15, 2023 • 26 • 3", _EN, _JA),
    # NOTE(review): the mREBEL checkpoints are presumably relation-extraction
    # models, not translators - verify.
    ("Babelscape/mrebel-large", "Translation • Updated Jun 21, 2023 • 67.5k • 66", _EN, _JA),
    ("Babelscape/mrebel-large-32", "Translation • Updated Jun 23, 2023 • 97 • 6", _EN, _JA),
    ("Babelscape/mrebel-base", "Translation • Updated Jun 23, 2023 • 66 • 5", _EN, _JA),
    ("winstxnhdw/nllb-200-distilled-1.3B-ct2-int8", "Translation • Updated Aug 3, 2023 • 2.42k • 4", _EN, _JA),
    ("michaelfeil/ct2fast-nllb-200-distilled-1.3B", "Translation • Updated Dec 10, 2023 • 10 • 1", _EN, _JA),
    ("michaelfeil/ct2fast-nllb-200-3.3B", "Translation • Updated Jul 21, 2023 • 36 • 11", _EN, _JA),
    # NOTE(review): "xlm_Latn" does not look like a language code and the
    # repository name suggests a word-alignment model - kept as-is, verify.
    ("qiyuw/WSPAlign-xlm-base", "Translation • Updated Mar 18 • 4", "xlm_Latn", _JA),
    # Further models can be appended here.
]

models = [
    {
        "name": name,
        "description": description,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }
    for name, description, src_lang, tgt_lang in _MODEL_SPECS
]
216
+
217
def _model_label(model):
    """Build the dropdown label ("<name> - <description>") for one entry."""
    return f"{model['name']} - {model['description']}"


# Sidebar dropdown for choosing the translation model; the selected value is
# the complete entry dict from `models`.
st.sidebar.header("翻訳モデルの選択")
selected_model = st.sidebar.selectbox(
    "使用する翻訳モデルを選択してください:",
    options=models,
    format_func=_model_label,
)
224
 
 
 
 
@st.cache_resource
def _build_translation_pipeline(model_name, src_lang, tgt_lang):
    """Create the translation pipeline, cached per argument combination.

    Errors are deliberately not caught here: an exception propagates to the
    caller instead of being turned into a return value, so a failed load is
    never stored in the resource cache.
    """
    return pipeline(
        "translation",
        model=model_name,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
    )


def load_translation_model(model_name, src_lang, tgt_lang):
    """Load the selected translation model, reusing a cached pipeline.

    Args:
        model_name: Model identifier passed as ``model=`` to ``pipeline``.
        src_lang: Source-language code passed to the pipeline.
        tgt_lang: Target-language code passed to the pipeline.

    Returns:
        The translation pipeline, or ``None`` if loading failed (an error
        message is shown in the app in that case).
    """
    try:
        return _build_translation_pipeline(model_name, src_lang, tgt_lang)
    except Exception as e:  # UI boundary: report the failure, keep the app alive
        # The failure is reported outside the cached function, so the next
        # rerun retries the load instead of replaying a cached ``None``.
        st.error(f"翻訳モデルのロード中にエラーが発生しました: {e}")
        return None


# Keep `load_translation_model.clear(...)` working for callers that reset the
# cache through the public name.
load_translation_model.clear = _build_translation_pipeline.clear
 
 
 
 
 
 
 
 
 
241
 
242
@st.cache_data
def _translate_cached(_translator, text, translator_key):
    """Run the pipeline on ``text``; cached per (text, translator_key).

    ``_translator`` is excluded from the cache key because of its leading
    underscore, so ``translator_key`` stands in for it and keeps results from
    different pipelines apart. Exceptions propagate, so failed translations
    are not cached.
    """
    return _translator(text, max_length=500)[0]['translation_text']


def translate_text(_translator, text):
    """Translate ``text`` with the given pipeline, caching successful results.

    Args:
        _translator: Translation pipeline, or a falsy value when no model
            could be loaded.
        text: Text to translate.

    Returns:
        The translated text, or the sentinel string "翻訳エラー" when no
        translator is available or the translation raised (the error is also
        shown in the app).
    """
    if not _translator:
        return "翻訳エラー"

    # Identify the pipeline for caching purposes. ``name_or_path`` is read
    # defensively; id() separates distinct pipeline objects and is presumably
    # stable across reruns because the pipeline is held by the resource cache.
    model = getattr(_translator, "model", None)
    translator_key = f"{getattr(model, 'name_or_path', '')}:{id(_translator)}"

    try:
        return _translate_cached(_translator, text, translator_key)
    except Exception as e:  # UI boundary: report the failure, keep the app alive
        st.error(f"翻訳中にエラーが発生しました: {e}")
        return "翻訳エラー"


# Keep `translate_text.clear()` working for callers that reset the cache
# through the public name.
translate_text.clear = _translate_cached.clear
256
 
257
# Upper bound, in seconds, for the feed request so that an unresponsive
# server cannot block the app indefinitely.
_FEED_TIMEOUT_SECONDS = 10


class _FeedStatusError(Exception):
    """Raised when the feed server answers with a status other than 200."""

    def __init__(self, status_code):
        super().__init__(f"unexpected HTTP status {status_code}")
        self.status_code = status_code


@st.cache_data(ttl=3600)
def _fetch_rss_feed_cached(url):
    """Download, archive and parse the feed; successful results live 1 hour.

    Raises instead of returning ``None`` so that failures are never cached.
    """
    response = requests.get(url, timeout=_FEED_TIMEOUT_SECONDS)
    if response.status_code != 200:
        raise _FeedStatusError(response.status_code)

    # Keep a timestamped copy of the raw XML in the working directory
    # (the feeds are accumulated as a dataset).
    filename = datetime.datetime.now().strftime("feed_%Y%m%d_%H%M%S.xml")
    with open(filename, 'wb') as f:
        f.write(response.content)

    return feedparser.parse(response.content)


def fetch_rss_feed(url):
    """Fetch and parse the RSS feed at ``url``.

    The raw XML of each successful download is saved to a timestamped file
    and the parsed feed is cached for one hour.

    Args:
        url: Address of the RSS feed.

    Returns:
        The parsed feed, or ``None`` if the download, the archive write or
        the parsing failed (an error message is shown in the app).
    """
    try:
        return _fetch_rss_feed_cached(url)
    except _FeedStatusError as err:
        st.error(f"RSSフィードの取得に失敗しました。ステータスコード: {err.status_code}")
    except Exception as e:  # UI boundary: report the failure, keep the app alive
        st.error(f"RSSフィードの取得中にエラーが発生しました: {e}")
    return None


# Keep `fetch_rss_feed.clear()` working for callers that reset the cache
# through the public name.
fetch_rss_feed.clear = _fetch_rss_feed_cached.clear
279
 
280
# Fetch the feed; without it there is nothing to render, so stop the script.
feed = fetch_rss_feed(rss_url)

if feed is None:
    st.stop()

# Load the translation model chosen in the sidebar.
translator = load_translation_model(
    selected_model['name'],
    selected_model['src_lang'],
    selected_model['tgt_lang'],
)

# Show every feed entry with its original and translated title and summary.
for entry in feed.entries:
    # Entries are not guaranteed to carry every field; fall back to an empty
    # string instead of failing on a missing attribute.
    title = entry.get("title", "")
    description = entry.get("description", "")
    link = entry.get("link", "")

    # Translate title and summary (results are cached by translate_text);
    # empty fields are passed through without calling the model.
    translated_title = translate_text(translator, title) if title else ""
    translated_description = (
        translate_text(translator, description) if description else ""
    )

    # Render the entry as Markdown.
    st.markdown(f"### **タイトル(英語):** {title}")
    st.markdown(f"**タイトル(日本語):** {translated_title}")
    st.markdown("**概要(英語):**")
    st.write(description)
    st.markdown("**概要(日本語):**")
    st.write(translated_description)
    if link:
        st.markdown(f"[🌐 続きを読む]({link})")
    st.markdown("---")
308
 
309
# Manual cache reset: drops the cached pipeline for the currently selected
# model, all cached translations and the cached feed, then confirms.
if st.button("キャッシュをクリア"):
    current_model_args = tuple(
        selected_model[field] for field in ('name', 'src_lang', 'tgt_lang')
    )
    load_translation_model.clear(*current_model_args)
    translate_text.clear()
    fetch_rss_feed.clear()
    st.success("キャッシュをクリアしました。")