Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,119 +1,314 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
from transformers import pipeline
|
4 |
-
|
|
|
5 |
|
6 |
-
#
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
)
|
14 |
|
15 |
-
# ================================
|
16 |
-
# Cache the Translation Pipelines
|
17 |
-
# ================================
|
18 |
@st.cache_resource
|
19 |
-
def
|
20 |
"""
|
21 |
-
|
22 |
"""
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
except Exception as e:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
# ================================
|
36 |
-
# Streamlit Application Layout
|
37 |
-
# ================================
|
38 |
-
st.title("🌐 Multi-Language Translator")
|
39 |
-
|
40 |
-
# Initialize session state for CSV creation flag
|
41 |
-
if 'csv_created' not in st.session_state:
|
42 |
-
st.session_state.csv_created = False
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
|
63 |
|
64 |
-
#
|
65 |
-
|
66 |
-
# ================================
|
67 |
-
if st.button("🚀 Translate"):
|
68 |
-
if not text.strip():
|
69 |
-
st.warning("Please enter text to translate.")
|
70 |
-
elif selected_model not in session_models:
|
71 |
-
st.error("Selected translation model is not available.")
|
72 |
-
else:
|
73 |
-
with st.spinner("Translating..."):
|
74 |
-
try:
|
75 |
-
translator = session_models[selected_model]
|
76 |
-
translation = translator(text)[0]['translation_text']
|
77 |
-
st.success("Translation Successful!")
|
78 |
-
st.subheader("📝 Translation Result")
|
79 |
-
st.write(translation)
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
'Original Text': [text],
|
87 |
-
'Translated Text': [translation]
|
88 |
-
}
|
89 |
-
df = pd.DataFrame(data)
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
df.to_csv(csv_file, mode='w', header=True, index=False)
|
95 |
-
st.session_state.csv_created = True
|
96 |
-
else:
|
97 |
-
df.to_csv(csv_file, mode='a', header=False, index=False)
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
#
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
csv = df.to_csv(index=False).encode('utf-8')
|
110 |
-
st.download_button(
|
111 |
-
label="Download CSV",
|
112 |
-
data=csv,
|
113 |
-
file_name='translation_data.csv',
|
114 |
-
mime='text/csv',
|
115 |
-
)
|
116 |
-
except FileNotFoundError:
|
117 |
-
st.warning("No translation data available to download.")
|
118 |
-
except Exception as e:
|
119 |
-
st.error(f"An error occurred while preparing the download: {e}")
|
|
|
1 |
import streamlit as st
|
2 |
+
import feedparser
|
3 |
from transformers import pipeline
|
4 |
+
import requests
|
5 |
+
import datetime
|
6 |
|
7 |
+
# Feed that supplies the articles shown on the page.
rss_url = "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"

# Page-level Streamlit configuration, followed by the on-page heading.
st.set_page_config(layout="wide", page_title="今日のテクノロジーニュース")
st.title("📡 今日のテクノロジーニュース")
|
13 |
+
|
14 |
+
# Catalogue of translation models offered in the sidebar.
# NOTE(review): whether every repository below can be loaded with these
# language codes was not checked here -- confirm before relying on an entry.
_ENGLISH = "eng_Latn"
_JAPANESE = "jpn_Jpan"


def _model_entry(name, stats, src_lang=_ENGLISH, tgt_lang=_JAPANESE):
    """Build one model record; the direction defaults to English -> Japanese."""
    return {
        "name": name,
        "description": f"Translation • {stats}",
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }


def _ja_en_entry(name, stats):
    """Build a model record for the Japanese -> English direction."""
    return _model_entry(name, stats, src_lang=_JAPANESE, tgt_lang=_ENGLISH)


models = [
    _model_entry("facebook/nllb-200-distilled-600M", "Updated Feb 15 • 322k • 504"),
    _model_entry("facebook/mbart-large-50-many-to-many-mmt", "Updated Sep 29, 2023 • 646k • 278"),
    _model_entry("facebook/nllb-200-3.3B", "Updated Feb 12, 2023 • 28.9k • 249"),
    _model_entry("google/madlad400-10b-mt", "Updated Apr 12 • 1.76k • 84"),
    _ja_en_entry("ken11/mbart-ja-en", "Updated Oct 13, 2021 • 63 • 3"),
    _model_entry("facebook/nllb-200-1.3B", "Updated Feb 12, 2023 • 14.6k • 44"),
    _model_entry("facebook/nllb-200-distilled-1.3B", "Updated Feb 12, 2023 • 101k • 98"),
    _model_entry("alirezamsh/small100", "Updated Jul 23 • 1.85k • 60"),
    _model_entry("Unbabel/wmt22-cometkiwi-da", "Updated Oct 13, 2023 • 1 • 24"),
    _model_entry("ychenNLP/nllb-200-3.3B-easyproject", "Updated Aug 30, 2023 • 73 • 2"),
    _ja_en_entry("JustFrederik/sugoi-v3.3-ja-en-ct2-float16", "Updated May 10, 2023 • 2"),
    _ja_en_entry("JustFrederik/sugoi-v3.3-ja-en-ct2-int8", "Updated May 10, 2023 • 22 • 1"),
    _ja_en_entry("JustFrederik/sugoi-v4-ja-en-ct2-float16", "Updated May 10, 2023 • 13 • 1"),
    _ja_en_entry("JustFrederik/sugoi-v4-ja-en-ct2-int8", "Updated May 10, 2023"),
    _ja_en_entry("JustFrederik/sugoi-v4-ja-en-ct2", "Updated May 10, 2023 • 20 • 1"),
    _ja_en_entry("JustFrederik/sugoi-v3.3-ja-en-ct2", "Updated May 10, 2023"),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2-int8", "Updated May 15, 2023 • 225"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2-int8", "Updated May 15, 2023 • 74 • 1"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2-int8", "Updated May 15, 2023 • 12"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2-float16", "Updated May 15, 2023 • 6"),
    _model_entry("JustFrederik/nllb-200-1.3B-ct2", "Updated May 15, 2023 • 14"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2", "Updated May 15, 2023 • 3"),
    _model_entry("JustFrederik/nllb-200-distilled-1.3B-ct2-float16", "Updated May 15, 2023 • 7 • 1"),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2", "Updated May 15, 2023 • 4"),
    _model_entry("JustFrederik/nllb-200-distilled-600M-ct2-float16", "Updated May 15, 2023 • 8"),
    _model_entry("JustFrederik/nllb-200-3.3B-ct2-float16", "Updated May 15, 2023 • 26 • 3"),
    _model_entry("Babelscape/mrebel-large", "Updated Jun 21, 2023 • 67.5k • 66"),
    _model_entry("Babelscape/mrebel-large-32", "Updated Jun 23, 2023 • 97 • 6"),
    _model_entry("Babelscape/mrebel-base", "Updated Jun 23, 2023 • 66 • 5"),
    _model_entry("winstxnhdw/nllb-200-distilled-1.3B-ct2-int8", "Updated Aug 3, 2023 • 2.42k • 4"),
    _model_entry("michaelfeil/ct2fast-nllb-200-distilled-1.3B", "Updated Dec 10, 2023 • 10 • 1"),
    _model_entry("michaelfeil/ct2fast-nllb-200-3.3B", "Updated Jul 21, 2023 • 36 • 11"),
    _model_entry("qiyuw/WSPAlign-xlm-base", "Updated Mar 18 • 4", src_lang="xlm_Latn"),
    # Further models can be appended below.
]
|
216 |
+
|
217 |
+
def _model_label(model):
    """Render a model record as the text shown in the dropdown."""
    return f"{model['name']} - {model['description']}"


# Sidebar dropdown for picking the translation model; the selected value is
# the whole model record, not just its name.
st.sidebar.header("翻訳モデルの選択")
selected_model = st.sidebar.selectbox(
    "使用する翻訳モデルを選択してください:",
    options=models,
    format_func=_model_label,
)
|
224 |
|
|
|
|
|
|
|
225 |
@st.cache_resource
def load_translation_model(model_name, src_lang, tgt_lang):
    """
    Load the selected translation pipeline and keep it in the resource cache.

    Returns the pipeline object, or None after reporting the failure in the
    UI when the pipeline cannot be constructed.
    """
    # NOTE(review): a None result is returned normally, so it is presumably
    # cached like any other value until the cache is cleared -- confirm.
    pipeline_options = {
        "model": model_name,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }
    try:
        return pipeline("translation", **pipeline_options)
    except Exception as load_error:
        st.error(f"翻訳モデルのロード中にエラーが発生しました: {load_error}")
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
+
@st.cache_data
def _cached_translation(_translator, model_id, text):
    """
    Run the pipeline on ``text`` and memoize the result per model.

    ``_translator`` is left out of Streamlit's cache key because of its
    leading underscore, so ``model_id`` is passed only to make the key
    model-specific. Exceptions propagate to the caller, which means a failed
    call is never stored in the cache.
    """
    return _translator(text, max_length=500)[0]['translation_text']


def translate_text(_translator, text):
    """
    Translate ``text`` with the given pipeline, caching successful results.

    Args:
        _translator: Translation pipeline, or a falsy value when loading failed.
        text: Source text to translate.

    Returns:
        The translated string, or "翻訳エラー" when no translator is available
        or the translation raised (the error is also reported in the UI).
    """
    if not _translator:
        return "翻訳エラー"
    # Identify the model so translations made by one model are not served
    # after the user switches to another; falls back to None if unavailable.
    model_id = getattr(getattr(_translator, "model", None), "name_or_path", None)
    try:
        return _cached_translation(_translator, model_id, text)
    except Exception as e:
        st.error(f"翻訳中にエラーが発生しました: {e}")
        return "翻訳エラー"


# Keep ``translate_text.clear()`` working for code that resets the cache.
translate_text.clear = _cached_translation.clear
|
256 |
|
257 |
+
@st.cache_data(ttl=3600)
def fetch_rss_feed(url, timeout=10):
    """
    Download an RSS feed, archive the raw XML, and return the parsed feed.

    The result is cached for one hour.

    Args:
        url: Address of the RSS feed.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the app indefinitely.

    Returns:
        The object produced by ``feedparser.parse``, or None when the request
        fails, returns a non-200 status, or the archive file cannot be written
        (the problem is reported in the UI).
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            st.error(f"RSSフィードの取得に失敗しました。ステータスコード: {response.status_code}")
            return None
        # Archive the raw XML under a timestamped name (accumulated as a dataset).
        now = datetime.datetime.now()
        filename = now.strftime("feed_%Y%m%d_%H%M%S.xml")
        with open(filename, 'wb') as f:
            f.write(response.content)
        # Parse the downloaded bytes.
        return feedparser.parse(response.content)
    except Exception as e:
        st.error(f"RSSフィードの取得中にエラーが発生しました: {e}")
        return None
|
279 |
|
280 |
+
# Fetch the feed first; without it there is nothing to render, so halt the run.
feed = fetch_rss_feed(rss_url)
if feed is None:
    st.stop()

# Load the pipeline for the model chosen in the sidebar.
translator = load_translation_model(
    selected_model['name'],
    selected_model['src_lang'],
    selected_model['tgt_lang'],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
+
# Translate and render the title and summary of every article in the feed.
for entry in feed.entries:
    # Feed items are not guaranteed to carry every field; fall back to empty
    # strings so one incomplete item cannot abort rendering with AttributeError.
    title = getattr(entry, "title", "")
    description = getattr(entry, "description", "")
    link = getattr(entry, "link", "")

    # Only send non-empty text to the translator.
    translated_title = translate_text(translator, title) if title else ""
    translated_description = translate_text(translator, description) if description else ""

    # Show the original and translated text as Markdown.
    st.markdown(f"### **タイトル(英語):** {title}")
    st.markdown(f"**タイトル(日本語):** {translated_title}")
    st.markdown("**概要(英語):**")
    st.write(description)
    st.markdown("**概要(日本語):**")
    st.write(translated_description)
    if link:
        st.markdown(f"[🌐 続きを読む]({link})")
    st.markdown("---")
|
308 |
|
309 |
+
# Button that resets the cached model, translations and feed.
clear_requested = st.button("キャッシュをクリア")
if clear_requested:
    # The model cache is cleared only for the currently selected model.
    model_cache_args = (
        selected_model['name'],
        selected_model['src_lang'],
        selected_model['tgt_lang'],
    )
    load_translation_model.clear(*model_cache_args)
    for cached_function in (translate_text, fetch_rss_feed):
        cached_function.clear()
    st.success("キャッシュをクリアしました。")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|