ihsan66 committed on
Commit
df052cb
1 Parent(s): 5ff50cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -61
app.py CHANGED
@@ -5,8 +5,8 @@ import spacy
5
  import io
6
  import torch
7
  import torchaudio
8
- import librosa
9
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
10
 
11
  st.set_page_config(layout="wide")
12
 
@@ -36,6 +36,10 @@ st.sidebar.write("")
36
 
37
  if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
38
  aggregation = "simple"
 
 
 
 
39
  else:
40
  aggregation = "first"
41
 
@@ -44,19 +48,24 @@ st.subheader("Metin Giriş Yöntemi Seç")
44
  input_method = st.radio("", ('Örneklerden Seç', 'Metin Yaz veya Yapıştır', 'Dosya Yükle', 'Ses Dosyası Yükle'))
45
 
46
  if input_method == 'Örneklerden Seç':
47
- selected_text = st.selectbox('Metin Seç', example_list, index=0, key="example_selectbox")
48
- input_text = st.text_area("Metin", selected_text, height=128, key="text_area")
 
49
  elif input_method == "Metin Yaz veya Yapıştır":
50
- input_text = st.text_area('Metin Yaz veya Yapıştır', value="", height=128, key="text_area")
 
51
  elif input_method == "Dosya Yükle":
 
52
  uploaded_file = st.file_uploader("Dosya Seç", type="txt")
53
  if uploaded_file is not None:
54
  input_text = str(uploaded_file.read(), "utf-8")
55
  else:
56
  input_text = ""
57
  elif input_method == "Ses Dosyası Yükle":
 
58
  uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
59
- audio_bytes = uploaded_audio.read() if uploaded_audio is not None else None
 
60
 
61
  @st.cache_resource
62
  def load_pipeline(model_name, task_type):
@@ -125,63 +134,76 @@ def transcribe_audio(audio_file):
125
 
126
  return transcription
127
 
 
 
 
 
 
 
 
 
 
128
 
129
- # Çalıştır butonu
130
- if st.button("Çalıştır"):
131
- if input_method == "Ses Dosyası Yükle" and uploaded_audio is not None:
 
 
132
  transcription = transcribe_audio(uploaded_audio)
133
  st.subheader("Ses Transkripsiyonu")
134
  st.write(transcription)
135
- elif input_method in ["Metin Yaz veya Yapıştır", "Örneklerden Seç", "Dosya Yükle"] and input_text:
136
- task = st.sidebar.radio("Görev Seçin", ['Metin Sınıflandırma', 'Metin Analizi', 'Duygu Analizi', 'Metin Oluşturma'])
137
 
138
- if task == "Metin Sınıflandırma":
139
- pipeline_model = load_pipeline(model_checkpoint, task)
140
- output = pipeline_model(input_text)
141
- df = pd.DataFrame(output)
142
- st.subheader(f"{task} Sonuçları")
143
- st.dataframe(df)
144
-
145
- elif task == "Duygu Analizi":
146
- pipeline_model = load_pipeline(model_checkpoint, task)
147
- output = pipeline_model(input_text)
148
- df = pd.DataFrame(output)
149
- st.subheader(f"{task} Sonuçları")
150
- st.dataframe(df)
151
-
152
- elif task == "Metin Analizi":
153
- ner_pipeline = setModel(model_checkpoint, aggregation)
154
- output = ner_pipeline(input_text)
155
- output_comb = entity_comb(output)
156
- df = pd.DataFrame.from_dict(output_comb)
157
- cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
158
- df_final = df[cols_to_keep]
159
- st.subheader("Tanımlanan Varlıklar")
160
- st.dataframe(df_final)
161
-
162
- st.subheader("Spacy Tarzı Görselleştirme")
163
- spacy_display = {"ents": [], "text": input_text, "title": None}
164
-
165
- for entity in output_comb:
166
- spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
167
-
168
- tner_entity_list = ["person", "group", "facility", "organization", "geopolitical area", "location", "product", "event", "work of art", "law", "language", "date", "time", "percent", "money", "quantity", "ordinal number", "cardinal number"]
169
- spacy_entity_list = ["PERSON", "NORP", "FAC", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL", "MISC"]
170
-
171
- for ent in spacy_display["ents"]:
172
- if model_checkpoint == "asahi417/tner-xlm-roberta-base-ontonotes5":
173
- ent["label"] = spacy_entity_list[tner_entity_list.index(ent["label"])]
174
- else:
175
- if ent["label"] == "PER":
176
- ent["label"] = "PERSON"
177
-
178
- html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": spacy_entity_list})
179
- style = "<style>mark.entity { display: inline-block }</style>"
180
- st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
181
-
182
- elif task == "Metin Oluşturma":
183
- pipeline_model = load_pipeline(model_checkpoint, task)
184
- output = pipeline_model(input_text, max_length=50, num_return_sequences=1)
185
- st.subheader(f"{task} Sonuçları")
186
- for idx, item in enumerate(output):
187
- st.write(f"Öneri {idx+1}: {item['generated_text']}")
 
 
 
 
 
5
  import io
6
  import torch
7
  import torchaudio
8
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2FeatureExtractor
9
+ from transformers import Wav2Vec2Processor
10
 
11
  st.set_page_config(layout="wide")
12
 
 
36
 
37
  if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
38
  aggregation = "simple"
39
+ elif model_checkpoint in ["xlm-roberta-large-finetuned-conll03-english", "asahi417/tner-xlm-roberta-base-ontonotes5"]:
40
+ aggregation = "simple"
41
+ st.sidebar.write("")
42
+ st.sidebar.write("The selected NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta pretrained language model.")
43
  else:
44
  aggregation = "first"
45
 
 
48
  input_method = st.radio("", ('Örneklerden Seç', 'Metin Yaz veya Yapıştır', 'Dosya Yükle', 'Ses Dosyası Yükle'))
49
 
50
  if input_method == 'Örneklerden Seç':
51
+ selected_text = st.selectbox('Metin Seç', example_list, index=0, key=1)
52
+ st.subheader("Seçilen Metin")
53
+ input_text = st.text_area("Metin", selected_text, height=128, max_chars=None, key=2)
54
  elif input_method == "Metin Yaz veya Yapıştır":
55
+ st.subheader("Metin")
56
+ input_text = st.text_area('Metin Yaz veya Yapıştır', value="", height=128, max_chars=None, key=2)
57
  elif input_method == "Dosya Yükle":
58
+ st.subheader("Metin")
59
  uploaded_file = st.file_uploader("Dosya Seç", type="txt")
60
  if uploaded_file is not None:
61
  input_text = str(uploaded_file.read(), "utf-8")
62
  else:
63
  input_text = ""
64
  elif input_method == "Ses Dosyası Yükle":
65
+ st.subheader("Ses Dosyası")
66
  uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
67
+ if uploaded_audio is not None:
68
+ audio_bytes = uploaded_audio.read()
69
 
70
  @st.cache_resource
71
  def load_pipeline(model_name, task_type):
 
134
 
135
  return transcription
136
 
137
+ Run_Button = st.button("Çalıştır", key=None)
138
+
139
+ if input_method == "Metin Yaz veya Yapıştır":
140
+ st.subheader("Metin Girişi")
141
+ input_text = st.text_area("Metni buraya yazın veya yapıştırın:", key="text_input_area")
142
+
143
+ if input_text != "":
144
+ st.subheader("Girdiğiniz Metin")
145
+ st.write(input_text)
146
 
147
+ elif input_method == "Ses Dosyası Yükle":
148
+ st.subheader("Ses Dosyası")
149
+ uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
150
+
151
+ if uploaded_audio is not None:
152
  transcription = transcribe_audio(uploaded_audio)
153
  st.subheader("Ses Transkripsiyonu")
154
  st.write(transcription)
 
 
155
 
156
+ if input_text != "":
157
+ if task == "Metin Sınıflandırma":
158
+ pipeline_model = load_pipeline(model_checkpoint, task)
159
+ output = pipeline_model(input_text)
160
+ df = pd.DataFrame(output)
161
+ st.subheader(f"{task} Sonuçları")
162
+ st.dataframe(df)
163
+
164
+ elif task == "Duygu Analizi":
165
+ pipeline_model = load_pipeline(model_checkpoint, task)
166
+ output = pipeline_model(input_text)
167
+ df = pd.DataFrame(output)
168
+ st.subheader(f"{task} Sonuçları")
169
+ st.dataframe(df)
170
+
171
+ elif task == "Metin Analizi":
172
+ ner_pipeline = setModel(model_checkpoint, aggregation)
173
+ output = ner_pipeline(input_text)
174
+ output_comb = entity_comb(output)
175
+ df = pd.DataFrame.from_dict(output_comb)
176
+ cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
177
+ df_final = df[cols_to_keep]
178
+ st.subheader("Tanımlanan Varlıklar")
179
+ st.dataframe(df_final)
180
+
181
+ st.subheader("Spacy Tarzı Görselleştirme")
182
+ spacy_display = {}
183
+ spacy_display["ents"] = []
184
+ spacy_display["text"] = input_text
185
+ spacy_display["title"] = None
186
+
187
+ for entity in output_comb:
188
+ spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
189
+
190
+ tner_entity_list = ["person", "group", "facility", "organization", "geopolitical area", "location", "product", "event", "work of art", "law", "language", "date", "time", "percent", "money", "quantity", "ordinal number", "cardinal number"]
191
+ spacy_entity_list = ["PERSON", "NORP", "FAC", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL", "MISC"]
192
+
193
+ for ent in spacy_display["ents"]:
194
+ if model_checkpoint == "asahi417/tner-xlm-roberta-base-ontonotes5":
195
+ ent["label"] = spacy_entity_list[tner_entity_list.index(ent["label"])]
196
+ else:
197
+ if ent["label"] == "PER":
198
+ ent["label"] = "PERSON"
199
+
200
+ html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": spacy_entity_list})
201
+ style = "<style>mark.entity { display: inline-block }</style>"
202
+ st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
203
+
204
+ elif task == "Metin Oluşturma":
205
+ pipeline_model = load_pipeline(model_checkpoint, task)
206
+ output = pipeline_model(input_text, max_length=50, num_return_sequences=1)
207
+ st.subheader(f"{task} Sonuçları")
208
+ for idx, item in enumerate(output):
209
+ st.write(f"Öneri {idx+1}: {item['generated_text']}")