Spaces:

WhiteAngelss
/

turkce-varlik-tanima-teknofest-nlp

Running

App Files Community

ihsan66 committed on Jul 31, 2024

Commit

df052cb

verified ·

1 Parent(s): 5ff50cc

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -61

app.py CHANGED Viewed

@@ -5,8 +5,8 @@ import spacy
 import io
 import torch
 import torchaudio
-import librosa
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 st.set_page_config(layout="wide")
@@ -36,6 +36,10 @@ st.sidebar.write("")
 if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
     aggregation = "simple"
 else:
     aggregation = "first"
@@ -44,19 +48,24 @@ st.subheader("Metin Giriş Yöntemi Seç")
 input_method = st.radio("", ('Örneklerden Seç', 'Metin Yaz veya Yapıştır', 'Dosya Yükle', 'Ses Dosyası Yükle'))
 if input_method == 'Örneklerden Seç':
-    selected_text = st.selectbox('Metin Seç', example_list, index=0, key="example_selectbox")
-    input_text = st.text_area("Metin", selected_text, height=128, key="text_area")
 elif input_method == "Metin Yaz veya Yapıştır":
-    input_text = st.text_area('Metin Yaz veya Yapıştır', value="", height=128, key="text_area")
 elif input_method == "Dosya Yükle":
     uploaded_file = st.file_uploader("Dosya Seç", type="txt")
     if uploaded_file is not None:
         input_text = str(uploaded_file.read(), "utf-8")
     else:
         input_text = ""
 elif input_method == "Ses Dosyası Yükle":
     uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
-    audio_bytes = uploaded_audio.read() if uploaded_audio is not None else None
 @st.cache_resource
 def load_pipeline(model_name, task_type):
@@ -125,63 +134,76 @@ def transcribe_audio(audio_file):
     return transcription
-# Çalıştır butonu
-if st.button("Çalıştır"):
-    if input_method == "Ses Dosyası Yükle" and uploaded_audio is not None:
         transcription = transcribe_audio(uploaded_audio)
         st.subheader("Ses Transkripsiyonu")
         st.write(transcription)
-    elif input_method in ["Metin Yaz veya Yapıştır", "Örneklerden Seç", "Dosya Yükle"] and input_text:
-        task = st.sidebar.radio("Görev Seçin", ['Metin Sınıflandırma', 'Metin Analizi', 'Duygu Analizi', 'Metin Oluşturma'])
-        if task == "Metin Sınıflandırma":
-            pipeline_model = load_pipeline(model_checkpoint, task)
-            output = pipeline_model(input_text)
-            df = pd.DataFrame(output)
-            st.subheader(f"{task} Sonuçları")
-            st.dataframe(df)
-        elif task == "Duygu Analizi":
-            pipeline_model = load_pipeline(model_checkpoint, task)
-            output = pipeline_model(input_text)
-            df = pd.DataFrame(output)
-            st.subheader(f"{task} Sonuçları")
-            st.dataframe(df)
-        elif task == "Metin Analizi":
-            ner_pipeline = setModel(model_checkpoint, aggregation)
-            output = ner_pipeline(input_text)
-            output_comb = entity_comb(output)
-            df = pd.DataFrame.from_dict(output_comb)
-            cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
-            df_final = df[cols_to_keep]
-            st.subheader("Tanımlanan Varlıklar")
-            st.dataframe(df_final)
-            st.subheader("Spacy Tarzı Görselleştirme")
-            spacy_display = {"ents": [], "text": input_text, "title": None}
-            for entity in output_comb:
-                spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
-            tner_entity_list = ["person", "group", "facility", "organization", "geopolitical area", "location", "product", "event", "work of art", "law", "language", "date", "time", "percent", "money", "quantity", "ordinal number", "cardinal number"]
-            spacy_entity_list = ["PERSON", "NORP", "FAC", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL", "MISC"]
-            for ent in spacy_display["ents"]:
-                if model_checkpoint == "asahi417/tner-xlm-roberta-base-ontonotes5":
-                    ent["label"] = spacy_entity_list[tner_entity_list.index(ent["label"])]
-                else:
-                    if ent["label"] == "PER":
-                        ent["label"] = "PERSON"
-            html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": spacy_entity_list})
-            style = "<style>mark.entity { display: inline-block }</style>"
-            st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
-        elif task == "Metin Oluşturma":
-            pipeline_model = load_pipeline(model_checkpoint, task)
-            output = pipeline_model(input_text, max_length=50, num_return_sequences=1)
-            st.subheader(f"{task} Sonuçları")
-            for idx, item in enumerate(output):
-                st.write(f"Öneri {idx+1}: {item['generated_text']}")

 import io
 import torch
 import torchaudio
+from transformers import Wav2Vec2ForCTC, Wav2Vec2FeatureExtractor
+from transformers import Wav2Vec2Processor
 st.set_page_config(layout="wide")
 if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
     aggregation = "simple"
+elif model_checkpoint in ["xlm-roberta-large-finetuned-conll03-english", "asahi417/tner-xlm-roberta-base-ontonotes5"]:
+    aggregation = "simple"
+    st.sidebar.write("")
+    st.sidebar.write("The selected NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta pretrained language model.")
 else:
     aggregation = "first"
 input_method = st.radio("", ('Örneklerden Seç', 'Metin Yaz veya Yapıştır', 'Dosya Yükle', 'Ses Dosyası Yükle'))
 if input_method == 'Örneklerden Seç':
+    selected_text = st.selectbox('Metin Seç', example_list, index=0, key=1)
+    st.subheader("Seçilen Metin")
+    input_text = st.text_area("Metin", selected_text, height=128, max_chars=None, key=2)
 elif input_method == "Metin Yaz veya Yapıştır":
+    st.subheader("Metin")
+    input_text = st.text_area('Metin Yaz veya Yapıştır', value="", height=128, max_chars=None, key=2)
 elif input_method == "Dosya Yükle":
+    st.subheader("Metin")
     uploaded_file = st.file_uploader("Dosya Seç", type="txt")
     if uploaded_file is not None:
         input_text = str(uploaded_file.read(), "utf-8")
     else:
         input_text = ""
 elif input_method == "Ses Dosyası Yükle":
+    st.subheader("Ses Dosyası")
     uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
+    if uploaded_audio is not None:
+        audio_bytes = uploaded_audio.read()
 @st.cache_resource
 def load_pipeline(model_name, task_type):
     return transcription
+Run_Button = st.button("Çalıştır", key=None)
+if input_method == "Metin Yaz veya Yapıştır":
+    st.subheader("Metin Girişi")
+    input_text = st.text_area("Metni buraya yazın veya yapıştırın:", key="text_input_area")
+    if input_text != "":
+        st.subheader("Girdiğiniz Metin")
+        st.write(input_text)
+elif input_method == "Ses Dosyası Yükle":
+    st.subheader("Ses Dosyası")
+    uploaded_audio = st.file_uploader("Ses Dosyasını Seç", type=["wav"], key="audio_file_uploader")
+    if uploaded_audio is not None:
         transcription = transcribe_audio(uploaded_audio)
         st.subheader("Ses Transkripsiyonu")
         st.write(transcription)
+if input_text != "":
+    if task == "Metin Sınıflandırma":
+        pipeline_model = load_pipeline(model_checkpoint, task)
+        output = pipeline_model(input_text)
+        df = pd.DataFrame(output)
+        st.subheader(f"{task} Sonuçları")
+        st.dataframe(df)
+    elif task == "Duygu Analizi":
+        pipeline_model = load_pipeline(model_checkpoint, task)
+        output = pipeline_model(input_text)
+        df = pd.DataFrame(output)
+        st.subheader(f"{task} Sonuçları")
+        st.dataframe(df)
+    elif task == "Metin Analizi":
+        ner_pipeline = setModel(model_checkpoint, aggregation)
+        output = ner_pipeline(input_text)
+        output_comb = entity_comb(output)
+        df = pd.DataFrame.from_dict(output_comb)
+        cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
+        df_final = df[cols_to_keep]
+        st.subheader("Tanımlanan Varlıklar")
+        st.dataframe(df_final)
+        st.subheader("Spacy Tarzı Görselleştirme")
+        spacy_display = {}
+        spacy_display["ents"] = []
+        spacy_display["text"] = input_text
+        spacy_display["title"] = None
+        for entity in output_comb:
+            spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
+        tner_entity_list = ["person", "group", "facility", "organization", "geopolitical area", "location", "product", "event", "work of art", "law", "language", "date", "time", "percent", "money", "quantity", "ordinal number", "cardinal number"]
+        spacy_entity_list = ["PERSON", "NORP", "FAC", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LAW", "LANGUAGE", "DATE", "TIME", "PERCENT", "MONEY", "QUANTITY", "ORDINAL", "CARDINAL", "MISC"]
+        for ent in spacy_display["ents"]:
+            if model_checkpoint == "asahi417/tner-xlm-roberta-base-ontonotes5":
+                ent["label"] = spacy_entity_list[tner_entity_list.index(ent["label"])]
+            else:
+                if ent["label"] == "PER":
+                    ent["label"] = "PERSON"
+        html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": spacy_entity_list})
+        style = "<style>mark.entity { display: inline-block }</style>"
+        st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
+    elif task == "Metin Oluşturma":
+        pipeline_model = load_pipeline(model_checkpoint, task)
+        output = pipeline_model(input_text, max_length=50, num_return_sequences=1)
+        st.subheader(f"{task} Sonuçları")
+        for idx, item in enumerate(output):
+            st.write(f"Öneri {idx+1}: {item['generated_text']}")