TURNA / apps /pos_tagging.py
furkanakkurt1618's picture
add ner and pos_tagging
f2d86ab
import requests
import streamlit as st
import time
from transformers import pipeline
import os
from .utils import query
# Hugging Face access token read from the environment; None when the variable is unset.
HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
# Authorization headers built from the token. Left empty when no token is
# configured so that a bogus literal "Bearer None" credential is never produced.
headers = {"Authorization": f"Bearer {HF_AUTH_TOKEN}"} if HF_AUTH_TOKEN else {}
def write():
    """Render the Part-of-Speech Tagging page of the Streamlit app.

    Draws the page title and description, a sidebar with the model selector
    and the generation parameters (maximum length, length penalty, no-repeat
    n-gram size), a text area for the input sentence, and a "Generate"
    button. When the button is pressed with non-empty text, the text, the
    model URL and the parameter dict are passed to ``query`` (imported from
    ``.utils``) and whatever it returns is displayed with ``st.success``.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.markdown("# Part-of-Speech Tagging")
    st.sidebar.header("Part-of-Speech Tagging")
    st.write(
        '''Here, you can detect part-of-speech tags in your text using the fine-tuned TURNA POS models.'''
    )

    # Sidebar
    # Taken from https://huggingface.co/spaces/flax-community/spanish-gpt2/blob/main/app.py
    st.sidebar.subheader("Configurable parameters")

    model_name = st.sidebar.selectbox(
        "Model Selector",
        options=[
            "turna_pos_boun",
            "turna_pos_imst",
        ],
        index=0,
    )
    max_new_tokens = st.sidebar.number_input(
        "Maximum length",
        # At least one new token must be requested; 0 is not a usable
        # generation length.
        min_value=1,
        max_value=64,
        value=64,
        help="The maximum length of the sequence to be generated.",
    )
    length_penalty = st.sidebar.number_input(
        "Length penalty",
        value=2.0,
        help=" length_penalty > 0.0 promotes longer sequences, while length_penalty < 0.0 encourages shorter sequences. ",
    )
    no_repeat_ngram_size = st.sidebar.number_input(
        "No Repeat N-Gram Size",
        min_value=0,
        value=3,
        help="If set to int > 0, all ngrams of that size can only occur once.",
    )

    input_text = st.text_area(
        label='Enter a text: ',
        height=100,
        value="Çünkü her kişinin bir başka yolu, bir başka yöntemi olmak gerektir.",
    )

    # Endpoint of the selected model under the boun-tabi-LMG namespace.
    url = "https://api-inference.huggingface.co/models/boun-tabi-LMG/" + model_name.lower()
    params = {
        "length_penalty": length_penalty,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "max_new_tokens": max_new_tokens,
    }

    if st.button("Generate"):
        # Do not send a request for empty or whitespace-only input.
        if not input_text.strip():
            st.warning("Please enter a text to tag.")
            return
        with st.spinner('Generating...'):
            output = query(input_text, url, params)
        # NOTE(review): the shape of query()'s return value (and how it reports
        # errors) is not visible here; it is shown as a success message as-is.
        st.success(output)