import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the model and tokenizer
model_name = "data-silence/any-news-sum"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def generate_summary_with_special_tokens(text, max_length=512):
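    """Generate a headline and a summary for the given news text.

    The model emits both pieces in a single sequence separated by the
    special <title_resume_sep> token, so special tokens are kept during
    decoding and the output is split manually.
    """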
    # Tokenize the input and move it to the same device as the model
    inputs = tokenizer(text, return_tensors="pt", max_length=max_length, truncation=True).to(device)
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=4,
    )
    # Keep special tokens: <title_resume_sep> is needed to split the output
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # Split the output into a title and a summary
    parts = generated_text.split('<title_resume_sep>')
    title = parts[0].replace("<pad> ", "").strip()
    resume = parts[1].replace("</s>", "").strip() if len(parts) > 1 else ""
    return title, resume
def summarize(text):
    title, resume = generate_summary_with_special_tokens(text)
    return title, resume
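# Illustrative direct call (not part of the app flow; the example text is made up):
#   title, resume = summarize("Some news article text in any language ...")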
# Build the Gradio interface
iface = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=10, label="Enter news text in any language | Введите текст новости"),
    outputs=[
        gr.Textbox(label="Generated header | Сгенерированный заголовок"),
        gr.Textbox(label="Generated summary | Сгенерированное резюме")
    ],
    title="Generator of headlines and news summaries | Генератор заголовков и резюме новостей",
    description="Enter a news article and the model will create a headline and a brief summary for it | Введите текст новости, и модель создаст для неё заголовок и краткое резюме"
)
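# launch() starts a local Gradio server; passing share=True would expose a temporary public link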
iface.launch()