import streamlit as st
import torch
import transformers

from sampling import CAIFSampler, TopKWithTemperatureSampler
from generator import Generator

device = "cuda" if torch.cuda.is_available() else "cpu"


def main():
    st.subheader(
        "This demo lets you experiment with models that estimate "
        "how well a candidate response fits the context of a dialogue."
    )
    cls_model_name = st.selectbox(
        "Select a classification model",
        (
            "tinkoff-ai/response-quality-classifier-tiny",
            "tinkoff-ai/response-quality-classifier-base",
            "tinkoff-ai/response-quality-classifier-large",
        ),
    )
    lm_model_name = st.selectbox(
        "Select a language model",
        ("sberbank-ai/rugpt3small_based_on_gpt2",),
    )
    # The default prompt is Russian ("Hello") to match the Russian language model.
    prompt = st.text_input("Beginning of the text:", "Привет")
    with st.spinner("Running inference..."):
        text = inference(
            lm_model_name=lm_model_name,
            cls_model_name=cls_model_name,
            prompt=prompt,
        )
    st.text_area("Generated text:", text)


@st.cache(hash_funcs={str: lambda lm_model_name: hash(lm_model_name)}, allow_output_mutation=True)
def load_generator(lm_model_name: str) -> Generator:
    with st.spinner("Loading language model..."):
        return Generator(lm_model_name=lm_model_name, device=device)


def load_sampler(cls_model_name, lm_tokenizer):
    with st.spinner("Loading classifier model..."):
        return CAIFSampler(classifier_name=cls_model_name, lm_tokenizer=lm_tokenizer, device=device)


@st.cache
def inference(lm_model_name: str, cls_model_name: str, prompt: str, fp16: bool = True) -> str:
    generator = load_generator(lm_model_name=lm_model_name)
    lm_tokenizer = transformers.AutoTokenizer.from_pretrained(lm_model_name)

    # The CAIF sampler guides token selection with the classifier; the ordinary
    # top-k-with-temperature sampler is the fallback sampling strategy.
    caif_sampler = load_sampler(cls_model_name=cls_model_name, lm_tokenizer=lm_tokenizer)
    generator.set_caif_sampler(caif_sampler)
    ordinary_sampler = TopKWithTemperatureSampler()
    generator.set_ordinary_sampler(ordinary_sampler)
    kwargs = {
        "top_k": 20,
        "temperature": 1.0,
        "top_k_classifier": 100,
        "classifier_weight": 5,
    }

    # Run generation under autocast; `fp16` toggles mixed precision on either device.
    autocast = torch.cpu.amp.autocast if device == "cpu" else torch.cuda.amp.autocast
    with autocast(fp16):
        sequences, _ = generator.sample_sequences(
            num_samples=1,
            input_prompt=prompt,
            max_length=20,
            caif_period=1,
            caif_tokens_num=100,
            entropy=3.2,
            **kwargs,
        )
    return sequences[0]


if __name__ == "__main__":
    main()