"""Streamlit front-end that generates text from a prompt with microsoft/phi-2."""

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"


@st.cache_resource
def load_model():
    """Load the phi-2 tokenizer and model once per server process.

    Streamlit re-runs the whole script on every widget interaction; caching
    the loaded objects keeps the weights from being reloaded on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # NOTE(review): flash_attn / flash_rotary / fused_dense are passed through
    # unchanged from the original script; confirm the installed phi-2 revision
    # still honours them.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype="auto",
        flash_attn=True,
        flash_rotary=True,
        fused_dense=True,
        device_map="cuda",
        trust_remote_code=True,
    )
    return tokenizer, model


tokenizer, model = load_model()

prompt = st.text_input(
    "Input prompt",
    value="Write a detailed analogy between mathematics and a lighthouse.",
)
# Integer bounds/step keep the widget returning a positive int for max_length.
length = st.number_input("Max token length", min_value=1, value=200, step=1)

# Skip generation for a blank prompt instead of calling generate() with no tokens.
if prompt.strip():
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
    # The model lives on the GPU; the tokenizer returns CPU tensors by default.
    inputs = inputs.to(model.device)
    outputs = model.generate(**inputs, max_length=int(length))
    text = tokenizer.batch_decode(outputs)[0]
    st.write(text)