Spaces:

gba16326553
/

TestYE2

Runtime error

File size: 2,621 Bytes

f1dca2a
7b189ae
 
 
 
 
 
8f53c53
f1dca2a
 
 
c0095b8
f1dca2a
cfd8886
 
0623c5b
cfd8886
f1dca2a
8ddffec
f1dca2a
718b303
7b189ae
 
 
 
321be75
 
 
 
 
 
 
 
 
7b189ae
 
 
 
 
69105c3
 
7b189ae
 
 
 
 
69105c3
7b189ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321be75

from transformers import AutoModelForCausalLM, AutoTokenizer,AutoModel
import gradio as gr
import torch

# Display metadata (heading, blurb and sample prompts) meant for a Gradio
# interface.
title = "🤖AI ChatBot"
description = (
    "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
)
examples = [
    ["How are you?"],
]


# Load model directly
from transformers import AutoModel
# Checkpoints tried earlier, kept for reference:
#model = AutoModel.from_pretrained("ironlanderl/gemma-2-2b-it-Q5_K_M-GGUF")
#modelName = "google/gemma-2-2b-it"
#modelName = "ironlanderl/gemma-2-2b-it-Q5_K_M-GGUF"

# Hub repository and the quantized GGUF file inside it that is loaded below.
modelName = "bartowski/Mistral-Nemo-Instruct-2407-GGUF"
modelId = "Mistral-Nemo-Instruct-2407-Q2_K.gguf"
tokenizer = AutoTokenizer.from_pretrained(modelName, gguf_file=modelId)

# generate_text() calls model.generate(), which needs the language-modeling
# head. AutoModelForCausalLM loads the checkpoint with that head attached; the
# plain AutoModel class builds only the base transformer and is not suitable
# for text generation.
model = AutoModelForCausalLM.from_pretrained(
    modelName,
    gguf_file=modelId,
    torch_dtype=torch.float16,
)
#model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it", torch_dtype=torch.float16 )
#stvlynn/Gemma-2-2b-Chinese-it
#tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
#model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
#The model was loaded with use_flash_attention_2=True, which is deprecated and may be removed in a future release. Please use `attn_implementation="flash_attention_2"` instead.

def generate_text(prompt, max_new_tokens=256):
    """Generate text for ``prompt`` using the module-level tokenizer and model.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt. Passed straight to ``model.generate``.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed, or ``""`` when ``prompt`` is empty, whitespace-only or None.
    """
    # Nothing to condition on: skip the (expensive) model call entirely.
    if not prompt or not prompt.strip():
        return ""

    # Keep the input tensors on the same device as the model weights.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # An explicit budget avoids depending on the library's default generation
    # length, which is typically very short and would cut replies off.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Minimal UI: one text input wired to generate_text, one text output.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")

# Building the Interface does not serve it; launch() starts the web server.
# Guarded so that importing this module does not start a server.
if __name__ == "__main__":
    iface.launch()


"""
def predict(input, history=[]):
    # tokenize the new input sentence
    new_user_input_ids = tokenizer.encode(
        input + tokenizer.eos_token, return_tensors="pt"
    )
    #attentionMask = torch.ones(new_user_input_ids.shape, dtype=torch.long)
    
    # append the new user input tokens to the chat history
    bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)

    # generate a response
    history = model.generate(
        bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
    ).tolist()

    # convert the tokens to text, and then split the responses into lines
    response = tokenizer.decode(history[0]).split("<|endoftext|>")
    # print('decoded_response-->>'+str(response))
    response = [
        (response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)
    ]  # convert to tuples of list
    # print('response-->>'+str(response))
    return response, history


gr.Interface(
    fn=predict,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    theme="finlaymacklon/boxy_violet",
).launch()
"""