model is running but not printing output
!pip install "transformers==4.38.1"
!pip install bitsandbytes==0.42.0
!pip install accelerate==0.27.1
import time
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
model_id = "google/gemma-7b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Name the pipeline object `pipe` so it doesn't shadow the imported
# `pipeline` factory function.
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    model_kwargs={
        "torch_dtype": torch.bfloat16,
        "quantization_config": BitsAndBytesConfig(load_in_4bit=True),
    },
    # device="cuda",  # not needed: the 4-bit quantized weights are placed on the GPU automatically
)
messages = [
{ "role": "user", "content": "Write a hello world program" },
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
time_begin = time.time()
outputs = pipe(
    prompt,
    max_new_tokens=256,
    add_special_tokens=True,
    do_sample=True,
    temperature=0.5,
    top_k=50,
    top_p=0.95,
)
time_end = time.time()
time_total = time_end - time_begin
print(f"{time_total:.2f} seconds, {max_num_of_words} tokens, {max_num_of_words / time_total:.2f} tokens/second")
print(outputs[0]["generated_text"][len(prompt):])
Please help.
Could you change do_sample=False and try again?
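For example, keeping everything else from your script the same and just switching to greedy decoding (a sketch against the code above; once do_sample=False, the temperature/top_k/top_p knobs no longer apply):

outputs = pipe(
    prompt,
    max_new_tokens=256,
    add_special_tokens=True,
    do_sample=False,  # greedy decoding, rules out sampling as the cause
)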
Is print(outputs[0]["generated_text"]) printing any text?
I have the same problem. The model runs indefinitely without producing a result.
Guys, it's running now. There was some problem with the model files; I downloaded fresh model files and it's working completely fine.
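In case anyone else hits corrupted files in the local Hugging Face cache, here's a minimal sketch of forcing a clean re-download (snapshot_download and force_download are standard huggingface_hub API; that a corrupted cache is the cause in your case is an assumption):

# Sketch: re-fetch all model files, ignoring any cached (possibly corrupted) copies.
from huggingface_hub import snapshot_download

snapshot_download("google/gemma-7b-it", force_download=True)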