model not generating text
#97
by airedwin
```python
from typing import Dict

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Defined elsewhere in my file; shown here with example values.
MODEL_NAME = "tiiuae/falcon-7b-instruct"  # points at my local snapshot
DEFAULT_MAX_LENGTH = 128


class Model:
    def load(self):
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        # Load the weights in 8-bit so the model fits on the GPU.
        model_8bit = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            load_in_8bit=True,
            trust_remote_code=True,
        )
        self.pipeline = pipeline(
            "text-generation",
            model=model_8bit,
            tokenizer=self.tokenizer,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto",
        )

    def predict(self, request: Dict) -> Dict:
        with torch.no_grad():
            try:
                prompt = request.pop("prompt")
                # Remaining request keys are forwarded as generation kwargs.
                data = self.pipeline(
                    prompt,
                    eos_token_id=self.tokenizer.eos_token_id,
                    max_length=DEFAULT_MAX_LENGTH,
                    **request,
                )[0]
                return {"data": data}
            except Exception as exc:
                return {"error": str(exc)}
```
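For reference, the request I send looks like this (illustrative values; only `prompt` is required, and any remaining keys are forwarded to the pipeline as generation kwargs):

```python
# Illustrative request; only "prompt" is required. Everything else is
# passed through **request into the text-generation pipeline call.
request = {
    "prompt": "Write a haiku about GPUs.",
    "do_sample": True,
    "temperature": 0.7,
}
```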
I am using the code above to generate an answer for a prompt. It works when I run the model locally in a Docker container on an NVIDIA RTX GPU. When I deploy the same container to an AWS p3.2xlarge instance, which has an NVIDIA Tesla V100 GPU, the generated_text it returns is empty. I am also using a local copy of the falcon-7b-instruct snapshot with TRANSFORMERS_OFFLINE=1.
Any ideas why this works locally but not on the EC2 instance?
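In case it is relevant: the RTX card and the Tesla V100 are different GPU generations (the V100 is Volta, compute capability 7.0, with no native bfloat16 support), and the pipeline above requests `torch_dtype=torch.bfloat16`. Here is a quick check I can run on both machines to compare what the hardware supports (a minimal sketch, assuming PyTorch is available in the container; that the dtype is the actual cause is just my guess):

```python
import torch

# Compare these values on the local RTX box and on the p3.2xlarge.
# bfloat16 needs compute capability 8.0 (Ampere) or newer; the V100 is 7.0,
# so is_bf16_supported() should print False there.
print("device:", torch.cuda.get_device_name(0))
print("compute capability:", torch.cuda.get_device_capability(0))
print("bf16 supported:", torch.cuda.is_bf16_supported())
```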