Spaces:

Mikhil-jivus
/

EndpointTesting

Runtime error

Mikhil-jivus committed on Oct 4, 2024

Commit

0a5ec67

verified ·

1 Parent(s): 0cd712e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 access_token = os.getenv('HF_TOKEN')
@@ -10,7 +11,13 @@ access_token = "your_access_token_here"
 # Load the tokenizer and model from the Hugging Face repository
 tokenizer = AutoTokenizer.from_pretrained(repo_id, token=access_token)
-model = AutoModelForCausalLM.from_pretrained(repo_id, token=access_token)
 def respond(
     message,

 import os
 import gradio as gr
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 access_token = os.getenv('HF_TOKEN')
 # Load the tokenizer and model from the Hugging Face repository
 tokenizer = AutoTokenizer.from_pretrained(repo_id, token=access_token)
+model = AutoModelForCausalLM.from_pretrained(
+    repo_id,
+    token=access_token,
+    torch_dtype=torch.bfloat16,  # or use torch.float16 if bfloat16 is not supported
+    device_map="auto"  # Automatically use available GPU/CPU efficiently
+)
 def respond(
     message,