Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ if not huggingface_token:
|
|
9 |
raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
|
10 |
|
11 |
model_id = "meta-llama/Llama-Guard-3-8B-INT8"
|
12 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
13 |
dtype = torch.bfloat16
|
14 |
|
15 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
@@ -38,6 +37,7 @@ def parse_llama_guard_output(result):
|
|
38 |
|
39 |
@spaces.GPU
|
40 |
def moderate(user_input, assistant_response):
|
|
|
41 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
|
42 |
model = AutoModelForCausalLM.from_pretrained(
|
43 |
model_id,
|
|
|
9 |
raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
|
10 |
|
11 |
model_id = "meta-llama/Llama-Guard-3-8B-INT8"
|
|
|
12 |
dtype = torch.bfloat16
|
13 |
|
14 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
|
|
37 |
|
38 |
@spaces.GPU
|
39 |
def moderate(user_input, assistant_response):
|
40 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
41 |
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
|
42 |
model = AutoModelForCausalLM.from_pretrained(
|
43 |
model_id,
|