Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from transformers import ChameleonProcessor, ChameleonForConditionalGeneration, TextIteratorStreamer
|
2 |
import torch
|
3 |
from PIL import Image
|
4 |
import requests
|
@@ -9,7 +9,14 @@ from gradio import FileData
|
|
9 |
import time
|
10 |
|
11 |
processor = ChameleonProcessor.from_pretrained("facebook/chameleon-30b")
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
@spaces.GPU
|
15 |
def bot_streaming(message, history):
|
|
|
1 |
+
from transformers import ChameleonProcessor, ChameleonForConditionalGeneration, TextIteratorStreamer, BitsAndBytesConfig
|
2 |
import torch
|
3 |
from PIL import Image
|
4 |
import requests
|
|
|
9 |
import time
|
10 |
|
11 |
processor = ChameleonProcessor.from_pretrained("facebook/chameleon-30b")
|
12 |
+
|
13 |
+
quantization_config = BitsAndBytesConfig(
|
14 |
+
load_in_4bit=True,
|
15 |
+
bnb_4bit_quant_type="nf4",
|
16 |
+
bnb_4bit_compute_dtype=torch.float16,
|
17 |
+
)
|
18 |
+
|
19 |
+
# Load the 4-bit (bitsandbytes) quantized checkpoint directly onto the GPU.
# Placement is requested through `device_map` rather than a trailing `.to("cuda")`:
# transformers rejects `.to()` on bitsandbytes-quantized models for some
# transformers/bitsandbytes version combinations (ValueError at import time).
# NOTE(review): confirm this placement behaves as expected under the ZeroGPU runtime.
model = ChameleonForConditionalGeneration.from_pretrained(
    "facebook/chameleon-30b",
    quantization_config=quantization_config,
    device_map="cuda",
)
|
20 |
|
21 |
@spaces.GPU
|
22 |
def bot_streaming(message, history):
|