Update handler.py
Browse files- handler.py +4 -5
handler.py
CHANGED
@@ -5,19 +5,18 @@ import json
|
|
5 |
|
6 |
class Qwen2VL7bHandler:
|
7 |
def __init__(self):
    """Load the Qwen2-VL-7B-Instruct model and processor for inference.

    Sets up:
      - self.model: the vision-language model, loaded in FP16 and placed
        on the available device(s) by ``device_map="auto"``.
      - self.processor: the matching tokenizer/image processor.
      - self.device: handle used later to move *inputs* to the model's device.
    """
    self.model = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-7B-Instruct",
        torch_dtype=torch.float16,  # FP16 halves memory versus FP32
        device_map="auto",  # accelerate dispatches weights to GPU(s)/CPU
    )
    self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
    # Keep a device handle for moving input tensors in later calls.
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE: do NOT call self.model.to(self.device) here — device_map="auto"
    # has already dispatched the model, and .to() on a dispatched model is
    # redundant at best and can error or duplicate weights.
    self.model.eval()
    # Gradient checkpointing removed: it only reduces memory during the
    # backward pass, and this handler is inference-only (eval mode).
|
22 |
|
23 |
def preprocess(self, request_data):
|
|
|
5 |
|
6 |
class Qwen2VL7bHandler:
|
7 |
def __init__(self):
    """Load the Qwen2-VL-7B-Instruct model and processor for inference.

    Sets up:
      - self.model: the vision-language model, loaded in FP16 and placed
        on the available device(s) by ``device_map="auto"``.
      - self.processor: the matching tokenizer/image processor.
      - self.device: handle used later to move *inputs* to the model's device.
    """
    self.model = Qwen2VLForConditionalGeneration.from_pretrained(
        "Qwen/Qwen2-VL-7B-Instruct",
        torch_dtype=torch.float16,  # Use FP16 for reduced memory usage
        device_map="auto",  # Automatically assigns the model to the available GPU(s)
    )
    self.processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
    # Keep a device handle for moving input tensors in later calls.
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE: do NOT call self.model.to(self.device) here — device_map="auto"
    # has already dispatched the model, and .to() on a dispatched model is
    # redundant at best and can error or duplicate weights.
    self.model.eval()
    # Gradient checkpointing removed: it only reduces memory during the
    # backward pass, and this handler is inference-only (eval mode).
|
21 |
|
22 |
def preprocess(self, request_data):
|