nikravan committed on
Commit
c04f5fc
·
verified ·
1 Parent(s): 3a587da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -11
app.py CHANGED
@@ -2,7 +2,7 @@ import torch
2
  from PIL import Image
3
  import gradio as gr
4
  import spaces
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer,BitsAndBytesConfig
6
  import os
7
  from threading import Thread
8
 
@@ -12,7 +12,7 @@ from pptx import Presentation
12
 
13
 
14
  MODEL_LIST = ["nikravan/glm-4vq"]
15
- #MODEL_LIST = ["../Model_4b_sharded"]
16
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
17
  MODEL_ID = MODEL_LIST[0]
18
  MODEL_NAME = "GLM-4vq"
@@ -32,19 +32,12 @@ h1 {
32
  display: block;
33
  }
34
  """
35
- inference_dtype=torch.bfloat16
36
- quantization_config = BitsAndBytesConfig(
37
- load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
38
- )
39
 
40
  model = AutoModelForCausalLM.from_pretrained(
41
  MODEL_ID,
42
- torch_dtype=inference_dtype,
43
- device_map = "cuda:0",
44
  low_cpu_mem_usage=True,
45
- trust_remote_code=True,
46
-
47
- quantization_config=quantization_config
48
  )
49
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
50
  model.eval()
 
2
  from PIL import Image
3
  import gradio as gr
4
  import spaces
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
  import os
7
  from threading import Thread
8
 
 
12
 
13
 
14
  MODEL_LIST = ["nikravan/glm-4vq"]
15
+
16
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
17
  MODEL_ID = MODEL_LIST[0]
18
  MODEL_NAME = "GLM-4vq"
 
32
  display: block;
33
  }
34
  """
 
 
 
 
35
 
36
  model = AutoModelForCausalLM.from_pretrained(
37
  MODEL_ID,
38
+ torch_dtype=torch.bfloat16,
 
39
  low_cpu_mem_usage=True,
40
+ trust_remote_code=True
 
 
41
  )
42
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
43
  model.eval()