nikravan commited on
Commit
8f997e4
·
verified Β·
1 Parent(s): 6814d81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
10
 
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
- MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
14
  CHAT_TEMPLATE = "Auto"
15
  MODEL_NAME = MODEL_ID.split("/")[-1]
16
  CONTEXT_LENGTH = 16000
@@ -102,7 +102,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
102
  model = AutoModelForCausalLM.from_pretrained(
103
  MODEL_ID,
104
  device_map="auto",
105
- quantization_config=quantization_config,
106
  attn_implementation="flash_attention_2",
107
  )
108
 
 
10
 
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
+ MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
14
  CHAT_TEMPLATE = "Auto"
15
  MODEL_NAME = MODEL_ID.split("/")[-1]
16
  CONTEXT_LENGTH = 16000
 
102
  model = AutoModelForCausalLM.from_pretrained(
103
  MODEL_ID,
104
  device_map="auto",
105
+ #quantization_config=quantization_config,
106
  attn_implementation="flash_attention_2",
107
  )
108