nikravan committed
Commit a1b2c23 · verified · 1 Parent(s): c9ea116

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -8,7 +8,7 @@ import spaces
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
 
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
 CHAT_TEMPLATE = "َAuto"
@@ -103,8 +103,8 @@ model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
     #quantization_config=quantization_config,
-    attn_implementation="flash_attention_2",
-    dtype=torch.bfloat16
+    #attn_implementation="flash_attention_2",
+
 )
 
 # Create Gradio interface
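
For context, a minimal sketch (not the author's full app.py) of how the model load behaves after this commit, assuming the rest of the file is unchanged: with the flash-attn install and attn_implementation="flash_attention_2" commented out, transformers falls back to its default attention backend (typically SDPA on recent versions), and with no explicit dtype the weights load in the library's default precision.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

    # After this commit: no flash-attn, no explicit dtype; transformers
    # chooses the attention backend and precision on its own.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        # quantization_config=quantization_config,  # still commented out in app.py
        # attn_implementation="flash_attention_2",  # disabled by this commit
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

Commenting the lines out rather than deleting them keeps the flash-attention-2 path easy to re-enable on hardware where flash-attn builds successfully.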