vilarin committed on
Commit
8a59c8f
·
verified ·
1 Parent(s): 0555f23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -14,9 +14,10 @@ import gradio as gr
14
  from threading import Thread
15
 
16
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
17
- MODEL = "NousResearch/Hermes-3-Llama-3.2-3B"
 
18
 
19
- TITLE = "<h1><center>Hermes-3-Llama-3.2-3B</center></h1>"
20
 
21
  PLACEHOLDER = """
22
  <center>
@@ -45,13 +46,14 @@ quantization_config = BitsAndBytesConfig(
45
  bnb_4bit_use_double_quant=True,
46
  bnb_4bit_quant_type= "nf4")
47
 
48
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
49
  model = AutoModelForCausalLM.from_pretrained(
50
  MODEL,
51
  torch_dtype=torch.float16,
52
  device_map="auto",
 
53
  attn_implementation="flash_attention_2",
54
- quantization_config=quantization_config)
55
 
56
  # Ensure `pad_token_id` is set
57
  if tokenizer.pad_token_id is None:
@@ -82,7 +84,7 @@ def stream_chat(
82
 
83
  conversation.append({"role": "user", "content": message})
84
 
85
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
86
 
87
  streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
88
 
 
14
  from threading import Thread
15
 
16
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
17
+ MODEL = "evabyte/EvaByte-SFT"
18
+ MODEL_BASE = "evabyte/EvaByte"
19
 
20
+ TITLE = "<h1><center>EvaByte</center></h1>"
21
 
22
  PLACEHOLDER = """
23
  <center>
 
46
  bnb_4bit_use_double_quant=True,
47
  bnb_4bit_quant_type= "nf4")
48
 
49
+ tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  MODEL,
52
  torch_dtype=torch.float16,
53
  device_map="auto",
54
+ trust_remote_code=True,
55
  attn_implementation="flash_attention_2",
56
+ quantization_config=quantization_config).eval().to(device)
57
 
58
  # Ensure `pad_token_id` is set
59
  if tokenizer.pad_token_id is None:
 
84
 
85
  conversation.append({"role": "user", "content": message})
86
 
87
+ input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(device)
88
 
89
  streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
90