Spaces:
Running
on
Zero
Running
on
Zero
BramVanroy
committed on
Commit
•
974c121
1
Parent(s):
9a56c6d
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
|
|
17 |
|
18 |
model_id = "BramVanroy/fietje-2b-chat"
|
19 |
avatar_url = "https://huggingface.co/spaces/BramVanroy/fietje-2b/resolve/main/img/fietje-2b-avatar.png"
|
20 |
-
model = AutoModelForCausalLM.from_pretrained(model_id,
|
21 |
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
|
22 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
23 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
@@ -116,7 +116,7 @@ chat_interface = gr.ChatInterface(
|
|
116 |
minimum=0,
|
117 |
maximum=20,
|
118 |
step=1,
|
119 |
-
value=
|
120 |
),
|
121 |
gr.Checkbox(
|
122 |
label="Do sample",
|
|
|
17 |
|
18 |
model_id = "BramVanroy/fietje-2b-chat"
|
19 |
avatar_url = "https://huggingface.co/spaces/BramVanroy/fietje-2b/resolve/main/img/fietje-2b-avatar.png"
|
20 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True, attn_implementation="flash_attention_2")
|
21 |
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
|
22 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
23 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
|
|
116 |
minimum=0,
|
117 |
maximum=20,
|
118 |
step=1,
|
119 |
+
value=0,
|
120 |
),
|
121 |
gr.Checkbox(
|
122 |
label="Do sample",
|