Update README.md
Browse files
README.md
CHANGED
@@ -26,8 +26,8 @@ library_name: transformers
|
|
26 |
|
27 |
... OK tokenizer seems a bit off
|
28 |
|
29 |
-
|
30 |
-
|
31 |
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
32 |
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
33 |
ggml_cuda_init: found 2 CUDA devices:
|
@@ -177,7 +177,7 @@ llama_perf_context_print: load time = 1693.08 ms
|
|
177 |
llama_perf_context_print: prompt eval time = 26.42 ms / 7 tokens ( 3.77 ms per token, 264.96 tokens per second)
|
178 |
llama_perf_context_print: eval time = 3993.62 ms / 238 runs ( 16.78 ms per token, 59.60 tokens per second)
|
179 |
llama_perf_context_print: total time = 4034.65 ms / 245 tokens
|
180 |
-
|
181 |
|
182 |
----
|
183 |
|
|
|
26 |
|
27 |
... OK tokenizer seems a bit off
|
28 |
|
29 |
+
```
|
30 |
+
llama-cli -m phi-4.etf16-Q6_K.gguf -p "Tell me a joke." -n 256 -t 8 -c 2048 --temp 0.8 -ngl 99
|
31 |
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
|
32 |
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
|
33 |
ggml_cuda_init: found 2 CUDA devices:
|
|
|
177 |
llama_perf_context_print: prompt eval time = 26.42 ms / 7 tokens ( 3.77 ms per token, 264.96 tokens per second)
|
178 |
llama_perf_context_print: eval time = 3993.62 ms / 238 runs ( 16.78 ms per token, 59.60 tokens per second)
|
179 |
llama_perf_context_print: total time = 4034.65 ms / 245 tokens
|
180 |
+
```
|
181 |
|
182 |
----
|
183 |
|