{
  "max_new_tokens": 200,
  "seed": -1.0,
  "temperature": 0.7,
  "top_p": 0.14,
  "top_k": 49,
  "typical_p": 1,
  "epsilon_cutoff": 0,
  "eta_cutoff": 0,
  "repetition_penalty": 1.17,
  "repetition_penalty_range": 0,
  "encoder_repetition_penalty": 1,
  "no_repeat_ngram_size": 0,
  "min_length": 0,
  "do_sample": true,
  "penalty_alpha": 0,
  "num_beams": 1,
  "length_penalty": 1,
  "early_stopping": false,
  "mirostat_mode": 0,
  "mirostat_tau": 5,
  "mirostat_eta": 0.1,
  "add_bos_token": true,
  "ban_eos_token": false,
  "truncation_length": 4096,
  "custom_stopping_strings": "",
  "skip_special_tokens": true,
  "stream": true,
  "tfs": 1,
  "top_a": 0,
  "textbox": "###USER: hola como estas?\n###ASSISTANT:",
  "output_textbox": "###USER: hola como estas?\n###ASSISTANT: Hola! Como est\u00e1s hoy? (How are you today?)\n\n",
  "loader": "llama.cpp",
  "cpu_memory": 0,
  "auto_devices": false,
  "disk": false,
  "cpu": false,
  "bf16": false,
  "load_in_8bit": false,
  "trust_remote_code": false,
  "load_in_4bit": false,
  "compute_dtype": "float16",
  "quant_type": "nf4",
  "use_double_quant": false,
  "wbits": "None",
  "groupsize": "None",
  "model_type": "llama",
  "pre_layer": 0,
  "triton": false,
  "desc_act": false,
  "no_inject_fused_attention": false,
  "no_inject_fused_mlp": false,
  "no_use_cuda_fp16": false,
  "threads": 0,
  "n_batch": 512,
  "no_mmap": false,
  "low_vram": false,
  "mlock": false,
  "n_gpu_layers": 0,
  "n_ctx": 2048,
  "n_gqa": 0,
  "rms_norm_eps": 5e-06,
  "llama_cpp_seed": 0.0,
  "gpu_split": "",
  "max_seq_len": 2048,
  "compress_pos_emb": 1,
  "alpha_value": 1
}