kreas committed on
Commit
a45b339
1 Parent(s): 12902c5

Upload /Llama-2-7b-hf/int8_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json with huggingface_hub

Browse files
Llama-2-7b-hf/int8_batch_size_1_sq_len_256_new_tokens_256/experiment_config.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": {
3
+ "name": "pytorch",
4
+ "version": "2.2.1+cu118",
5
+ "_target_": "optimum_benchmark.backends.pytorch.backend.PyTorchBackend",
6
+ "model": "meta-llama/Llama-2-7b-hf",
7
+ "task": "text-generation",
8
+ "library": "transformers",
9
+ "device": "cuda",
10
+ "device_ids": "0",
11
+ "seed": 42,
12
+ "inter_op_num_threads": null,
13
+ "intra_op_num_threads": null,
14
+ "hub_kwargs": {
15
+ "revision": "main",
16
+ "force_download": false,
17
+ "local_files_only": false,
18
+ "trust_remote_code": false
19
+ },
20
+ "no_weights": true,
21
+ "device_map": null,
22
+ "torch_dtype": "float16",
23
+ "amp_autocast": false,
24
+ "amp_dtype": null,
25
+ "eval_mode": true,
26
+ "to_bettertransformer": false,
27
+ "low_cpu_mem_usage": null,
28
+ "attn_implementation": "flash_attention_2",
29
+ "cache_implementation": null,
30
+ "torch_compile": false,
31
+ "torch_compile_config": {},
32
+ "quantization_scheme": "bnb",
33
+ "quantization_config": {
34
+ "llm_int8_threshold": 0.0,
35
+ "load_in_8bit": true
36
+ },
37
+ "deepspeed_inference": false,
38
+ "deepspeed_inference_config": {},
39
+ "peft_type": null,
40
+ "peft_config": {}
41
+ },
42
+ "launcher": {
43
+ "name": "process",
44
+ "_target_": "optimum_benchmark.launchers.process.launcher.ProcessLauncher",
45
+ "device_isolation": false,
46
+ "start_method": "spawn"
47
+ },
48
+ "benchmark": {
49
+ "name": "inference",
50
+ "_target_": "optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark",
51
+ "duration": 10,
52
+ "warmup_runs": 10,
53
+ "input_shapes": {
54
+ "batch_size": 1,
55
+ "num_choices": 2,
56
+ "sequence_length": 256
57
+ },
58
+ "new_tokens": null,
59
+ "energy": false,
60
+ "memory": true,
61
+ "latency": true,
62
+ "forward_kwargs": {},
63
+ "generate_kwargs": {
64
+ "max_new_tokens": 256,
65
+ "min_new_tokens": 256
66
+ },
67
+ "call_kwargs": {}
68
+ },
69
+ "experiment_name": "Llama-2-7b-hf-int8",
70
+ "task": null,
71
+ "model": null,
72
+ "device": null,
73
+ "library": null,
74
+ "environment": {
75
+ "cpu": "AMD Ryzen Threadripper PRO 5995WX 64-Cores",
76
+ "cpu_count": 128,
77
+ "cpu_ram_mb": 134841.131008,
78
+ "system": "Linux",
79
+ "machine": "x86_64",
80
+ "platform": "Linux-6.5.0-14-generic-x86_64-with-glibc2.35",
81
+ "processor": "x86_64",
82
+ "python_version": "3.10.12",
83
+ "gpu": [
84
+ "NVIDIA GeForce RTX 4090"
85
+ ],
86
+ "gpu_count": 1,
87
+ "gpu_vram_mb": 25757.220864,
88
+ "optimum_benchmark_version": "0.2.0",
89
+ "optimum_benchmark_commit": "5bf349dbbc5ecdbf6ca94ac70f80ac44bd84dcc0",
90
+ "transformers_version": "4.38.2",
91
+ "transformers_commit": null,
92
+ "accelerate_version": "0.28.0",
93
+ "accelerate_commit": null,
94
+ "diffusers_version": null,
95
+ "diffusers_commit": null,
96
+ "optimum_version": null,
97
+ "optimum_commit": null,
98
+ "timm_version": null,
99
+ "timm_commit": null,
100
+ "peft_version": null,
101
+ "peft_commit": null
102
+ }
103
+ }