{'vocab_size': 32000,
'max_position_embeddings': 4096,
'hidden_size': 4096,
'intermediate_size': 11008,
'num_hidden_layers': 32,
'num_attention_heads': 32,
'num_key_value_heads': 32,
'hidden_act': 'silu',
'initializer_range': 0.02,
'rms_norm_eps': 1e-05,
'pretraining_tp': 1,
'use_cache': False,
'rope_scaling': None,
'return_dict': True,
'output_hidden_states': False,
'output_attentions': False,
'torchscript': False,
'torch_dtype': 'float16',
'use_bfloat16': False,
'tf_legacy_loss': False,
'pruned_heads': {},
'tie_word_embeddings': False,
'is_encoder_decoder': False,
'is_decoder': False,
'cross_attention_hidden_size': None,
'add_cross_attention': False,
'tie_encoder_decoder': False,
'max_length': 20,
'min_length': 0,
'do_sample': False,
'early_stopping': False,
'num_beams': 1,
'num_beam_groups': 1,
'diversity_penalty': 0.0,
'temperature': 1.0,
'top_k': 50,
'top_p': 1.0,
'typical_p': 1.0,
'repetition_penalty': 1.0,
'length_penalty': 1.0,
'no_repeat_ngram_size': 0,
'encoder_no_repeat_ngram_size': 0,
'bad_words_ids': None,
'num_return_sequences': 1,
'chunk_size_feed_forward': 0,
'output_scores': False,
'return_dict_in_generate': False,
'forced_bos_token_id': None,
'forced_eos_token_id': None,
'remove_invalid_values': False,
'exponential_decay_length_penalty': None,
'suppress_tokens': None,
'begin_suppress_tokens': None,
'architectures': ['LlamaForCausalLM'],
'finetuning_task': None,
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'},
'label2id': {'LABEL_0': 0, 'LABEL_1': 1},
'tokenizer_class': None,
'prefix': None,
'bos_token_id': 1,
'pad_token_id': None,
'eos_token_id': 2,
'sep_token_id': None,
'decoder_start_token_id': None,
'task_specific_params': None,
'problem_type': None,
'_name_or_path': 'mahimairaja/tweet-summarization-llama-2-finetuned',
'transformers_version': '4.32.1',
'model_type': 'llama',
'quantization_config': {'quant_method': <QuantizationMethod.BITS_AND_BYTES: 'bitsandbytes'>,
'load_in_8bit': False,
'load_in_4bit': True,
'llm_int8_threshold': 6.0,
'llm_int8_skip_modules': None,
'llm_int8_enable_fp32_cpu_offload': False,
'llm_int8_has_fp16_weight': False,
'bnb_4bit_quant_type': 'nf4',
'bnb_4bit_use_double_quant': False,
'bnb_4bit_compute_dtype': 'float16'}}
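For reference, the quantization_config entries above (4-bit NF4 weights, no double quantization, float16 compute dtype) map directly onto a bitsandbytes configuration when the checkpoint is loaded through transformers. The snippet below is a minimal sketch of such a load, assuming a recent transformers + bitsandbytes install and device_map="auto" placement; it is not necessarily the exact loading code used for this model.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Mirror the quantization_config shown in the dump above:
# 4-bit NF4 weights, no double quantization, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model_id = "mahimairaja/tweet-summarization-llama-2-finetuned"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # assumption: let accelerate place layers on available devices
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Printing the loaded config reproduces the dictionary shown above.
print(model.config)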