|
{'vocab_size': 32000, |
|
'max_position_embeddings': 4096, |
|
'hidden_size': 4096, |
|
'intermediate_size': 11008, |
|
'num_hidden_layers': 32, |
|
'num_attention_heads': 32, |
|
'num_key_value_heads': 32, |
|
'hidden_act': 'silu', |
|
'initializer_range': 0.02, |
|
'rms_norm_eps': 1e-05, |
|
'pretraining_tp': 1, |
|
'use_cache': False, |
|
'rope_scaling': None, |
|
'return_dict': True, |
|
'output_hidden_states': False, |
|
'output_attentions': False, |
|
'torchscript': False, |
|
'torch_dtype': 'float16', |
|
'use_bfloat16': False, |
|
'tf_legacy_loss': False, |
|
'pruned_heads': {}, |
|
'tie_word_embeddings': False, |
|
'is_encoder_decoder': False, |
|
'is_decoder': False, |
|
'cross_attention_hidden_size': None, |
|
'add_cross_attention': False, |
|
'tie_encoder_decoder': False, |
|
'max_length': 20, |
|
'min_length': 0, |
|
'do_sample': False, |
|
'early_stopping': False, |
|
'num_beams': 1, |
|
'num_beam_groups': 1, |
|
'diversity_penalty': 0.0, |
|
'temperature': 1.0, |
|
'top_k': 50, |
|
'top_p': 1.0, |
|
'typical_p': 1.0, |
|
'repetition_penalty': 1.0, |
|
'length_penalty': 1.0, |
|
'no_repeat_ngram_size': 0, |
|
'encoder_no_repeat_ngram_size': 0, |
|
'bad_words_ids': None, |
|
'num_return_sequences': 1, |
|
'chunk_size_feed_forward': 0, |
|
'output_scores': False, |
|
'return_dict_in_generate': False, |
|
'forced_bos_token_id': None, |
|
'forced_eos_token_id': None, |
|
'remove_invalid_values': False, |
|
'exponential_decay_length_penalty': None, |
|
'suppress_tokens': None, |
|
'begin_suppress_tokens': None, |
|
'architectures': ['LlamaForCausalLM'], |
|
'finetuning_task': None, |
|
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, |
|
'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, |
|
'tokenizer_class': None, |
|
'prefix': None, |
|
'bos_token_id': 1, |
|
'pad_token_id': None, |
|
'eos_token_id': 2, |
|
'sep_token_id': None, |
|
'decoder_start_token_id': None, |
|
'task_specific_params': None, |
|
'problem_type': None, |
|
'_name_or_path': 'mahimairaja/tweet-summarization-llama-2-finetuned', |
|
'transformers_version': '4.32.1', |
|
'model_type': 'llama', |
|
'quantization_config': {'quant_method': 'bitsandbytes', |
|
'load_in_8bit': False, |
|
'load_in_4bit': True, |
|
'llm_int8_threshold': 6.0, |
|
'llm_int8_skip_modules': None, |
|
'llm_int8_enable_fp32_cpu_offload': False, |
|
'llm_int8_has_fp16_weight': False, |
|
'bnb_4bit_quant_type': 'nf4', |
|
'bnb_4bit_use_double_quant': False, |
|
'bnb_4bit_compute_dtype': 'float16'}} |