CSUMLM / config.json
Or4cl3-1's picture
Update config.json
3523256 verified
{
"model_type": "encoder_decoder",
"encoder_type": "csumlm_encoder",
"decoder_type": "csumlm_decoder",
"model_name": "CognoSphere/CSUMLM",
"model_description": "CognoSphere Unified Multimodal Language Model (CSUMLM) is an advanced AI model capable of processing and generating text, image, and audio data. It combines transfer learning, deep learning, self-supervised learning, meta-learning, deep meta-learning, reinforcement learning, and cross-domain analogy extraction to achieve state-of-the-art performance on multimodal tasks.",
"encoder": {
"type": "transformer",
"num_layers": 12,
"hidden_size": 768,
"num_attention_heads": 12,
"intermediate_size": 3072
},
"decoder": {
"type": "transformer",
"num_layers": 12,
"hidden_size": 768,
"num_attention_heads": 12,
"intermediate_size": 3072
},
"multimodal_fusion": {
"type": "transformer",
"num_layers": 6,
"hidden_size": 1024,
"num_attention_heads": 16,
"intermediate_size": 4096
},
"training_data": {
"text": [
"path/to/text/data/file1.txt",
"path/to/text/data/file2.txt",
"..."
],
"images": [
"path/to/image/data/image1.jpg",
"path/to/image/data/image2.png",
"..."
],
"audio": [
"path/to/audio/data/audio1.wav",
"path/to/audio/data/audio2.mp3",
"..."
]
},
"tokenizer": {
"type": "byte-level-bpe",
"vocab_size": 50000,
"merge_file": "path/to/bpe/merge_file.txt"
},
"optimizer": {
"type": "adamw",
"learning_rate": 5e-5,
"weight_decay": 0.01
},
"loss_function": "cross_entropy",
"evaluation_metrics": [
"bleu",
"meteor",
"rouge",
"cider"
]
}