import os
from typing import Any, Dict, List, Optional, Tuple, Union

from transformers import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class Qwen2VLVisionConfig(PretrainedConfig):
    """Configuration holder registered under the model type ``"qwen2_vit"``.

    Every constructor argument is stored unchanged on the instance under an
    attribute of the same name; any additional keyword arguments are handed
    to ``PretrainedConfig.__init__``.

    Args:
        depth: Defaults to 32. (Presumably the number of vision blocks --
            confirm against the model code that consumes this config.)
        embed_dim: Defaults to 1280.
        hidden_size: Defaults to 3584.
        hidden_act: Activation name, defaults to ``"quick_gelu"``.
        mlp_ratio: Defaults to 4.
        num_heads: Defaults to 16.
        in_channels: Defaults to 3.
        patch_size: Defaults to 14.
        spatial_merge_size: Defaults to 2.
        temporal_patch_size: Defaults to 2.
        **kwargs: Forwarded to the ``PretrainedConfig`` constructor.
    """

    model_type = "qwen2_vit"

    def __init__(
        self,
        depth=32,
        embed_dim=1280,
        hidden_size=3584,
        hidden_act="quick_gelu",
        mlp_ratio=4,
        num_heads=16,
        in_channels=3,
        patch_size=14,
        spatial_merge_size=2,
        temporal_patch_size=2,
        **kwargs,
    ):
        # Let the base class consume the generic options first.
        super().__init__(**kwargs)

        # Vision-specific settings, stored verbatim.
        self.depth = depth
        self.embed_dim = embed_dim
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.mlp_ratio = mlp_ratio
        self.num_heads = num_heads
        self.in_channels = in_channels
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.temporal_patch_size = temporal_patch_size

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
        """Build a config from a pretrained checkpoint name or path.

        The configuration dictionary is obtained via ``cls.get_config_dict``.
        When that dictionary's ``model_type`` is ``"qwen2_vl"``, its nested
        ``"vision_config"`` entry is used in its place. A warning is logged if
        the resulting dictionary declares a ``model_type`` different from
        ``cls.model_type``; loading proceeds regardless.

        Args:
            pretrained_model_name_or_path: Identifier or filesystem path passed
                through to ``cls.get_config_dict``.
            **kwargs: Passed to ``cls.get_config_dict``; the keyword arguments
                it hands back are forwarded to ``cls.from_dict``.

        Returns:
            Whatever ``cls.from_dict`` produces for the selected dictionary.

        Raises:
            KeyError: If the loaded dictionary has ``model_type == "qwen2_vl"``
                but no ``"vision_config"`` key.
        """
        cls._set_token_in_kwargs(kwargs)

        loaded_dict, remaining_kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)

        # A full "qwen2_vl" config nests the vision settings; unwrap them.
        if loaded_dict.get("model_type") == "qwen2_vl":
            loaded_dict = loaded_dict["vision_config"]

        type_mismatch = (
            "model_type" in loaded_dict
            and hasattr(cls, "model_type")
            and loaded_dict["model_type"] != cls.model_type
        )
        if type_mismatch:
            logger.warning(
                f"You are using a model of type {loaded_dict['model_type']} to instantiate a model of type "
                f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
            )

        return cls.from_dict(loaded_dict, **remaining_kwargs)