Crystalcareai committed
Commit 4479617 · Parent(s): 8d2b1eb
Update configuration_quiet.py

configuration_quiet.py CHANGED (+19 -22)
@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2023 Mistral AI and the HuggingFace Inc. team. All rights reserved.
+# Copyright 2023 Quiet AI and the HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,26 +12,23 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" Mistral model configuration"""
+""" Quiet model configuration"""
 
-from ...configuration_utils import PretrainedConfig
-from ...utils import logging
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
 
 
 logger = logging.get_logger(__name__)
 
 
-MISTRAL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "mistralai/Mistral-7B-v0.1": "https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json",
-}
-class MistralConfig(PretrainedConfig):
+class QuietConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`MistralModel`]. It is used to instantiate an
-    Mistral model according to the specified arguments, defining the model architecture. Instantiating a configuration
-    with the defaults will yield a similar configuration to that of the Mistral-7B-v0.1 or Mistral-7B-Instruct-v0.1.
+    This is the configuration class to store the configuration of a [`QuietModel`]. It is used to instantiate an
+    Quiet model according to the specified arguments, defining the model architecture. Instantiating a configuration
+    with the defaults will yield a similar configuration to that of the Quiet-7B-v0.1 or Quiet-7B-Instruct-v0.1.
 
-    [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
-    [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
+    [quietai/Quiet-7B-v0.1](https://huggingface.co/quietai/Quiet-7B-v0.1)
+    [quietai/Quiet-7B-Instruct-v0.1](https://huggingface.co/quietai/Quiet-7B-Instruct-v0.1)
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
@@ -39,8 +36,8 @@ class MistralConfig(PretrainedConfig):
 
     Args:
         vocab_size (`int`, *optional*, defaults to 32000):
-            Vocabulary size of the Mistral model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`MistralModel`]
+            Vocabulary size of the Quiet model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`QuietModel`]
         hidden_size (`int`, *optional*, defaults to 4096):
             Dimension of the hidden representations.
         intermediate_size (`int`, *optional*, defaults to 14336):
@@ -59,7 +56,7 @@ class MistralConfig(PretrainedConfig):
         hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
         max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
-            The maximum sequence length that this model might ever be used with. Mistral's sliding window attention
+            The maximum sequence length that this model might ever be used with. Quiet's sliding window attention
             allows sequence of up to 4096*32 tokens.
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
@@ -84,19 +81,19 @@ class MistralConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
 
     ```python
-    >>> from transformers import MistralModel, MistralConfig
+    >>> from transformers import QuietModel, QuietConfig
 
-    >>> # Initializing a Mistral 7B style configuration
-    >>> configuration = MistralConfig()
+    >>> # Initializing a Quiet 7B style configuration
+    >>> configuration = QuietConfig()
 
-    >>> # Initializing a model from the Mistral 7B style configuration
-    >>> model = MistralModel(configuration)
+    >>> # Initializing a model from the Quiet 7B style configuration
+    >>> model = QuietModel(configuration)
 
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
 
-    model_type = "mistral"
+    model_type = "quiet"
     keys_to_ignore_at_inference = ["past_key_values"]
 
     def __init__(