jupyterjazz
committed on
Commit
•
9db6c6f
1
Parent(s):
77af1c7
Update configuration_xlm_roberta.py
Browse files
configuration_xlm_roberta.py
CHANGED
@@ -21,6 +21,7 @@ class XLMRobertaFlashConfig(PretrainedConfig):
|
|
21 |
position_embedding_type="absolute",
|
22 |
use_cache=True,
|
23 |
classifier_dropout=None,
|
|
|
24 |
**kwargs,
|
25 |
):
|
26 |
super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
@@ -39,4 +40,6 @@ class XLMRobertaFlashConfig(PretrainedConfig):
|
|
39 |
self.layer_norm_eps = layer_norm_eps
|
40 |
self.position_embedding_type = position_embedding_type
|
41 |
self.use_cache = use_cache
|
42 |
-
self.classifier_dropout = classifier_dropout
|
|
|
|
|
|
21 |
position_embedding_type="absolute",
|
22 |
use_cache=True,
|
23 |
classifier_dropout=None,
|
24 |
+
use_flash_attn=True
|
25 |
**kwargs,
|
26 |
):
|
27 |
super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
|
|
40 |
self.layer_norm_eps = layer_norm_eps
|
41 |
self.position_embedding_type = position_embedding_type
|
42 |
self.use_cache = use_cache
|
43 |
+
self.classifier_dropout = classifier_dropout
|
44 |
+
self.use_flash_attn = use_flash_attn
|
45 |
+
|