yuewang-sf committed on
Commit
9e62c4f
·
1 Parent(s): a18f941

update model files

Browse files
configuration_codet5p_bimodal.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2023 Salesforce authors, The EleutherAI, and HuggingFace Teams. All rights reserved.
3
+
4
+ """ CodeT5+ bimodal model configuration"""
5
+ from transformers.configuration_utils import PretrainedConfig
6
+ from transformers.utils import logging
7
+
8
+ logger = logging.get_logger(__name__)
9
+
10
+
11
class CodeT5pBimodalConfig(PretrainedConfig):
    """Configuration for the CodeT5+ bimodal model.

    Stores the hyper-parameters passed to the constructor as attributes of the
    same name and derives two extra attributes from ``feed_forward_proj``:

    * ``dense_act_fn`` -- the activation name (the part after ``gated-`` if present).
    * ``is_gated_act`` -- ``True`` when ``feed_forward_proj`` starts with ``gated``.

    ``attribute_map`` exposes ``hidden_size``, ``num_attention_heads`` and
    ``num_hidden_layers`` as aliases for ``d_model``, ``num_heads`` and
    ``num_layers``.
    """

    model_type = "codet5p_bimodal"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {"hidden_size": "d_model", "num_attention_heads": "num_heads", "num_hidden_layers": "num_layers"}

    def __init__(
        self,
        vocab_size=32103,
        d_model=768,
        embed_dim=256,
        d_kv=64,
        d_ff=3072,
        num_layers=12,
        num_decoder_layers=None,
        num_heads=12,
        relative_attention_num_buckets=32,
        relative_attention_max_distance=128,
        dropout_rate=0.1,
        layer_norm_epsilon=1e-6,
        initializer_factor=1.0,
        feed_forward_proj="relu",
        is_encoder_decoder=False,
        use_cache=True,
        pad_token_id=0,
        eos_token_id=2,
        **kwargs
    ):
        """Build the configuration.

        Args:
            vocab_size, d_model, embed_dim, d_kv, d_ff, num_layers, num_heads,
            relative_attention_num_buckets, relative_attention_max_distance,
            dropout_rate, layer_norm_epsilon, initializer_factor, use_cache:
                Stored unchanged on the instance under the same name.
            num_decoder_layers: Stored as given; when ``None`` it falls back to
                ``num_layers`` (symmetric encoder/decoder depth).
            feed_forward_proj: Activation spec of the form ``{ACT_FN}`` or
                ``gated-{ACT_FN}``, e.g. ``"relu"`` or ``"gated-gelu"``.
            is_encoder_decoder, pad_token_id, eos_token_id, **kwargs:
                Forwarded to ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If ``feed_forward_proj`` has more than two ``-``-separated
                parts, or has two parts and the first is not ``gated``.
        """
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.embed_dim = embed_dim
        self.d_kv = d_kv
        self.d_ff = d_ff
        self.num_layers = num_layers
        self.num_decoder_layers = (
            num_decoder_layers if num_decoder_layers is not None else self.num_layers
        )  # default = symmetry
        self.num_heads = num_heads
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.dropout_rate = dropout_rate
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_factor = initializer_factor
        self.feed_forward_proj = feed_forward_proj
        self.use_cache = use_cache

        # "gated-gelu" -> ["gated", "gelu"]; "relu" -> ["relu"]
        act_info = self.feed_forward_proj.split("-")
        self.dense_act_fn = act_info[-1]
        self.is_gated_act = act_info[0] == "gated"

        # Parentheses make the intended precedence explicit: a two-part spec
        # must start with "gated", and more than two parts is never valid.
        if (len(act_info) > 1 and act_info[0] != "gated") or len(act_info) > 2:
            raise ValueError(
                f"`feed_forward_proj`: {feed_forward_proj} is not a valid activation function of the dense layer. "
                "Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. "
                "'gated-gelu' or 'relu'"
            )

        # for backwards compatibility
        if feed_forward_proj == "gated-gelu":
            self.dense_act_fn = "gelu_new"

        super().__init__(
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id,
            is_encoder_decoder=is_encoder_decoder,
            **kwargs,
        )
modeling_codet5p_bimodal.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2023 Salesforce authors, The EleutherAI, and HuggingFace Teams. All rights reserved.
3
+ """ PyTorch CodeT5+ matching models.
4
+ The implementation is based on transformers.models.t5.modeling_t5 by adding a projection layer on T5EncoderModel
5
+ """
6
+
7
+ from typing import Optional, Tuple, Union
8
+ import torch
9
+ from torch import nn
10
+ import torch.nn.functional as F
11
+ from transformers import T5ForConditionalGeneration
12
+ from transformers.modeling_outputs import (
13
+ BaseModelOutput,
14
+ )
15
+ from configuration_codet5p_bimodal import CodeT5pBimodalConfig
16
+
17
+
18
class CodeT5pBimodalModel(T5ForConditionalGeneration):
    """``T5ForConditionalGeneration`` subclass with two extra linear heads.

    * ``proj``: maps ``config.d_model`` features to ``config.embed_dim``.
    * ``itm_head``: maps ``config.d_model`` features to 2 outputs
      (presumably a binary matching classifier -- confirm against the caller).
    """

    config_class = CodeT5pBimodalConfig

    # NOTE(review): `authorized_missing_keys` looks like a legacy attribute name;
    # confirm the installed transformers version still honours it when loading.
    authorized_missing_keys = [r"encoder.embed_tokens.weight"]

    def __init__(self, config: CodeT5pBimodalConfig):
        """Build the base T5 model, then attach the two linear heads."""
        super().__init__(config)
        hidden_width = config.d_model
        # Creation order is kept (proj first, then itm_head) so that random
        # initialisation consumes the RNG in the same sequence.
        self.proj = nn.Linear(hidden_width, config.embed_dim)
        self.itm_head = nn.Linear(hidden_width, 2)