Wonder-Griffin committed
Commit 2d8d1b2 · 1 Parent(s): f1852c9
Upload JudgeXL

Files changed:
- README.md +3 -3
- config.json +35 -36
- judge_xl_model.py +140 -0
- model.safetensors +2 -2
README.md CHANGED
@@ -2,8 +2,6 @@
 base_model:
 - Wonder-Griffin/XL-Judge-LLM
 - Wonder-Griffin/Judge-GPT2
-library_name: transformers
-license: wtfpl
 datasets:
 - fka/awesome-chatgpt-prompts
 - BAAI/Infinity-Instruct
@@ -13,6 +11,8 @@ datasets:
 - Salesforce/wikitext
 language:
 - en
+library_name: transformers
+license: wtfpl
 metrics:
 - f1
 - accuracy
@@ -21,7 +21,7 @@ metrics:
 pipeline_tag: text-generation
 tags:
 - text-generation-inference
-inference:
+inference: true
 ---

 # Model Card for Model ID
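The front-matter edit above moves library_name and license below the language block and sets inference: true. A minimal sketch for reading that metadata back with huggingface_hub (the repo id is assumed from the push_to_hub call in judge_xl_model.py later in this commit):

# Sketch: inspect the updated model-card front matter.
# Assumption: the card lives in the "Wonder-Griffin/judge-xl-model" repo.
from huggingface_hub import ModelCard

card = ModelCard.load("Wonder-Griffin/judge-xl-model")
print(card.data.library_name)   # "transformers"
print(card.data.license)        # "wtfpl"
print(card.data.pipeline_tag)   # "text-generation"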
config.json CHANGED
@@ -1,36 +1,35 @@
+{
+  "architectures": [
+    "JudgeXL"
+  ],
+  "auto_map": {
+    "AutoConfig": "judge_xl_model.JudgeXLConfig",
+    "AutoModelForCausalLM": "judge_xl_model.JudgeXL"
+  },
+  "dropout": 0.1,
+  "ff_expansion_factor": 4,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4"
+  },
+  "is_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4
+  },
+  "max_len": 256,
+  "model_type": "judge-xl",
+  "n_head": 12,
+  "n_layer": 12,
+  "rnn_units": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "vocab_size": 50276
+}
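The auto_map block in the new config.json tells transformers which classes in judge_xl_model.py back the AutoConfig and AutoModelForCausalLM entry points. A minimal loading sketch, assuming the "Wonder-Griffin/judge-xl-model" repo id used by the upload script in this commit:

# Sketch: resolve the custom classes declared in config.json's auto_map.
# trust_remote_code=True is required so transformers will import
# judge_xl_model.JudgeXLConfig / judge_xl_model.JudgeXL from the repo.
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "Wonder-Griffin/judge-xl-model"  # assumed repo id
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

print(type(config).__name__, type(model).__name__)  # JudgeXLConfig JudgeXL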
judge_xl_model.py ADDED
@@ -0,0 +1,140 @@
import torch
import torch.nn as nn
import numpy as np
from transformers import PreTrainedModel, PretrainedConfig, AutoModelForCausalLM, AutoConfig
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Anomaly detection helps debug training but slows execution.
torch.autograd.set_detect_anomaly(True)


class JudgeXLConfig(PretrainedConfig):
    model_type = "judge-xl"

    def __init__(self, vocab_size=50276, hidden_size=768, max_len=256, n_layer=12, n_head=12,
                 ff_expansion_factor=4, rnn_units=768, num_labels=5, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.max_len = max_len
        self.n_layer = n_layer
        self.n_head = n_head
        self.ff_expansion_factor = ff_expansion_factor
        self.rnn_units = rnn_units
        self.num_labels = num_labels
        self.dropout = dropout
        self.is_decoder = True


class CustomEmbedding(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)

    def forward(self, inputs):
        return self.embedding(inputs)


class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding for batch-first inputs."""

    def __init__(self, n_embd, max_len=5000):
        super().__init__()
        self.n_embd = n_embd
        self.max_len = max_len
        pe = torch.zeros(max_len, n_embd)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, n_embd, 2).float() * -(np.log(10000.0) / n_embd))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # (1, max_len, n_embd) so it broadcasts over the batch dimension
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (batch, seq_len, n_embd)
        return x + self.pe[:, :x.size(1), :]


class TransformerXLBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        # batch_first=True keeps the (batch, seq, hidden) layout used everywhere else.
        self.attn = nn.MultiheadAttention(config.hidden_size, config.n_head,
                                          dropout=config.dropout, batch_first=True)
        self.ff = FeedForward(config)
        self.ln1 = nn.LayerNorm(config.hidden_size)
        self.ln2 = nn.LayerNorm(config.hidden_size)

    def forward(self, x, mask=None):
        attn_out, _ = self.attn(x, x, x, attn_mask=mask)
        out1 = self.ln1(x + attn_out)
        ff_out = self.ff(out1)
        return self.ln2(out1 + ff_out)


class FeedForward(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense1 = nn.Linear(config.hidden_size, config.hidden_size * config.ff_expansion_factor)
        self.dense2 = nn.Linear(config.hidden_size * config.ff_expansion_factor, config.hidden_size)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        x = torch.nn.functional.gelu(self.dense1(x))
        x = self.dropout(x)
        return self.dense2(x)


class JudgeXL(PreTrainedModel):
    config_class = JudgeXLConfig

    def __init__(self, config):
        super().__init__(config)
        self.token_embedding = CustomEmbedding(config.vocab_size, config.hidden_size)
        self.pos_encoding = PositionalEncoding(config.hidden_size, config.max_len)
        self.transformer_blocks = nn.ModuleList([TransformerXLBlock(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.hidden_size)
        self.rnn = nn.LSTM(config.hidden_size, config.rnn_units, num_layers=2,
                           dropout=config.dropout, bidirectional=True, batch_first=True)
        # The bidirectional LSTM doubles the feature size, so fc maps rnn_units * 2 -> vocab_size.
        self.fc = nn.Linear(config.rnn_units * 2, config.vocab_size)
        # Kept for checkpoint compatibility but not applied in forward: fc already produces
        # vocabulary logits, so chaining lm_head after it would be a shape mismatch.
        self.lm_head = nn.Linear(config.rnn_units, config.vocab_size)
        self.post_init()

    def forward(self, x, mask=None):
        x = self.token_embedding(x)   # (batch, seq, hidden)
        x = self.pos_encoding(x)
        for block in self.transformer_blocks:
            x = block(x, mask=mask)
        x = self.ln_f(x)
        x, _ = self.rnn(x)            # (batch, seq, rnn_units * 2)
        logits = self.fc(x)           # (batch, seq, vocab_size)
        return logits

    def init_weights(self):
        """Initialize custom layers with PreTrainedModel's default weight initialization."""
        super().init_weights()

    def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
        if past is None:
            return {"input_ids": input_ids}
        return {"input_ids": input_ids[:, -1:], "past_key_values": past}

    def _reorder_cache(self, past, beam_idx):
        return tuple(layer_past.index_select(1, beam_idx) for layer_past in past)

    def generate(self, prompt, max_len=100):
        # Greedy decoding; assumes a tokenizer has been attached as self.tokenizer.
        self.eval()
        input_ids = self.tokenizer(prompt, return_tensors='pt').input_ids
        generated = input_ids
        with torch.no_grad():
            for _ in range(max_len):
                outputs = self.forward(generated)
                next_token_logits = outputs[:, -1, :]  # logits for the last position only
                next_token_id = torch.argmax(next_token_logits, dim=-1, keepdim=True)
                generated = torch.cat((generated, next_token_id), dim=1)
                if next_token_id.item() == self.tokenizer.sep_token_id:
                    break
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)


# Build, register, and upload the model when this script is executed.
config = JudgeXLConfig()
model = JudgeXL(config)

# Register JudgeXLConfig with AutoConfig and JudgeXL with AutoModelForCausalLM
# (this is what writes the auto_map entries into config.json).
JudgeXLConfig.register_for_auto_class(AutoConfig)
JudgeXL.register_for_auto_class(AutoModelForCausalLM)

model.push_to_hub("Wonder-Griffin/judge-xl-model")
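Because JudgeXL.generate relies on a tokenizer attached to the model instance, loading the uploaded repo takes one extra step. A hedged usage sketch, using GPT-2's tokenizer as a stand-in since this commit ships no tokenizer files (both the tokenizer choice and the repo id are assumptions carried over from the script above):

# Usage sketch: load the pushed model and drive its custom greedy generate().
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "Wonder-Griffin/judge-xl-model", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder; not part of this commit

# JudgeXL.generate expects the tokenizer to live on the model instance.
model.tokenizer = tokenizer
print(model.generate("The verdict is", max_len=20))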
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:14c514e9ff5c7c297551a51e913a4ad6a636c9ac3de7e470dfe0c5638a0e2821
+size 1053691936
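The updated LFS pointer pins the uploaded weights to a specific digest and byte size. A small sketch for checking a locally downloaded copy against that pointer (the local path is an assumption):

# Sketch: verify a local model.safetensors against the LFS pointer above.
import hashlib
import os

EXPECTED_OID = "14c514e9ff5c7c297551a51e913a4ad6a636c9ac3de7e470dfe0c5638a0e2821"
EXPECTED_SIZE = 1053691936
path = "model.safetensors"  # assumed download location

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch with LFS pointer"
assert sha256.hexdigest() == EXPECTED_OID, "sha256 mismatch with LFS pointer"
print("model.safetensors matches the LFS pointer")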