AIFunOver commited on
Commit
cdc327e
1 Parent(s): ec4a772

Upload openvino_detokenizer.xml with huggingface_hub

Browse files
Files changed (1) hide show
  1. openvino_detokenizer.xml +235 -0
openvino_detokenizer.xml ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_127701" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_127701">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Convert_127717" type="Convert" version="opset1">
14
+ <data destination_type="i32" />
15
+ <input>
16
+ <port id="0" precision="I64">
17
+ <dim>-1</dim>
18
+ <dim>-1</dim>
19
+ </port>
20
+ </input>
21
+ <output>
22
+ <port id="1" precision="I32">
23
+ <dim>-1</dim>
24
+ <dim>-1</dim>
25
+ </port>
26
+ </output>
27
+ </layer>
28
+ <layer id="2" name="Constant_127668" type="Const" version="opset1">
29
+ <data element_type="u8" shape="1351800" offset="0" size="1351800" />
30
+ <output>
31
+ <port id="0" precision="U8">
32
+ <dim>1351800</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="StringTensorUnpack_127669" type="StringTensorUnpack" version="extension">
37
+ <data mode="begins_ends" />
38
+ <input>
39
+ <port id="0" precision="U8">
40
+ <dim>1351800</dim>
41
+ </port>
42
+ </input>
43
+ <output>
44
+ <port id="1" precision="I32">
45
+ <dim>-1</dim>
46
+ </port>
47
+ <port id="2" precision="I32">
48
+ <dim>-1</dim>
49
+ </port>
50
+ <port id="3" precision="U8">
51
+ <dim>-1</dim>
52
+ </port>
53
+ </output>
54
+ </layer>
55
+ <layer id="4" name="VocabDecoder_127702" type="VocabDecoder" version="extension">
56
+ <data skip_tokens="128000, 128001, 128002, 128003, 128004, 128005, 128006, 128007, 128008, 128009, 128010, 128011, 128012, 128013, 128014, 128015, 128016, 128017, 128018, 128019, 128020, 128021, 128022, 128023, 128024, 128025, 128026, 128027, 128028, 128029, 128030, 128031, 128032, 128033, 128034, 128035, 128036, 128037, 128038, 128039, 128040, 128041, 128042, 128043, 128044, 128045, 128046, 128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054, 128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062, 128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070, 128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078, 128079, 128080, 128081, 128082, 128083, 128084, 128085, 128086, 128087, 128088, 128089, 128090, 128091, 128092, 128093, 128094, 128095, 128096, 128097, 128098, 128099, 128100, 128101, 128102, 128103, 128104, 128105, 128106, 128107, 128108, 128109, 128110, 128111, 128112, 128113, 128114, 128115, 128116, 128117, 128118, 128119, 128120, 128121, 128122, 128123, 128124, 128125, 128126, 128127, 128128, 128129, 128130, 128131, 128132, 128133, 128134, 128135, 128136, 128137, 128138, 128139, 128140, 128141, 128142, 128143, 128144, 128145, 128146, 128147, 128148, 128149, 128150, 128151, 128152, 128153, 128154, 128155, 128156, 128157, 128158, 128159, 128160, 128161, 128162, 128163, 128164, 128165, 128166, 128167, 128168, 128169, 128170, 128171, 128172, 128173, 128174, 128175, 128176, 128177, 128178, 128179, 128180, 128181, 128182, 128183, 128184, 128185, 128186, 128187, 128188, 128189, 128190, 128191, 128192, 128193, 128194, 128195, 128196, 128197, 128198, 128199, 128200, 128201, 128202, 128203, 128204, 128205, 128206, 128207, 128208, 128209, 128210, 128211, 128212, 128213, 128214, 128215, 128216, 128217, 128218, 128219, 128220, 128221, 128222, 128223, 128224, 128225, 128226, 128227, 128228, 128229, 128230, 128231, 128232, 128233, 128234, 128235, 128236, 128237, 128238, 128239, 128240, 128241, 128242, 128243, 128244, 128245, 128246, 128247, 128248, 128249, 128250, 128251, 128252, 128253, 128254, 128255" />
57
+ <input>
58
+ <port id="0" precision="I32">
59
+ <dim>-1</dim>
60
+ <dim>-1</dim>
61
+ </port>
62
+ <port id="1" precision="I32">
63
+ <dim>-1</dim>
64
+ </port>
65
+ <port id="2" precision="I32">
66
+ <dim>-1</dim>
67
+ </port>
68
+ <port id="3" precision="U8">
69
+ <dim>-1</dim>
70
+ </port>
71
+ </input>
72
+ <output>
73
+ <port id="4" precision="I32">
74
+ <dim>-1</dim>
75
+ </port>
76
+ <port id="5" precision="I32">
77
+ <dim>-1</dim>
78
+ </port>
79
+ <port id="6" precision="I32">
80
+ <dim>-1</dim>
81
+ </port>
82
+ <port id="7" precision="I32">
83
+ <dim>-1</dim>
84
+ </port>
85
+ <port id="8" precision="U8">
86
+ <dim>-1</dim>
87
+ </port>
88
+ </output>
89
+ </layer>
90
+ <layer id="5" name="FuzeRagged_127703" type="FuzeRagged" version="extension">
91
+ <input>
92
+ <port id="0" precision="I32">
93
+ <dim>-1</dim>
94
+ </port>
95
+ <port id="1" precision="I32">
96
+ <dim>-1</dim>
97
+ </port>
98
+ <port id="2" precision="I32">
99
+ <dim>-1</dim>
100
+ </port>
101
+ <port id="3" precision="I32">
102
+ <dim>-1</dim>
103
+ </port>
104
+ </input>
105
+ <output>
106
+ <port id="4" precision="I32">
107
+ <dim>-1</dim>
108
+ </port>
109
+ <port id="5" precision="I32">
110
+ <dim>-1</dim>
111
+ </port>
112
+ </output>
113
+ </layer>
114
+ <layer id="6" name="Constant_127705" type="Const" version="opset1">
115
+ <data element_type="u8" shape="47" offset="1351800" size="47" />
116
+ <output>
117
+ <port id="0" precision="U8">
118
+ <dim>47</dim>
119
+ </port>
120
+ </output>
121
+ </layer>
122
+ <layer id="7" name="Constant_127707" type="Const" version="opset1">
123
+ <data element_type="u8" shape="2" offset="1351847" size="2" />
124
+ <output>
125
+ <port id="0" precision="U8">
126
+ <dim>2</dim>
127
+ </port>
128
+ </output>
129
+ </layer>
130
+ <layer id="8" name="RegexNormalization_127708" type="RegexNormalization" version="extension">
131
+ <data global_replace="true" />
132
+ <input>
133
+ <port id="0" precision="I32">
134
+ <dim>-1</dim>
135
+ </port>
136
+ <port id="1" precision="I32">
137
+ <dim>-1</dim>
138
+ </port>
139
+ <port id="2" precision="U8">
140
+ <dim>-1</dim>
141
+ </port>
142
+ <port id="3" precision="U8">
143
+ <dim>47</dim>
144
+ </port>
145
+ <port id="4" precision="U8">
146
+ <dim>2</dim>
147
+ </port>
148
+ </input>
149
+ <output>
150
+ <port id="5" precision="I32">
151
+ <dim>-1</dim>
152
+ </port>
153
+ <port id="6" precision="I32">
154
+ <dim>-1</dim>
155
+ </port>
156
+ <port id="7" precision="U8">
157
+ <dim>-1</dim>
158
+ </port>
159
+ </output>
160
+ </layer>
161
+ <layer id="9" name="StringTensorPack_127709" type="StringTensorPack" version="extension">
162
+ <data mode="begins_ends" />
163
+ <input>
164
+ <port id="0" precision="I32">
165
+ <dim>-1</dim>
166
+ </port>
167
+ <port id="1" precision="I32">
168
+ <dim>-1</dim>
169
+ </port>
170
+ <port id="2" precision="U8">
171
+ <dim>-1</dim>
172
+ </port>
173
+ </input>
174
+ <output>
175
+ <port id="3" precision="STRING" names="string_output">
176
+ <dim>-1</dim>
177
+ </port>
178
+ </output>
179
+ </layer>
180
+ <layer id="10" name="Result_127710" type="Result" version="opset1">
181
+ <input>
182
+ <port id="0" precision="STRING">
183
+ <dim>-1</dim>
184
+ </port>
185
+ </input>
186
+ </layer>
187
+ </layers>
188
+ <edges>
189
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
190
+ <edge from-layer="1" from-port="1" to-layer="4" to-port="0" />
191
+ <edge from-layer="2" from-port="0" to-layer="3" to-port="0" />
192
+ <edge from-layer="3" from-port="1" to-layer="4" to-port="1" />
193
+ <edge from-layer="3" from-port="2" to-layer="4" to-port="2" />
194
+ <edge from-layer="3" from-port="3" to-layer="4" to-port="3" />
195
+ <edge from-layer="4" from-port="7" to-layer="5" to-port="3" />
196
+ <edge from-layer="4" from-port="8" to-layer="8" to-port="2" />
197
+ <edge from-layer="4" from-port="6" to-layer="5" to-port="2" />
198
+ <edge from-layer="4" from-port="5" to-layer="5" to-port="1" />
199
+ <edge from-layer="4" from-port="4" to-layer="5" to-port="0" />
200
+ <edge from-layer="5" from-port="4" to-layer="8" to-port="0" />
201
+ <edge from-layer="5" from-port="5" to-layer="8" to-port="1" />
202
+ <edge from-layer="6" from-port="0" to-layer="8" to-port="3" />
203
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="4" />
204
+ <edge from-layer="8" from-port="5" to-layer="9" to-port="0" />
205
+ <edge from-layer="8" from-port="6" to-layer="9" to-port="1" />
206
+ <edge from-layer="8" from-port="7" to-layer="9" to-port="2" />
207
+ <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
208
+ </edges>
209
+ <rt_info>
210
+ <add_attention_mask value="True" />
211
+ <add_prefix_space />
212
+ <add_special_tokens value="True" />
213
+ <bos_token_id value="128000" />
214
+ <chat_template value="{%- if messages|length % 2 == 0 -%}&#10; {%- set role = 'Agent' -%}&#10;{%- else -%}&#10; {%- set role = 'User' -%}&#10;{%- endif -%}&#10;{%- if categories is not defined -%}&#10; {%- &#10; set categories = ({&#10; &quot;S1&quot;: &quot;Violent Crimes.&quot;,&#10; &quot;S2&quot;: &quot;Non-Violent Crimes.&quot;,&#10; &quot;S3&quot;: &quot;Sex Crimes.&quot;,&#10; &quot;S4&quot;: &quot;Child Exploitation.&quot;,&#10; &quot;S5&quot;: &quot;Defamation.&quot;,&#10; &quot;S6&quot;: &quot;Specialized Advice.&quot;,&#10; &quot;S7&quot;: &quot;Privacy.&quot;,&#10; &quot;S8&quot;: &quot;Intellectual Property.&quot;,&#10; &quot;S9&quot;: &quot;Indiscriminate Weapons.&quot;,&#10; &quot;S10&quot;: &quot;Hate.&quot;,&#10; &quot;S11&quot;: &quot;Self-Harm.&quot;,&#10; &quot;S12&quot;: &quot;Sexual Content.&quot;,&#10; &quot;S13&quot;: &quot;Elections.&quot;,&#10; })&#10; -%}&#10;{%- endif -%}&#10;&#10;{{- &quot;&lt;|begin_of_text|>&lt;|start_header_id|>user&lt;|end_header_id|>\n\n&quot; -}}&#10;{%- set ns = namespace(contains_image=false) -%}&#10;{%- for message in messages -%}&#10; {%- for content in message['content'] | selectattr('type', 'equalto', 'image') -%}&#10; {%- set ns.contains_image = true -%}&#10; {%- endfor -%}&#10;{%- endfor -%}&#10;{%- if ns.contains_image -%}&#10;{{- &quot;&lt;|image|> &quot;-}}&#10;{%- endif -%}&#10;{{- &quot;Task: Check if there is unsafe content in '&quot; + role + &quot;' messages in conversations according our safety policy with the below categories.\n\n&quot; -}}&#10;{{- &quot;&lt;BEGIN UNSAFE CONTENT CATEGORIES>\n&quot;-}}&#10;&#10;{%- for key in categories -%}&#10; {%- if key not in excluded_category_keys -%}&#10; {{-key + &quot;: &quot; + categories[key] + &quot;\n&quot; -}}&#10; {%- endif -%}&#10;{%- endfor -%}&#10;&#10;{{- &quot;&lt;END UNSAFE CONTENT CATEGORIES>\n\n&quot;-}}&#10;{{- &quot;&lt;BEGIN CONVERSATION>\n\n&quot;-}}&#10;{%- for message in messages -%}&#10; {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}&#10; {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...')-}}&#10; {%- endif -%}&#10; {%- set content = message['content'] -%}&#10; {%- if message['role'] == 'user' -%}&#10; {%- set role = 'User' -%}&#10; {%- elif message['role'] == 'assistant' -%}&#10; {%- set role = 'Agent' -%}&#10; {%- endif -%}&#10; {%- for content in message['content'] | selectattr('type', 'equalto', 'text') -%}&#10;{{- role + ': ' + content['text'] | trim + '\n\n'-}}&#10; {%- endfor -%}&#10;{%- endfor -%}&#10;{{- &quot;&lt;END CONVERSATION>\n\n&quot;-}}&#10;{{- &quot;Provide your safety assessment for ONLY THE LAST &quot; + role + &quot; message in the above conversation:\n&quot;-}}&#10;{{- &quot; - First line must read 'safe' or 'unsafe'.\n&quot;-}}&#10;{{- &quot; - If unsafe, a second line must include a comma-separated list of violated categories. &lt;|eot_id|>&lt;|start_header_id|>assistant&lt;|end_header_id|>&quot;-}}&#10;" />
215
+ <clean_up_tokenization_spaces />
216
+ <detokenizer_input_type value="i64" />
217
+ <eos_token_id value="128009" />
218
+ <handle_special_tokens_with_re />
219
+ <number_of_inputs value="1" />
220
+ <openvino_tokenizers_version value="2024.5.0.0" />
221
+ <openvino_version value="2024.5.0" />
222
+ <original_tokenizer_class value="&lt;class 'transformers.tokenization_utils_fast.PreTrainedTokenizerFast'>" />
223
+ <sentencepiece_version value="0.2.0" />
224
+ <skip_special_tokens value="True" />
225
+ <streaming_detokenizer value="False" />
226
+ <tiktoken_version value="0.8.0" />
227
+ <tokenizer_output_type value="i64" />
228
+ <tokenizers_version value="0.20.1" />
229
+ <transformers_version value="4.46.3" />
230
+ <use_max_padding value="False" />
231
+ <use_sentencepiece_backend value="False" />
232
+ <utf8_replace_mode />
233
+ <with_detokenizer value="True" />
234
+ </rt_info>
235
+ </net>