jeffreymeetkai commited on
Commit
b1f1e88
1 Parent(s): d301c4d

update jinja chat template + readme usage

Browse files
Files changed (3) hide show
  1. README.md +4 -5
  2. tokenization_functionary.py +0 -524
  3. tokenizer_config.json +2 -5
README.md CHANGED
@@ -23,13 +23,13 @@ The model determines when to execute functions, whether in parallel or serially,
23
 
24
  ## How to Get Started
25
 
26
- We provide custom code for both converting tool definitions into the system prompts and parsing raw model response into a JSON object containing `role`, `content` and `tool_calls` fields. This enables the model to be able to generate tool calls.
27
 
28
  ```python
29
  from transformers import AutoModelForCausalLM, AutoTokenizer
30
 
31
- tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v2.5", trust_remote_code=True)
32
- model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-small-v2.5", device_map="auto", trust_remote_code=True)
33
 
34
  tools = [
35
  {
@@ -53,7 +53,6 @@ tools = [
53
  messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
54
 
55
  final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
56
- tokenizer.padding_side = "left"
57
  inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
58
  pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
59
  print(tokenizer.decode(pred.cpu()[0]))
@@ -63,7 +62,7 @@ print(tokenizer.decode(pred.cpu()[0]))
63
 
64
  We convert function definitions to a similar text to TypeScript definitions. Then we inject these definitions as system prompts. After that, we inject the default system prompt. Then we start the conversation messages.
65
 
66
- This formatting is also available via our vLLM server which we process the functions into Typescript definitions encapsulated in a system message and use a pre-defined Transformers chat template. This means that lists of messages can be formatted for you with the apply_chat_template() method within our server:
67
 
68
  ```python
69
  from openai import OpenAI
 
23
 
24
  ## How to Get Started
25
 
26
+ We provide custom code for parsing raw model responses into a JSON object containing `role`, `content` and `tool_calls` fields. This enables the users to read the function-calling output of the model easily.
27
 
28
  ```python
29
  from transformers import AutoModelForCausalLM, AutoTokenizer
30
 
31
+ tokenizer = AutoTokenizer.from_pretrained("meetkai/functionary-small-v3.2")
32
+ model = AutoModelForCausalLM.from_pretrained("meetkai/functionary-small-v3.2", device_map="auto", trust_remote_code=True)
33
 
34
  tools = [
35
  {
 
53
  messages = [{"role": "user", "content": "What is the weather in Istanbul and Singapore respectively?"}]
54
 
55
  final_prompt = tokenizer.apply_chat_template(messages, tools, add_generation_prompt=True, tokenize=False)
 
56
  inputs = tokenizer(final_prompt, return_tensors="pt").to("cuda")
57
  pred = model.generate_tool_use(**inputs, max_new_tokens=128, tokenizer=tokenizer)
58
  print(tokenizer.decode(pred.cpu()[0]))
 
62
 
63
  We convert function definitions to a similar text to TypeScript definitions. Then we inject these definitions as system prompts. After that, we inject the default system prompt. Then we start the conversation messages.
64
 
65
+ This formatting is also available via our vLLM server which we process the functions into Typescript definitions encapsulated in a system message using a pre-defined Transformers Jinja chat template. This means that the lists of messages can be formatted for you with the apply_chat_template() method within our server:
66
 
67
  ```python
68
  from openai import OpenAI
tokenization_functionary.py DELETED
@@ -1,524 +0,0 @@
1
- # Copyright (c) 2024, MeetKai Inc. All rights reserved.
2
-
3
- from copy import deepcopy
4
- import json
5
- from typing import Any, Dict, List, Literal, Optional, Union
6
-
7
- import jsonref
8
- from pydantic import BaseModel, Field, model_validator
9
- from typing_extensions import Self
10
-
11
- from transformers.tokenization_utils_base import BatchEncoding
12
- from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
13
- from transformers.utils import TensorType, logging
14
-
15
-
16
- logger = logging.get_logger(__name__)
17
- SYSTEM_PROMPT = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"""
18
- CODE_INTERPRETER_SYSTEM_PROMPT = """When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files."""
19
-
20
- class Function(BaseModel):
21
- name: str
22
- description: Optional[str] = Field(default="")
23
- parameters: Optional[dict] = None
24
-
25
-
26
- class Tool(BaseModel):
27
- type: Literal["function", "code_interpreter"]
28
- function: Optional[Function] = None
29
-
30
- @model_validator(mode="after")
31
- def check_type_function_matches(self) -> Self:
32
- if self.type == "function":
33
- assert self.function is not None, '"function" must contain function description when `"type": "function"`'
34
- else:
35
- assert self.function is None, '"function" must not be provided when `"type": "code_interpreter"`'
36
- return self
37
-
38
-
39
- def convert_data_type(param_type: str) -> str:
40
- """convert data_type to typescript data type
41
- Args:
42
- param_type (str): param_type
43
- Returns:
44
- str: param type in typescript
45
- """
46
- if param_type == "integer" or param_type == "float":
47
- return "number"
48
- return param_type
49
-
50
-
51
- def get_param_type(param: Dict) -> str:
52
- """get param_type of parameter
53
- Args:
54
- param (Dict): param dict in properties
55
- Returns:
56
- str: _description_
57
- """
58
- param_type = "any"
59
- if "type" in param:
60
- raw_param_type = param["type"]
61
- if type(raw_param_type) is list:
62
- param_type = " | ".join(raw_param_type)
63
- else:
64
- param_type = raw_param_type
65
-
66
- else: # in many cases, the json schema contains: oneOf instead of "type"
67
- if "oneOf" in param:
68
- one_of_types = []
69
- for item in param["oneOf"]:
70
- if "type" in item:
71
- one_of_types.append(convert_data_type(item["type"]))
72
- one_of_types = list(set(one_of_types))
73
- param_type = " | ".join(one_of_types)
74
- return convert_data_type(param_type)
75
-
76
-
77
- def get_format_param(param: Dict) -> Optional[str]:
78
- """Get "format" from param. There are cases where format is not directly in param but in oneOf
79
- Args:
80
- param (Dict): _description_
81
- Returns:
82
- Optional[str]: _description_
83
- """
84
- if "format" in param:
85
- return param["format"]
86
- if "oneOf" in param:
87
- formats = []
88
- for item in param["oneOf"]:
89
- if "format" in item:
90
- formats.append(item["format"])
91
- if len(formats) > 0:
92
- return " or ".join(formats)
93
- return None
94
-
95
-
96
- def get_param_info(param: Dict) -> Optional[str]:
97
- """get additional information about parameter such as: format, default value, min, max, ...
98
- Args:
99
- param (Dict): _description_
100
- Returns:
101
- Optional[str]: _description_
102
- """
103
- param_type = param.get("type", "any")
104
- info_list = []
105
- if "description" in param:
106
- desc = param["description"]
107
- if not desc.endswith("."):
108
- desc += "."
109
- info_list.append(desc)
110
-
111
- if "default" in param:
112
- default_value = param["default"]
113
- if param_type == "string":
114
- default_value = f'"{default_value}"' # if string --> add ""
115
- info_list.append(f"Default={default_value}.")
116
-
117
- format_param = get_format_param(param)
118
- if format_param is not None:
119
- info_list.append("Format=" + format_param)
120
-
121
- for field, field_name in [
122
- ("maximum", "Maximum"),
123
- ("minimum", "Minimum"),
124
- ("maxLength", "Maximum length"),
125
- ("minLength", "Minimum length"),
126
- ]:
127
- if field in param:
128
- info_list.append(f"{field_name}=" + str(param[field]))
129
-
130
- if len(info_list) > 0:
131
- result = "// " + " ".join(info_list)
132
- result = result.replace("\n", " ")
133
- return result
134
- return None
135
-
136
-
137
- def append_new_param_info(
138
- info_list: List[str],
139
- param_declaration: str,
140
- comment_info: Optional[str],
141
- examples_info: List,
142
- depth: int,
143
- ):
144
- """Append a new parameter with comment to the info_list
145
- Args:
146
- info_lines (List[str]): current info_list
147
- param_declaration (str): param: type
148
- comment_info (Optional[str]): information of comment
149
- examples_info (List): information of examples given
150
- depth (int): level of nested param
151
- """
152
- offset = ""
153
- if depth >= 1:
154
- offset = "".join([" " for _ in range(depth)])
155
- if comment_info is not None:
156
- # if depth == 0: # format: //comment\nparam: type
157
- info_list.append(f"{offset}{comment_info}")
158
- if len(examples_info) > 0:
159
- for example in examples_info:
160
- info_list.append(f"{offset}{example}")
161
- info_list.append(f"{offset}{param_declaration}")
162
- # else: # format: param: type // comment
163
- # info_list.append(f"{offset}{param_declaration} {comment_info}")
164
- else:
165
- info_list.append(f"{offset}{param_declaration}")
166
-
167
-
168
- def get_examples_info(param_name: str, examples: List) -> List:
169
- """get information about examples provided
170
- Args:
171
- param_name (str): _description_
172
- examples (List): _description_
173
- Returns:
174
- List: _description_
175
- """
176
- examples_list = [f"// Example {param_name}:"]
177
- for example in examples:
178
- if isinstance(example, dict) or isinstance(example, list):
179
- example_str = json.dumps(example, ensure_ascii=False).replace('\n', '\\n')
180
- else:
181
- example_str = str(example).replace('\n', '\\n')
182
- examples_list.append(f"// {example_str}")
183
-
184
- return examples_list
185
-
186
-
187
- def get_enum_option_str(enum_options: List) -> str:
188
- """get enum option separated by: "|"
189
- Args:
190
- enum_options (List): list of options
191
- Returns:
192
- _type_: concatenation of options separated by "|"
193
- """
194
- # if each option is string --> add quote
195
- return " | ".join([f'"{v}"' if type(v) is str else str(v) for v in enum_options])
196
-
197
-
198
- def get_array_typescript(
199
- param_name: Optional[str], param_dic: dict, depth: int = 0
200
- ) -> str:
201
- """recursive implementation for generating type script of array
202
- Args:
203
- param_name (Optional[str]): name of param, optional
204
- param_dic (dict): param_dic
205
- depth (int, optional): nested level. Defaults to 0.
206
- Returns:
207
- _type_: typescript of array
208
- """
209
- offset = ""
210
- if depth >= 1:
211
- offset = "".join([" " for _ in range(depth)])
212
- items_info = param_dic.get("items", {})
213
-
214
- if len(items_info) == 0:
215
- if param_name is not None:
216
- return f"{offset}{param_name}: []"
217
- else:
218
- return "[]"
219
- array_type = get_param_type(items_info)
220
- if array_type == "object":
221
- info_lines = []
222
- child_lines = get_parameter_typescript(
223
- items_info.get("properties", {}), items_info.get("required", []), depth + 1
224
- )
225
- # if comment_info is not None:
226
- # info_lines.append(f"{offset}{comment_info}")
227
- if param_name is not None:
228
- info_lines.append(f"{offset}{param_name}" + ": {")
229
- else:
230
- info_lines.append(f"{offset}" + "{")
231
- info_lines.extend(child_lines)
232
- info_lines.append(f"{offset}" + "}[]")
233
- return "\n".join(info_lines)
234
-
235
- elif array_type == "array":
236
- item_info = get_array_typescript(None, items_info, depth + 1)
237
- if param_name is None:
238
- return f"{item_info}[]"
239
- return f"{offset}{param_name}: {item_info.strip()}[]"
240
-
241
- else:
242
- if "enum" in items_info:
243
- item_type = get_enum_option_str(items_info["enum"])
244
- if param_name is None:
245
- return f"({item_type})[]"
246
- else:
247
- return f"{offset}{param_name}: ({item_type})[]"
248
- else:
249
- if param_name is None:
250
- return f"{array_type}[]"
251
- else:
252
- return f"{offset}{param_name}: {array_type}[],"
253
-
254
-
255
- def get_parameter_typescript(properties, required_params, depth=0) -> List[str]:
256
- """Recursion, returning the information about parameters including data type, description and other information
257
- These kinds of information will be put into the prompt
258
- Args:
259
- properties (_type_): properties in parameters
260
- required_params (_type_): List of required parameters
261
- depth (int, optional): the depth of params (nested level). Defaults to 0.
262
- Returns:
263
- _type_: list of lines containing information about all parameters
264
- """
265
- tp_lines = []
266
- for param_name, param in properties.items():
267
- # Sometimes properties have "required" field as a list of string.
268
- # Even though its supposed to be not under properties. So we skip it
269
- if not isinstance(param, dict):
270
- continue
271
- # Param Description
272
- comment_info = get_param_info(param)
273
- # Param Examples
274
- examples_info = []
275
- if "examples" in param:
276
- examples_info = get_examples_info(param_name, param["examples"])
277
- # Param Name declaration
278
- param_declaration = f"{param_name}"
279
- if isinstance(required_params, list):
280
- if param_name not in required_params:
281
- param_declaration += "?"
282
- param_type = get_param_type(param)
283
-
284
- offset = ""
285
- if depth >= 1:
286
- offset = "".join([" " for _ in range(depth)])
287
-
288
- if param_type == "object": # param_type is object
289
- child_lines = get_parameter_typescript(
290
- param.get("properties", {}), param.get("required", []), depth + 1
291
- )
292
- if comment_info is not None:
293
- tp_lines.append(f"{offset}{comment_info}")
294
- if len(examples_info) > 0:
295
- for example in examples_info:
296
- tp_lines.append(f"{offset}{example}")
297
-
298
- param_declaration += ": {"
299
- tp_lines.append(f"{offset}{param_declaration}")
300
- tp_lines.extend(child_lines)
301
- tp_lines.append(f"{offset}" + "},")
302
-
303
- elif param_type == "array": # param_type is an array
304
- item_info = param.get("items", {})
305
- if "type" not in item_info: # don't know type of array
306
- param_declaration += ": [],"
307
- append_new_param_info(
308
- tp_lines, param_declaration, comment_info, examples_info, depth
309
- )
310
- else:
311
- array_declaration = get_array_typescript(
312
- param_declaration, param, depth
313
- )
314
- if not array_declaration.endswith(","):
315
- array_declaration += ","
316
- if comment_info is not None:
317
- tp_lines.append(f"{offset}{comment_info}")
318
- if len(examples_info) > 0:
319
- for example in examples_info:
320
- tp_lines.append(f"{offset}{example}")
321
- tp_lines.append(array_declaration)
322
- else:
323
- if "enum" in param:
324
- param_type = get_enum_option_str(param["enum"])
325
- # param_type = " | ".join([f'"{v}"' for v in param["enum"]])
326
- if "nullable" in param and param["nullable"] is True:
327
- param_type += " | null"
328
- param_declaration += f": {param_type},"
329
- append_new_param_info(
330
- tp_lines, param_declaration, comment_info, examples_info, depth
331
- )
332
-
333
- return tp_lines
334
-
335
- def generate_schema_from_functions(
336
- functions: List[Function], namespace="functions"
337
- ) -> str:
338
- """
339
- Convert functions schema to a schema that language models can understand.
340
- """
341
-
342
- schema = "// Supported function definitions that should be called when necessary.\n"
343
- schema += f"namespace {namespace} {{\n\n"
344
-
345
- for function in functions:
346
- # Convert a Function object to dict, if necessary
347
- if not isinstance(function, dict):
348
- function = function.model_dump()
349
- function_name = function.get("name", None)
350
- if function_name is None:
351
- continue
352
-
353
- description = function.get("description", "")
354
- schema += f"// {description}\n"
355
- schema += f"type {function_name}"
356
-
357
- parameters = function.get("parameters", None)
358
- if parameters is not None and parameters.get("properties") is not None:
359
- parameters = deepcopy(jsonref.JsonRef.replace_refs(parameters))
360
- schema += " = (_: {\n"
361
- required_params = parameters.get("required", [])
362
- tp_lines = get_parameter_typescript(
363
- parameters.get("properties"),
364
- required_params,
365
- 0,
366
- )
367
- schema += "\n".join(tp_lines)
368
- schema += "\n}) => any;\n\n"
369
- else:
370
- # Doesn't have any parameters
371
- schema += " = () => any;\n\n"
372
-
373
- schema += f"}} // namespace {namespace}"
374
-
375
- return schema
376
-
377
- class FunctionaryTokenizer(PreTrainedTokenizerFast):
378
- def apply_chat_template(
379
- self,
380
- conversation: Union[List[Dict[str, str]], List[List[Dict[str, str]]], str],
381
- tools: Optional[List[Dict[str, Any]]],
382
- chat_template: Optional[str] = None,
383
- add_generation_prompt: bool = False,
384
- tokenize: bool = True,
385
- padding: bool = False,
386
- truncation: bool = False,
387
- max_length: Optional[int] = None,
388
- return_tensors: Optional[Union[str, TensorType]] = None,
389
- return_dict: bool = False,
390
- tokenizer_kwargs: Optional[Dict[str, Any]] = None,
391
- **kwargs,
392
- ) -> Union[str, List[int], List[str], List[List[int]], BatchEncoding]:
393
-
394
- if return_dict and not tokenize:
395
- raise ValueError(
396
- "`return_dict=True` is incompatible with `tokenize=False`, because there is no dict "
397
- "of tokenizer outputs to return."
398
- )
399
-
400
- if tokenizer_kwargs is None:
401
- tokenizer_kwargs = {}
402
-
403
- using_default_template = False
404
-
405
- # First, handle the cases when the model has a dict of multiple templates
406
- if isinstance(self.chat_template, dict) or (
407
- self.chat_template is None and isinstance(self.default_chat_template, dict)
408
- ):
409
- if self.chat_template is not None:
410
- template_dict = self.chat_template
411
- using_default_dict = False
412
- else:
413
- template_dict = self.default_chat_template
414
- using_default_dict = True
415
- if chat_template is not None and chat_template in template_dict:
416
- # The user can pass the name of a template to the chat template argument instead of an entire template
417
- chat_template = template_dict[chat_template]
418
- if using_default_dict:
419
- using_default_template = True
420
- elif chat_template is None and "default" in template_dict:
421
- chat_template = template_dict["default"]
422
- if using_default_dict:
423
- using_default_template = True
424
- elif chat_template is None:
425
- raise ValueError(
426
- "This model has multiple chat templates with no default specified! Please either pass a chat "
427
- "template or the name of the template you wish to use to the `chat_template` argument. Available "
428
- f"template names are {sorted(template_dict.keys())}."
429
- )
430
- elif chat_template is None:
431
- # These are the cases when the model has a single template
432
- # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
433
- if self.chat_template is not None:
434
- chat_template = self.chat_template
435
- else:
436
- chat_template = self.default_chat_template
437
- using_default_template = True
438
-
439
- if using_default_template:
440
- logger.warning_once(
441
- "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
442
- "very error-prone, because models are often trained with templates different from the class default! "
443
- "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
444
- "point any code depending on them will stop working. We recommend setting a valid chat template before "
445
- "then to ensure that this model continues working without issues."
446
- )
447
-
448
- PYTHON_RUN_SYS_MSG = "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files."
449
- SYSTEM_CONTENT = """You are capable of executing available function(s) if required.
450
- Only execute function(s) when absolutely necessary.
451
- Ask for the required input to:recipient==all
452
- Use JSON for function arguments.
453
- Respond in this format:
454
- >>>${recipient}
455
- ${content}
456
- Available functions:
457
- """
458
-
459
- # Prepare tools/functions into schema
460
- functions_pydantic_to_render = []
461
- has_code_interpreter = False
462
- if tools is not None:
463
- for item in tools:
464
- if (
465
- "function" in item and item["function"] is not None
466
- ): # new data format: tools: [{"type": xx, "function": xxx}]
467
- functions_pydantic_to_render.append(item["function"])
468
- elif "type" in item and item["type"] == "code_interpreter":
469
- has_code_interpreter = True
470
- else:
471
- functions_pydantic_to_render.append(item) # old format
472
-
473
- conversation.insert(
474
- 0,
475
- {
476
- "role": "system",
477
- "content": SYSTEM_CONTENT + generate_schema_from_functions(functions_pydantic_to_render),
478
- },
479
- )
480
- if has_code_interpreter:
481
- conversation.insert(1, {"role": "system", "content": PYTHON_RUN_SYS_MSG})
482
-
483
- # Compilation function uses a cache to avoid recompiling the same template
484
- compiled_template = self._compile_jinja_template(chat_template)
485
-
486
- if isinstance(conversation, (list, tuple)) and (
487
- isinstance(conversation[0], (list, tuple)) or hasattr(conversation[0], "messages")
488
- ):
489
- conversations = conversation
490
- is_batched = True
491
- else:
492
- conversations = [conversation]
493
- is_batched = False
494
-
495
- rendered = []
496
- template_kwargs = {**self.special_tokens_map, **kwargs} # kwargs overwrite special tokens if both are present
497
- for chat in conversations:
498
- if hasattr(chat, "messages"):
499
- # Indicates it's a Conversation object
500
- chat = chat.messages
501
- rendered_chat = compiled_template.render(
502
- messages=chat, add_generation_prompt=add_generation_prompt, **template_kwargs
503
- )
504
- rendered.append(rendered_chat)
505
-
506
- if not is_batched:
507
- rendered = rendered[0]
508
-
509
- if tokenize:
510
- out = self(
511
- rendered,
512
- padding=padding,
513
- truncation=truncation,
514
- max_length=max_length,
515
- add_special_tokens=False,
516
- return_tensors=return_tensors,
517
- **tokenizer_kwargs,
518
- )
519
- if return_dict:
520
- return out
521
- else:
522
- return out["input_ids"]
523
- else:
524
- return rendered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -2050,7 +2050,7 @@
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
- "chat_template": "{# version=v3.llama3 #}{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ '>>>all\n' + message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n>>>' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "legacy": true,
@@ -2061,8 +2061,5 @@
2061
  "model_max_length": 16384,
2062
  "pad_token": "<|eot_id|>",
2063
  "padding_side": "right",
2064
- "tokenizer_class": "PreTrainedTokenizerFast",
2065
- "auto_map": {
2066
- "AutoTokenizer": ["tokenization_functionary.FunctionaryTokenizer", null]
2067
- }
2068
  }
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{# version=v3.llama3 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- if examples_info | length > 0 -%}\n {# Append each example info #}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{ \"\\n\" + offset + param_declaration }}\n{%- endmacro -%}\n\n{%- macro convert_data_type(param_type) -%}\n {%- if param_type == \"integer\" or param_type == \"float\" -%}\n {{ \"number\" }}\n {%- else -%}\n {{ param_type }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_type(param) -%}\n {%- set param_type = \"any\" -%}\n\n {%- if \"type\" in param -%}\n {%- set raw_param_type = param[\"type\"] -%}\n {%- if raw_param_type is iterable and raw_param_type is not string -%}\n {%- set param_type = raw_param_type | join(\" | \") -%}\n {%- else -%}\n {%- set param_type = raw_param_type -%}\n {%- endif -%}\n {{ convert_data_type(param_type) }}\n {%- elif \"oneOf\" in param -%}\n {%- set one_of_types = param[\"oneOf\"]|selectattr(\"type\", \"defined\")|list -%}\n {%- set one_of_types = one_of_types|map(attribute=\"type\")|unique|list -%}\n {{ convert_data_type(one_of_types | join(\" | \")) }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_format_param(param) -%}\n {%- if \"format\" in param -%}\n {{ param[\"format\"] }}\n {%- elif \"oneOf\" in param -%}\n {%- set formats = [] -%}\n {%- for item in param[\"oneOf\"] -%}\n {%- if \"format\" in item -%}\n {%- if item[\"format\"] == param[\"oneOf\"][-1][\"format\"] -%}\n {{ item[\"format\"] }}\n {%- else -%}\n {{ item[\"format\"] + \" or \"}}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\" }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- 
macro get_param_info(param) -%}\n {%- set param_type = param.get(\"type\", \"any\") -%}\n {%- set format_param = get_format_param(param) -%}\n\n {%- if \"description\" in param or \"default\" in param or format_param != \"<|NONE|>\" or param[\"maximum\"] or param[\"minimum\"] or param[\"maxLength\"] or param[\"minLength\"] -%}\n {{ \"//\" }}\n {%- if \"description\" in param -%}\n {%- set desc = param[\"description\"] -%}\n {%- if not desc.endswith(\".\") -%}\n {%- set desc = desc + \".\" -%}\n {%- endif -%}\n {{ \" \" + desc }}\n {%- endif -%}\n\n {%- if \"default\" in param -%}\n {%- set default_value = param[\"default\"] -%}\n {%- if param_type == \"string\" -%}\n {%- set default_value = '\"' ~ default_value ~ '\"' -%}\n {%- endif -%}\n {{ \" Default=\" ~ default_value ~ \".\" }}\n {%- endif -%}\n\n {%- set format_param = get_format_param(param) -%}\n {%- if format_param != \"<|NONE|>\" -%}\n {{ \" Format=\" ~ format_param }}\n {%- endif -%}\n\n {%- for field, field_name in [(\"maximum\", \"Maximum\"), (\"minimum\", \"Minimum\"), (\"maxLength\", \"Maximum length\"), (\"minLength\", \"Minimum length\")] -%}\n {%- if field in param -%}\n {{ \" \" + field_name ~ \"=\" ~ param[field] }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\"}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_enum_option_str(enum_options) -%}\n {%- for v in enum_options -%}\n {%- if v is string -%}\n {{ '\"' + v + '\"' }}\n {%- else -%}\n {{ v }}\n {%- endif -%}\n {%- if enum_options|length > 0 and v != enum_options[-1] -%}\n {{ \" | \" }}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro get_array_typescript(param_name, param_dic, depth) -%}\n {%- set offset = '' -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- set items_info = param_dic.get('items', {}) -%}\n\n {%- if items_info|length == 0 -%}\n {%- if param_name is not none -%}\n {{ \"\\n\" + offset + param_name + \": []\" }}\n {%- else -%}\n {{ \"\\n\" + offset + 
\"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- set array_type = get_param_type(items_info) -%}\n {%- if array_type == 'object' -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": {\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"{\" }}\n {%- endif -%}\n {{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}}\n {{- \"\\n\" + offset + \"}[]\" }}\n {%- elif array_type == 'array' -%}\n {%- set item_info = get_array_typescript(None, items_info, depth + 1) -%}\n {%- if not param_name -%}\n {{ \"\\n\" + item_info + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + item_info|trim + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if 'enum' in items_info -%}\n {%- set item_type = get_enum_option_str(items_info['enum']) -%}\n {%- if param_name is none -%}\n {{ \"(\" + item_type + \")[]\"}}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": (\" + item_type + \")[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if param_name is none -%}\n {{ \"\\n\" + array_type + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + array_type + \"[],\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}\n {%- set res = \"\" -%}\n {%- for param_name, param in properties.items() -%}\n {%- if param is mapping -%}\n {%- set comment_info = get_param_info(param) -%}\n {# Param Examples #}\n {%- set examples_info = [] -%}\n {%- if \"examples\" in param -%}\n {%- set examples_info = [\"Example \" + param_name + \":\"] -%}\n {%- set examples_info = examples_info + param[\"examples\"] -%}\n {%- endif -%}\n\n {# Param Name declaration #}\n {%- set param_declaration = param_name -%}\n {%- if required_params is iterable and param_name not in required_params -%}\n {%- set param_declaration = param_declaration + \"?\" -%}\n {%- endif -%}\n\n {%- set param_type = get_param_type(param) -%}\n\n 
{# Handle indentation based on depth #}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n\n {%- if param_type == \"object\" -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": {\" -%}\n {{ \"\\n\" + offset + param_declaration -}}\n {{- get_parameter_typescript(param.get(\"properties\", {}), param.get(\"required\", []), depth + 1) -}}\n {{- \"\\n\" + offset + \"},\" }}\n {%- elif param_type == \"array\" -%}\n {%- set item_info = param.get(\"items\", {}) -%}\n {%- if \"type\" not in item_info -%}\n {%- set param_declaration = param_declaration + \": [],\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- else -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}\n {%- if not array_declaration.endswith(\",\") -%}\n {%- set array_declaration = array_declaration + \",\" -%}\n {%- endif -%}\n {{ array_declaration}}\n {%- endif -%}\n {%- else -%}\n {%- if \"enum\" in param -%}\n {%- set param_type = get_enum_option_str(param[\"enum\"]) -%}\n {%- endif -%}\n {%- if \"nullable\" in param and param[\"nullable\"] -%}\n {%- set param_type = param_type + \" | null\" -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": \" + param_type + \",\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- endif -%}\n {%- endif -%}\n 
{%- endfor -%}\n{%- endmacro -%}\n\n{%- macro generate_schema_from_functions(functions, namespace='functions') -%}\n {{ \"// Supported function definitions that should be called when necessary.\\n\" -}}\n {{- \"namespace \" + namespace + \" {\\n\\n\" -}}\n\n {%- for function in functions -%}\n {%- if function.get(\"function\") is not none -%}\n {%- set function = function.get(\"function\") -%}\n {%- endif -%}\n\n {%- set function_name = function.get(\"name\") -%}\n {%- if function_name is not none -%}\n {%- set description = function.get('description', '') -%}\n {%- set parameters = function.get('parameters', {}) -%}\n {{- \"// \" + description + \"\\n\" -}}\n {{- \"type \" + function_name -}}\n {%- if parameters is not none and parameters.get(\"properties\") is not none -%}\n {{- \" = (_: {\" -}}\n {%- set required_params = parameters.get(\"required\", []) -%}\n {{ get_parameter_typescript(parameters.get(\"properties\"), required_params, 0) -}}\n {{- \"\\n}) => any;\\n\\n\" }}\n {%- else -%}\n {{ \" = () => any;\\n\\n\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {{ \"} // namespace \" + namespace }}\n{%- endmacro -%}\n{%- if not tools -%}\n {%- set tools = [] -%}\n{%- endif -%}\n{{ '<|start_header_id|>system<|end_header_id|>\\n\\nYou are capable of executing available function(s) if required.\\nOnly execute function(s) when absolutely necessary.\\nAsk for the required input to:recipient==all\\nUse JSON for function arguments.\\nRespond in this format:\\n>>>${recipient}\\n${content}\\nAvailable functions:\\n' + generate_schema_from_functions(tools) + '<|eot_id|>' -}}\n{%- if tools|length > 0 and tools|selectattr(\"type\", \"equalto\", \"code_interpreter\")|list|length > 0 -%}\n {{ '<|start_header_id|>system<|end_header_id|>\\n\\nWhen you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. 
The drive at \\'/mnt/data\\' can be used to save and persist user files.<|eot_id|>' }}\n{%- endif -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'user' or message['role'] == 'system' -%}\n        {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n    {%- elif message['role'] == 'tool' -%}\n        {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n    {%- else -%}\n        {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'}}\n        {%- if message['content'] is not none -%}\n            {{ '>>>all\\n' + message['content'] }}\n        {% endif %}\n        {%- if 'tool_calls' in message and message['tool_calls'] is not none -%}\n            {%- for tool_call in message['tool_calls'] -%}\n                {{ '>>>' + tool_call['function']['name'] + '\\n' + tool_call['function']['arguments'] }}\n            {%- endfor -%}\n        {%- endif -%}\n        {{ '<|eot_id|>' }}\n    {%- endif -%}\n{%- endfor -%}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n>>>' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "legacy": true,
 
2061
  "model_max_length": 16384,
2062
  "pad_token": "<|eot_id|>",
2063
  "padding_side": "right",
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
2065
  }