Nanobit committed on
Commit
9cd27b2
1 Parent(s): c1b741d

fix(readme): clarify custom user prompt [no-ci] (#1124)

Browse files

* fix(readme): clarify custom user prompt

* chore: update example to show use case of setting field

Files changed (1) hide show
  1. README.md +6 -3
README.md CHANGED
@@ -374,7 +374,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
374
  For a dataset that is preprocessed for instruction purposes:
375
 
376
  ```json
377
- {"instruction": "...", "output": "..."}
378
  ```
379
 
380
  You can use this example in your YAML config:
@@ -385,6 +385,8 @@ datasets:
385
  type:
386
  system_prompt: ""
387
  field_system: system
 
 
388
  format: "[INST] {instruction} [/INST]"
389
  no_input_format: "[INST] {instruction} [/INST]"
390
  ```
@@ -577,10 +579,10 @@ datasets:
577
  field_human: # Optional[str]. Human key to use for conversation.
578
  field_model: # Optional[str]. Assistant key to use for conversation.
579
 
580
- # Custom user prompt
581
  - path: repo
582
  type:
583
- # The below are defaults. only set what's needed.
584
  system_prompt: ""
585
  system_format: "{system}"
586
  field_system: system
@@ -589,6 +591,7 @@ datasets:
589
  field_output: output
590
 
591
  # Customizable to be single line or multi-line
 
592
  # 'format' can include {input}
593
  format: |-
594
  User: {instruction} {input}
 
374
  For a dataset that is preprocessed for instruction purposes:
375
 
376
  ```json
377
+ {"input": "...", "output": "..."}
378
  ```
379
 
380
  You can use this example in your YAML config:
 
385
  type:
386
  system_prompt: ""
387
  field_system: system
388
+ field_instruction: input
389
+ field_output: output
390
  format: "[INST] {instruction} [/INST]"
391
  no_input_format: "[INST] {instruction} [/INST]"
392
  ```
 
579
  field_human: # Optional[str]. Human key to use for conversation.
580
  field_model: # Optional[str]. Assistant key to use for conversation.
581
 
582
+ # Custom user instruction prompt
583
  - path: repo
584
  type:
585
+ # The below are defaults. Only set what's needed if you use a different column name.
586
  system_prompt: ""
587
  system_format: "{system}"
588
  field_system: system
 
591
  field_output: output
592
 
593
  # Customizable to be single line or multi-line
594
+ # Use {instruction}/{input} as keys to be replaced
595
  # 'format' can include {input}
596
  format: |-
597
  User: {instruction} {input}