allow the sharegpt handler to also better handle datasets destined for openai finetuning (#1361)
Browse files
* allow the sharegpt handler to also better handle datasets destined for openai finetuning
* make sure to support system role
src/axolotl/prompt_strategies/sharegpt.py
CHANGED
@@ -82,7 +82,7 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|
82 |
basic sharegpt strategy to grab conversations from the sample row
|
83 |
"""
|
84 |
|
85 |
-
_strict =
|
86 |
|
87 |
@property
|
88 |
def strict(self):
|
@@ -96,10 +96,25 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|
96 |
conversations = prompt["conversations"]
|
97 |
if self.strict:
|
98 |
return conversations
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
turns = [
|
102 |
-
{"from": role_map[t[
|
|
|
103 |
]
|
104 |
return turns
|
105 |
|
|
|
82 |
basic sharegpt strategy to grab conversations from the sample row
|
83 |
"""
|
84 |
|
85 |
+
_strict = False
|
86 |
|
87 |
@property
|
88 |
def strict(self):
|
|
|
96 |
conversations = prompt["conversations"]
|
97 |
if self.strict:
|
98 |
return conversations
|
99 |
+
role_key = "from"
|
100 |
+
if "role" in conversations[0].keys():
|
101 |
+
role_key = "role"
|
102 |
+
value_key = "value"
|
103 |
+
if "text" in conversations[0].keys():
|
104 |
+
value_key = "text"
|
105 |
+
elif "content" in conversations[0].keys():
|
106 |
+
value_key = "content"
|
107 |
+
# remap roles - allow for assistant turn
|
108 |
+
role_map = {
|
109 |
+
"user": "human",
|
110 |
+
"human": "human",
|
111 |
+
"assistant": "gpt",
|
112 |
+
"gpt": "gpt",
|
113 |
+
"system": "system",
|
114 |
+
}
|
115 |
turns = [
|
116 |
+
{"from": role_map[t[role_key]], "value": t[value_key]}
|
117 |
+
for t in conversations
|
118 |
]
|
119 |
return turns
|
120 |
|