hchak's picture
Upload folder using huggingface_hub
84e0912 verified
import gradio as gr
import json
def validate_base_format(data):
    """Check that every non-blank line of *data* is a valid 'Base' record.

    A valid record is a JSON object with a ``"messages"`` key holding a list
    whose items are all JSON objects containing both ``"role"`` and
    ``"content"`` keys. Blank lines are skipped.

    Args:
        data: The JSONL text to validate, one JSON document per line.

    Returns:
        ``(True, None)`` when every line is valid, otherwise
        ``(False, line_number)`` with the 1-based number of the first
        offending line.
    """
    # Split on "\n" only: str.splitlines() would also break on characters such
    # as U+2028, which may legally appear unescaped inside a JSON string.
    for line_number, entry in enumerate(data.split("\n"), start=1):
        if not entry.strip():
            continue
        try:
            record = json.loads(entry)
        except json.JSONDecodeError:
            return False, line_number
        # Valid JSON is not necessarily an object: reject arrays, strings and
        # numbers here instead of letting the key lookup raise.
        if not isinstance(record, dict):
            return False, line_number
        messages = record.get("messages")
        if not isinstance(messages, list):
            return False, line_number
        for message in messages:
            # The isinstance check stops a plain string such as "role content"
            # from passing the key tests through substring matching.
            if (
                not isinstance(message, dict)
                or "role" not in message
                or "content" not in message
            ):
                return False, line_number
    return True, None
def validate_conversational_format(data):
    """Check that every non-blank line of *data* is a 'Conversational' record.

    A valid record is a JSON object containing both a ``"prompt"`` and a
    ``"completion"`` key. Blank lines are skipped.

    Args:
        data: The JSONL text to validate, one JSON document per line.

    Returns:
        ``(True, None)`` when every line is valid, otherwise
        ``(False, line_number)`` with the 1-based number of the first
        offending line.
    """
    # Split on "\n" only: str.splitlines() would also break on characters such
    # as U+2028, which may legally appear unescaped inside a JSON string.
    for line_number, entry in enumerate(data.split("\n"), start=1):
        if not entry.strip():
            continue
        try:
            record = json.loads(entry)
        except json.JSONDecodeError:
            return False, line_number
        # Require a JSON object: on a list or string the `in` operator would
        # test element/substring membership, and on a number it would raise.
        if (
            not isinstance(record, dict)
            or "prompt" not in record
            or "completion" not in record
        ):
            return False, line_number
    return True, None
def validate_multi_turn_format(data):
    """Check that every non-blank line of *data* is a 'Multi-turn' record.

    A valid record is a JSON object with a ``"messages"`` key holding a list
    whose items are all JSON objects containing both ``"role"`` and
    ``"content"`` keys. In addition, any message whose role is
    ``"assistant"`` and that carries a ``"weight"`` key must have a weight
    equal to 0 or 1. Blank lines are skipped.

    Args:
        data: The JSONL text to validate, one JSON document per line.

    Returns:
        ``(True, None)`` when every line is valid, otherwise
        ``(False, line_number)`` with the 1-based number of the first
        offending line.
    """
    # Split on "\n" only: str.splitlines() would also break on characters such
    # as U+2028, which may legally appear unescaped inside a JSON string.
    for line_number, entry in enumerate(data.split("\n"), start=1):
        if not entry.strip():
            continue
        try:
            record = json.loads(entry)
        except json.JSONDecodeError:
            return False, line_number
        # Valid JSON is not necessarily an object: reject arrays, strings and
        # numbers here instead of letting the key lookup raise.
        if not isinstance(record, dict):
            return False, line_number
        messages = record.get("messages")
        if not isinstance(messages, list):
            return False, line_number
        for message in messages:
            # The isinstance check stops a plain string such as "role content"
            # from passing the key tests through substring matching.
            if (
                not isinstance(message, dict)
                or "role" not in message
                or "content" not in message
            ):
                return False, line_number
            # Weights are only constrained on assistant messages.
            if (
                message["role"] == "assistant"
                and "weight" in message
                and message["weight"] not in [0, 1]
            ):
                return False, line_number
    return True, None
def process_data(text, file, option):
    """Validate pasted text or an uploaded file against the selected format.

    When *file* is truthy its contents are validated and *text* is ignored;
    otherwise *text* is validated.

    Args:
        text: The text pasted by the user.
        file: The upload, or a falsy value when nothing was uploaded. May be
            an object with a ``read()`` method (returning bytes or str) or a
            filesystem path to the uploaded file.
        option: The selected mode: "Base", "Conversational" or "Multi-turn".

    Returns:
        A human-readable status string. Any exception is reported as
        ``"An error occurred: ..."`` rather than raised, so the message can
        be shown in the output box.
    """
    try:
        if file:
            if hasattr(file, "read"):
                raw = file.read()
            else:
                # Uploads may arrive as a path instead of an open handle;
                # read it in binary mode and close it promptly.
                with open(file, "rb") as handle:
                    raw = handle.read()
            # "utf-8-sig" drops a leading byte-order mark, which would
            # otherwise make the first line fail to parse as JSON. Text-mode
            # handles already return str and need no decoding.
            data = raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw
        else:
            data = text

        if option == "Base":
            validator = validate_base_format
        elif option == "Conversational":
            validator = validate_conversational_format
        elif option == "Multi-turn":
            validator = validate_multi_turn_format
        else:
            return "Option selected but no specific format validation implemented for this option."

        is_valid, line_error = validator(data)
        if not is_valid:
            return f"Error: Input does not follow the required '{option}' format at line {line_error}."
        return f"Input follows the '{option}' format."
    except Exception as e:
        # Top-level UI boundary: surface the failure as text.
        return f"An error occurred: {str(e)}"
# Build the single-page UI: a text box and a file upload as inputs, a radio
# button to pick the format, and a submit button wired to process_data.
with gr.Blocks(title="Fine-tuning Formatter") as demo:
    # Input row: paste text or upload a file.
    # NOTE(review): '.csv' uploads are accepted, but the validators in this
    # file parse each line as JSON -- confirm CSV is really intended.
    with gr.Row():
        text_input = gr.Textbox(
            label="Paste your text here",
            lines=10,
            placeholder="Enter text here or upload a file...",
        )
        file_input = gr.File(
            label="Upload CSV or JSONL file",
            file_types=['.csv', '.jsonl'],
        )

    # Format selector; "Base" is preselected.
    with gr.Row():
        option = gr.Radio(
            choices=["Base", "Conversational", "Multi-turn"],
            label="Select the processing mode",
            value="Base",
        )

    # NOTE(review): the original indentation was lost; placing the output box
    # in the same row as the button is an assumption -- verify the layout.
    with gr.Row():
        submit_button = gr.Button("Submit")
        output = gr.Textbox(label="Output", lines=2)

    # Clicking the button passes the three inputs to process_data and shows
    # its returned message in the output box.
    submit_button.click(
        fn=process_data,
        inputs=[text_input, file_input, option],
        outputs=output,
    )

demo.launch()