Spaces:

yangwang825
/

datastats

Sleeping

App Files Files Community

datastats / app.py

yangwang825

Update app.py

6415a1e verified 3 months ago

raw

history blame contribute delete

No virus

4.68 kB

	import os
	import re
	import sys
	import json
	import logging
	from pathlib import Path

	import evaluate
	import numpy as np
	from datasets import Value

	# Module-level logger named after this module.
	logger = logging.getLogger(__name__)
	# Matches a block delimited by two `---` markers (each followed by line breaks);
	# group 1 lazily captures the text between the markers. Judging by the name this
	# targets the YAML header of a README; parse_readme() uses the match end to skip it.
	REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")


	def infer_gradio_input_types(feature_types):
	    """
	    Translate metric feature types into input types for gradio Dataframes.

	    Each entry of `feature_types` is classified as follows:
	    - a `Value` whose dtype starts with "int" or "float" -> "number"
	    - a `Value` whose dtype is exactly "string" -> "str"
	    - anything else (including other `Value` dtypes) -> "json"

	    "json" is not a native gradio type; it is meant to be handled as a string
	    that is later parsed as JSON.

	    Returns a list with one type name per entry of `feature_types`, in order.
	    """

	    def _classify(feature_type):
	        # Anything that is not a scalar `Value` feature falls back to json.
	        if not isinstance(feature_type, Value):
	            return "json"
	        dtype = feature_type.dtype
	        if dtype.startswith(("int", "float")):
	            return "number"
	        if dtype == "string":
	            return "str"
	        return "json"

	    return [_classify(feature_type) for feature_type in feature_types]


	def json_to_string_type(input_types):
	    """Return a new list where every "json" entry of `input_types` becomes "str"."""
	    converted = []
	    for input_type in input_types:
	        # Only the json marker is rewritten; every other entry passes through as-is.
	        converted.append("str" if input_type == "json" else input_type)
	    return converted


	def parse_readme(filepath):
	    """
	    Read a repository README and strip its leading `---` delimited block.

	    Args:
	        filepath: Path (str or path-like) of the README file to read.

	    Returns:
	        The README text. If `REGEX_YAML_BLOCK` matches, everything up to and
	        including the first matched block is removed. If `filepath` does not
	        exist, the placeholder string "No README.md found." is returned.
	    """
	    if not os.path.exists(filepath):
	        return "No README.md found."
	    # Decode explicitly as UTF-8 so the result does not depend on the platform's
	    # default locale encoding.
	    with open(filepath, "r", encoding="utf-8") as f:
	        text = f.read()
	    match = REGEX_YAML_BLOCK.search(text)
	    if match:
	        # Keep only what follows the matched block.
	        text = text[match.end():]
	    return text


	def parse_gradio_data(data, input_types):
	    """
	    Parse data from a gradio Dataframe for use in a metric.

	    Rows containing an empty string or a missing value in any column are
	    dropped. The remaining columns are paired positionally with `input_types`
	    and converted:
	    - "json": each cell is parsed with `json.loads`
	    - "str": surrounding double quotes are stripped from each cell
	    - anything else: the column is passed through unchanged

	    Args:
	        data: DataFrame whose columns are the metric's feature names.
	        input_types: One type name per column, in column order.

	    Returns:
	        Dict mapping each column name to its parsed values.
	    """
	    # Clean a copy instead of using inplace operations, so the caller's
	    # DataFrame is not modified as a side effect of parsing.
	    cleaned = data.replace("", np.nan).dropna()
	    metric_inputs = {}
	    for feature_name, input_type in zip(cleaned, input_types):
	        column = cleaned[feature_name]
	        if input_type == "json":
	            metric_inputs[feature_name] = [json.loads(d) for d in column.to_list()]
	        elif input_type == "str":
	            metric_inputs[feature_name] = [d.strip('"') for d in column.to_list()]
	        else:
	            metric_inputs[feature_name] = column
	    return metric_inputs


	def parse_test_cases(test_cases, feature_names, input_types):
	    """
	    Convert test cases into row-wise examples for a gradio Dataframe.

	    For every test case, each feature column is formatted according to its
	    input type ("json" values are passed through `str`, "str" values are
	    wrapped in double quotes, anything else is left untouched) and the
	    columns are then transposed into rows.

	    Returns None when `test_cases` is empty, otherwise a list holding one
	    list of rows per test case.
	    """
	    if len(test_cases) == 0:
	        return None

	    def _format_column(values, input_type):
	        # Format one feature column according to its gradio input type.
	        if input_type == "json":
	            return [str(value) for value in values]
	        if input_type == "str":
	            return ['"' + value + '"' for value in values]
	        return values

	    examples = []
	    for test_case in test_cases:
	        columns = [
	            _format_column(test_case[feat], input_type)
	            for feat, input_type in zip(feature_names, input_types)
	        ]
	        # Transpose column-wise data into rows.
	        rows = [list(row) for row in zip(*columns)]
	        examples.append(rows)
	    return examples


	def launch_gradio_widget(metric):
	    """
	    Build and launch a Gradio interface for `metric`.

	    The interface exposes two text boxes ("Summary" and "Document") whose
	    contents are sent to `metric.compute` as single-element `predictions`
	    and `references` lists. The README.md located in `sys.path[0]` is parsed
	    and shown as the article.

	    Raises:
	        ImportError: if gradio is not installed (re-raised after logging).
	    """
	    try:
	        import gradio as gr
	    except ImportError as error:
	        logger.info("To create a metric widget with Gradio make sure gradio is installed.")
	        raise error

	    readme_path = Path(sys.path[0]) / "README.md"

	    # If several feature schemas are declared, the first one is the default.
	    features = metric.features
	    schema = features[0] if isinstance(features, list) else features
	    feature_names, feature_types = zip(*schema.items())
	    # NOTE: feature_names and gradio_input_types are not consumed by the text-box
	    # interface below; they belonged to a Dataframe-based input and are still
	    # computed here.
	    gradio_input_types = infer_gradio_input_types(feature_types)

	    def compute(summary, document):
	        # Each text box contributes exactly one prediction/reference pair.
	        return metric.compute(predictions=[summary], references=[document])

	    summary_box = gr.components.Textbox(label="Summary")
	    document_box = gr.components.Textbox(label="Document")
	    result_box = gr.components.Textbox(label=metric.name)

	    iface = gr.Interface(
	        fn=compute,
	        inputs=[summary_box, document_box],
	        outputs=result_box,
	        description=metric.info.description,
	        title=f"Metric: {metric.name}",
	        article=parse_readme(readme_path),
	        # TODO: load test cases and use them to populate examples
	    )

	    iface.launch()


	# Load the "yangwang825/datastats" module through `evaluate.load` and launch
	# its Gradio widget. These statements sit at module top level, so they run
	# whenever this file is executed or imported.
	module = evaluate.load("yangwang825/datastats")
	launch_gradio_widget(module)