Spaces:

elineve
/

H2OTest

Runtime error

App Files Files Community

H2OTest / llm_studio /src /possible_values.py

elineve

Upload 301 files

07423df 11 months ago

raw

history blame contribute delete

8.43 kB

	import os
	from abc import abstractmethod
	from dataclasses import dataclass
	from typing import Any, Callable, List, Optional, Sequence, Set, Tuple, Union

	from llm_studio.src.nesting import Dependency


	def _scan_dirs(dirname) -> List[str]:
	"""Scans a directory for subfolders

	Args:
	dirname: directory name

	Returns:
	List of subfolders

	"""

	subfolders = [f.path for f in os.scandir(dirname) if f.is_dir()]
	for dirname in list(subfolders):
	subfolders.extend(_scan_dirs(dirname))
	subfolders = [x + "/" if x[-1] != "/" else x for x in subfolders]
	return subfolders


	def _scan_files(
	dirname, extensions: Tuple[str, ...] = (".csv", ".pq", ".parquet", ".json")
	) -> List[str]:
	"""Scans a directory for files with given extension

	Args:
	dirname: directory name
	extensions: extensions to consider

	Returns:
	List of files

	"""
	path_list = [
	os.path.join(dirpath, filename)
	for dirpath, _, filenames in os.walk(dirname)
	for filename in filenames
	if any(map(filename.__contains__, extensions))
	and not filename.startswith("__meta_info__")
	]
	return sorted(path_list)


	def strip_prefix(paths: Sequence[str], ignore_set: Set[str] = set()) -> Tuple[str, ...]:
	"""
	Strips the common prefix of all the given paths.

	Args:
	paths: the paths to strip
	ignore_set: set of path names to ignore when computing the prefix.

	Returns:
	List with the same length as `paths` without common prefixes.
	"""

	paths_to_check = [
	os.path.split(os.path.normpath(path))[0]
	for path in paths
	if path not in ignore_set
	]

	if len(paths_to_check) == 0:
	return tuple(paths)

	prefix = os.path.commonpath(paths_to_check)
	stripped = tuple(
	[
	path if path in ignore_set else os.path.relpath(path, prefix)
	for path in paths
	]
	)

	return stripped


	class Value:
	pass


	@dataclass
	class Number:
	min: Optional[float] = None
	max: Optional[float] = None
	step: Union[str, float] = 1.0


	@dataclass
	class String:
	# Each element of the tuple can be either:
	# - a tuple of (value, name)
	# - a string. In that case the same value will be used for name and value
	values: Any = None
	allow_custom: bool = False
	placeholder: Optional[str] = None


	class DatasetValue:
	pass

	@abstractmethod
	def get_value(
	self, dataset: Any, value: Any, type_annotation: type, mode: str
	) -> Tuple[String, Any]:
	pass

	@staticmethod
	def _compute_current_values(
	current_values: List[str],
	possible_values: List[str],
	prefer_with: Optional[Callable[[str], bool]] = None,
	) -> List[str]:
	"""
	Compute current values.

	Args:
	current_values: The preliminary current values.
	possible_values: All possible values.
	prefer_with: Function determining which values to prefer as default.

	Returns:
	A list
	"""
	if len(possible_values) == 0:
	return [""]

	# allow only values which are in the possible values
	current_values = list(
	filter(lambda value: value in possible_values, current_values)
	)

	if len(current_values) == 0:
	# if the values are empty, take all the values where `prefer_with` is true
	for c in possible_values:
	if prefer_with is not None and prefer_with(c):
	current_values.append(c)

	# if they are still empty, just take the first possible value
	if len(current_values) == 0:
	current_values = [possible_values[0]]

	return current_values


	@dataclass
	class Directories(DatasetValue):
	add_none: Union[bool, Callable[[str], bool]] = False
	prefer_with: Optional[Callable[[str], bool]] = None
	prefer_none: bool = True

	def get_value(self, dataset, value, type_annotation, mode) -> Tuple[String, Any]:
	if dataset is None:
	return String(tuple()), value

	available_dirs = _scan_dirs(dataset["path"])

	if (isinstance(self.add_none, bool) and self.add_none) or (
	callable(self.add_none) and self.add_none(mode)
	):
	if self.prefer_none:
	available_dirs.insert(0, "None")
	else:
	available_dirs.insert(len(available_dirs), "None")

	if isinstance(value, str):
	value = [value]

	value = DatasetValue._compute_current_values(
	value, available_dirs, self.prefer_with
	)

	return (
	String(
	tuple(
	zip(
	available_dirs,
	strip_prefix(available_dirs, ignore_set={"None"}),
	)
	)
	),
	value if type_annotation == Tuple[str, ...] else value[0],
	)


	@dataclass
	class Files(DatasetValue):
	add_none: Union[bool, Callable[[str], bool]] = False
	prefer_with: Optional[Callable[[str], bool]] = None
	# For the case where no match found, whether to prioritize
	# selecting any file or selecting no file
	prefer_none: bool = True

	def get_value(self, dataset, value, type_annotation, mode) -> Tuple[String, Any]:
	if dataset is None:
	return String(tuple()), value

	available_files = _scan_files(dataset["path"])

	if (isinstance(self.add_none, bool) and self.add_none) or (
	callable(self.add_none) and self.add_none(mode)
	):
	if self.prefer_none:
	available_files.insert(0, "None")
	else:
	available_files.insert(len(available_files), "None")

	if isinstance(value, str):
	value = [value]

	value = DatasetValue._compute_current_values(
	value, available_files, self.prefer_with
	)

	return (
	String(
	tuple(
	zip(
	available_files,
	strip_prefix(available_files, ignore_set={"None"}),
	)
	)
	),
	value if type_annotation == Tuple[str, ...] else value[0],
	)


	@dataclass
	class Columns(DatasetValue):
	add_none: Union[bool, Callable[[str], bool]] = False
	prefer_with: Optional[Callable[[str], bool]] = None

	def get_value(self, dataset, value, type_annotation, mode) -> Tuple[String, Any]:
	if dataset is None:
	return String(tuple()), value

	try:
	columns = list(dataset["dataframe"].columns)
	except KeyError:
	columns = []

	if (isinstance(self.add_none, bool) and self.add_none) or (
	callable(self.add_none) and self.add_none(mode)
	):
	columns.insert(0, "None")

	if isinstance(value, str):
	value = [value]
	if value is None:
	value = [columns[0]]

	value = DatasetValue._compute_current_values(value, columns, self.prefer_with)

	return (
	String(tuple(columns)),
	value if type_annotation == Tuple[str, ...] else value[0],
	)


	@dataclass
	class ColumnValue(DatasetValue):
	column: str
	default: List[str]
	prefer_with: Optional[Callable[[str], bool]] = None
	dependency: Optional[Dependency] = None

	def get_value(self, dataset, value, type_annotation, mode) -> Tuple[String, Any]:
	if dataset is None:
	return String(tuple()), value

	try:
	df = dataset["dataframe"]
	except KeyError:
	df = None

	if df is not None:
	if self.dependency is not None and not self.dependency.check(
	[dataset[self.dependency.key]]
	):
	values = self.default
	elif self.column in df:
	values = [str(v) for v in sorted(list(df[self.column].unique()))]
	else:
	values = self.default
	else:
	values = self.default

	value = DatasetValue._compute_current_values(value, values, self.prefer_with)

	return (
	String(tuple(values)),
	value if type_annotation == Tuple[str, ...] else value[0],
	)