Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -99,8 +99,7 @@ def _sorted_split_key(split: str) -> str:
|
|
99 |
@lru_cache(maxsize=128)
|
100 |
def get_parquet_splits(dataset: str, config: str) -> List[str]:
|
101 |
fs = get_parquet_fs(dataset)
|
102 |
-
|
103 |
-
return sorted(set(parts[-4] if len(parts) > 3 and parts[-2] == "of" else parts[-1] for parts in all_parts), key=_sorted_split_key)
|
104 |
|
105 |
|
106 |
#####################################################
|
@@ -114,7 +113,7 @@ RowGroupReaders = List[Callable[[], pa.Table]]
|
|
114 |
@lru_cache(maxsize=128)
|
115 |
def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
|
116 |
fs = get_parquet_fs(dataset)
|
117 |
-
sources = fs.glob(f"{config}
|
118 |
if not sources:
|
119 |
if config not in get_parquet_configs(dataset):
|
120 |
raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
|
|
|
99 |
@lru_cache(maxsize=128)
|
100 |
def get_parquet_splits(dataset: str, config: str) -> List[str]:
|
101 |
fs = get_parquet_fs(dataset)
|
102 |
+
return [path for path in fs.ls(config) if fs.isdir(path)]
|
|
|
103 |
|
104 |
|
105 |
#####################################################
|
|
|
113 |
@lru_cache(maxsize=128)
|
114 |
def index(dataset: str, config: str, split: str) -> Tuple[np.ndarray, RowGroupReaders, int, Features]:
|
115 |
fs = get_parquet_fs(dataset)
|
116 |
+
sources = fs.glob(f"{config}/{split}/*.parquet")
|
117 |
if not sources:
|
118 |
if config not in get_parquet_configs(dataset):
|
119 |
raise AppError(f"Invalid config {config}. Available configs are: {', '.join(get_parquet_configs(dataset))}.")
|