Deploy (see actual commits on https://github.com/mlcommons/croissant).
- app.py +5 -5
- core/query_params.py +9 -14
- core/state.py +14 -11
- cypress.config.js +1 -1
- events/metadata.py +3 -6
- events/resources.py +3 -3
- views/jsonld.py +1 -1
- views/metadata.py +3 -3
- views/overview.py +2 -2
- views/record_sets.py +7 -7
- views/record_sets_test.py +9 -7
- views/splash.py +2 -2
app.py
CHANGED
@@ -21,10 +21,10 @@ init_state()
 user = get_user()

 if OAUTH_CLIENT_ID and not user:
-    query_params = st.
-    state = query_params.
+    query_params = st.query_params
+    state = query_params.get_all("state")
     if state and state[0] == OAUTH_STATE:
-        code = query_params
+        code = query_params["code"]
         if not code:
             st.stop()
         try:
@@ -34,7 +34,7 @@ if OAUTH_CLIENT_ID and not user:
         except:
             raise
         finally:
-            st.
+            st.query_params.clear()
     else:
         redirect_uri = urllib.parse.quote(REDIRECT_URI, safe="")
         client_id = urllib.parse.quote(OAUTH_CLIENT_ID, safe="")
@@ -48,7 +48,7 @@ if OAUTH_CLIENT_ID and not user:

 def _back_to_menu():
     """Sends the user back to the menu."""
-    st.
+    st.query_params.clear()
     init_state(force=True)

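The app.py change is the core of this commit: the OAuth flow stops using Streamlit's deprecated `st.experimental_get_query_params()` / `st.experimental_set_query_params()` helpers (the removed lines are truncated in this view) and reads and clears the URL parameters through `st.query_params` instead. A minimal sketch of the new API as it is used here; the `"expected-state"` and `"page"` values are placeholders, not values from the editor:

```python
import streamlit as st

# Streamlit >= 1.30 exposes the URL query string as st.query_params.
state = st.query_params.get_all("state")  # every value for a (possibly repeated) key, as a list
if state and state[0] == "expected-state":
    code = st.query_params["code"]  # single value for a key that is known to be present

# Assigning writes the parameter back into the browser URL.
st.query_params["page"] = "overview"

# Remove all query parameters, e.g. once the OAuth code has been exchanged.
st.query_params.clear()
```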
core/query_params.py
CHANGED
@@ -15,28 +15,24 @@ class QueryParams:
     OPEN_RECORD_SET = "recordSet"


-def _get_query_param(
+def _get_query_param(name: str) -> str | None:
     """Gets query param with the name `name`."""
-
-
-
-        return param[0]
+    param = st.query_params.get_all(name)
+    if isinstance(param, list) and len(param) > 0:
+        return param[0]
     return None


 def _set_query_param(param: str, new_value: str) -> str | None:
-    params = st.
-    if params.
+    params = st.query_params
+    if params.get_all(param) == [new_value]:
         # The value already exists in the query params.
         return
-
-    new_params[param] = new_value
-    st.experimental_set_query_params(**new_params)
+    params[param] = new_value


 def is_record_set_expanded(record_set: RecordSet) -> bool:
-
-    open_record_set_name = _get_query_param(params, QueryParams.OPEN_RECORD_SET)
+    open_record_set_name = _get_query_param(QueryParams.OPEN_RECORD_SET)
     if open_record_set_name:
         return open_record_set_name == record_set.name
     return False
@@ -47,8 +43,7 @@ def expand_record_set(record_set: RecordSet) -> None:


 def get_project_timestamp() -> str | None:
-
-    return _get_query_param(params, QueryParams.OPEN_PROJECT)
+    return _get_query_param(QueryParams.OPEN_PROJECT)


 def set_project(project: CurrentProject):
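With the old `params` argument gone, both helpers talk to `st.query_params` directly, and `_set_query_param` skips the write when the value is already in the URL. A short, hypothetical round trip through the helpers above (the record-set name is made up; assumes this module is in scope in a running Streamlit app):

```python
# Hypothetical usage of the helpers defined above.
_set_query_param(QueryParams.OPEN_RECORD_SET, "record_set_1")  # URL gains ?recordSet=record_set_1
_set_query_param(QueryParams.OPEN_RECORD_SET, "record_set_1")  # no-op: the value is already set

assert _get_query_param(QueryParams.OPEN_RECORD_SET) == "record_set_1"
assert _get_query_param("someOtherParam") is None  # absent keys come back as None
```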
core/state.py
CHANGED
@@ -127,6 +127,7 @@ class SelectedRecordSet:
 class FileObject:
     """FileObject analogue for editor"""

+    ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
     name: str | None = None
     description: str | None = None
     contained_in: list[str] | None = dataclasses.field(default_factory=list)
@@ -135,7 +136,6 @@ class FileObject:
     encoding_format: str | None = None
     sha256: str | None = None
     df: pd.DataFrame | None = None
-    rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
     folder: epath.PathLike | None = None


@@ -143,23 +143,23 @@ class FileObject:
 class FileSet:
     """FileSet analogue for editor"""

+    ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
     contained_in: list[str] = dataclasses.field(default_factory=list)
     description: str | None = None
     encoding_format: str | None = ""
     includes: str | None = ""
     name: str = ""
-    rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)


 @dataclasses.dataclass
 class Field:
     """Field analogue for editor"""

+    ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
     name: str | None = None
     description: str | None = None
     data_types: str | list[str] | None = None
     source: mlc.Source | None = None
-    rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
     references: mlc.Source | None = None


@@ -167,13 +167,13 @@ class Field:
 class RecordSet:
     """Record Set analogue for editor"""

+    ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
     name: str = ""
     data: list[Any] | None = None
     description: str | None = None
     is_enumeration: bool | None = None
     key: str | list[str] | None = None
     fields: list[Field] = dataclasses.field(default_factory=list)
-    rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)


 @dataclasses.dataclass
@@ -182,9 +182,10 @@ class Metadata:

     name: str = ""
     description: str | None = None
-
-
+    cite_as: str | None = None
+    context: dict = dataclasses.field(default_factory=dict)
     creators: list[mlc.PersonOrOrganization] = dataclasses.field(default_factory=list)
+    ctx: mlc.Context = dataclasses.field(default_factory=mlc.Context)
     data_biases: str | None = None
     data_collection: str | None = None
     date_published: datetime.datetime | None = None
@@ -193,7 +194,6 @@ class Metadata:
     url: str = ""
     distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
     record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
-    rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
     version: str | None = None

     def __bool__(self):
@@ -295,17 +295,20 @@ class Metadata:

     def to_canonical(self) -> mlc.Metadata:
         distribution = []
+        ctx = self.ctx
         for file in self.distribution:
             if isinstance(file, FileObject):
-                distribution.append(create_class(mlc.FileObject, file))
+                distribution.append(create_class(mlc.FileObject, file, ctx=ctx))
             elif isinstance(file, FileSet):
-                distribution.append(create_class(mlc.FileSet, file))
+                distribution.append(create_class(mlc.FileSet, file, ctx=ctx))
         record_sets = []
         for record_set in self.record_sets:
             fields = []
             for field in record_set.fields:
-                fields.append(create_class(mlc.Field, field))
-            record_sets.append(
+                fields.append(create_class(mlc.Field, field, ctx=ctx))
+            record_sets.append(
+                create_class(mlc.RecordSet, record_set, ctx=ctx, fields=fields)
+            )
         return create_class(
             mlc.Metadata,
             self,
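The editor dataclasses trade their per-node `rdf: mlc.Rdf` field for a single `ctx: mlc.Context`, and `to_canonical` threads that one context (`ctx = self.ctx`) through every `create_class` call, so all generated mlcroissant nodes share it — which is what lets views/overview.py below read all validation results from `to_canonical().ctx.issues`. mlcroissant's real `Context` is richer than this, but a toy sketch with made-up classes shows why sharing one context matters:

```python
import dataclasses


@dataclasses.dataclass
class Issues:  # toy stand-in, not mlcroissant's Issues
    errors: list[str] = dataclasses.field(default_factory=list)


@dataclasses.dataclass
class Context:  # toy stand-in for mlc.Context
    issues: Issues = dataclasses.field(default_factory=Issues)


@dataclasses.dataclass
class Node:  # toy stand-in for FileObject / FileSet / Field / RecordSet
    ctx: Context
    name: str | None = None

    def validate(self) -> None:
        if not self.name:
            self.ctx.issues.errors.append("Node is missing a name.")


ctx = Context()
nodes = [Node(ctx=ctx, name="files"), Node(ctx=ctx)]  # the same ctx everywhere
for node in nodes:
    node.validate()
print(ctx.issues.errors)  # every node reported into the one shared context
```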
cypress.config.js
CHANGED
@@ -3,6 +3,6 @@ const { defineConfig } = require("cypress");
 module.exports = defineConfig({
   // To access content within Streamlit iframes for custom components:
   chromeWebSecurity: false,
-  defaultCommandTimeout:
+  defaultCommandTimeout: 20000,
   e2e: {},
 });
events/metadata.py
CHANGED
@@ -92,12 +92,11 @@ class MetadataEvent(enum.Enum):
     """Event that triggers a metadata change."""

     NAME = "NAME"
-    CONFORMS_TO = "CONFORMS_TO"
     DESCRIPTION = "DESCRIPTION"
     DATE_PUBLISHED = "DATE_PUBLISHED"
     URL = "URL"
     LICENSE = "LICENSE"
-
+    CITE_AS = "CITE_AS"
     VERSION = "VERSION"
     DATA_BIASES = "DATA_BIASES"
     DATA_COLLECTION = "DATA_COLLECTION"
@@ -111,14 +110,12 @@ class MetadataEvent(enum.Enum):
 def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
     if event == MetadataEvent.NAME:
         metadata.name = find_unique_name(set(), st.session_state[key])
-    if event == MetadataEvent.CONFORMS_TO:
-        metadata.conforms_to = st.session_state[key]
     elif event == MetadataEvent.DESCRIPTION:
         metadata.description = st.session_state[key]
     elif event == MetadataEvent.LICENSE:
         metadata.license = LICENSES.get(st.session_state[key])
-    elif event == MetadataEvent.
-        metadata.
+    elif event == MetadataEvent.CITE_AS:
+        metadata.cite_as = st.session_state[key]
     elif event == MetadataEvent.URL:
         metadata.url = st.session_state[key]
     elif event == MetadataEvent.VERSION:
events/resources.py
CHANGED
@@ -72,6 +72,6 @@ def _create_instance1_from_instance2(instance1: Resource, instance2: type):
     attributes1 = set((field.name for field in dataclasses.fields(instance1)))
     attributes2 = set((field.name for field in dataclasses.fields(instance2)))
     common_attributes = attributes2.intersection(attributes1)
-    return instance2(
-        attribute: getattr(instance1, attribute) for attribute in common_attributes
-
+    return instance2(
+        **{attribute: getattr(instance1, attribute) for attribute in common_attributes}
+    )
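The fix wraps the comprehension in `**{...}` so that the attributes shared by the two dataclasses are passed as keyword arguments (the previous form was not valid Python). A hypothetical usage with two toy stand-in classes — not the editor's real `FileObject`/`FileSet`, and setting aside the `Resource` type annotation — assuming the helper is in scope:

```python
import dataclasses


@dataclasses.dataclass
class FileObject:  # toy stand-in
    name: str = ""
    description: str | None = None
    sha256: str | None = None  # only on FileObject, silently dropped


@dataclasses.dataclass
class FileSet:  # toy stand-in
    name: str = ""
    description: str | None = None
    includes: str | None = ""  # only on FileSet, left at its default


file_object = FileObject(name="data", description="Raw CSV files", sha256="abc123")
file_set = _create_instance1_from_instance2(file_object, FileSet)
assert file_set == FileSet(name="data", description="Raw CSV files", includes="")
```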
views/jsonld.py
CHANGED
@@ -47,7 +47,7 @@ def render_jsonld():
     if croissant.metadata:
         metadata = mlc.Metadata(
             name=croissant.metadata.name,
-
+            cite_as=croissant.metadata.cite_as,
             license=croissant.metadata.license,
             description=croissant.metadata.description,
             url=croissant.metadata.url,
views/metadata.py
CHANGED
@@ -97,14 +97,14 @@ def _render_generic_metadata(metadata: Metadata):
         on_change=handle_metadata_change,
         args=(MetadataEvent.LICENSE, metadata, key),
     )
-    key = "metadata-
+    key = "metadata-cite-as"
     st.text_area(
         label="Citation",
         key=key,
-        value=metadata.
+        value=metadata.cite_as,
         placeholder="@book{\n title={Title}\n}",
         on_change=handle_metadata_change,
-        args=(MetadataEvent.
+        args=(MetadataEvent.CITE_AS, metadata, key),
     )
     key = "metadata-date-published"
     st.date_input(
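The new Citation field is wired exactly like the other metadata widgets: the widget stores its value in `st.session_state[key]`, and `handle_metadata_change` receives the event, the metadata object, and that key. A stripped-down, self-contained sketch of the pattern with a toy event and metadata class (not the editor's real ones):

```python
import enum

import streamlit as st


class MetadataEvent(enum.Enum):  # toy subset of the editor's enum
    CITE_AS = "CITE_AS"


class Metadata:  # toy stand-in for the editor's Metadata dataclass
    cite_as: str | None = None


def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str) -> None:
    # By the time the callback runs, the widget value is already in session_state.
    if event == MetadataEvent.CITE_AS:
        metadata.cite_as = st.session_state[key]


metadata = Metadata()
key = "metadata-cite-as"
st.text_area(
    label="Citation",
    key=key,
    value=metadata.cite_as,
    placeholder="@book{\n title={Title}\n}",
    on_change=handle_metadata_change,
    args=(MetadataEvent.CITE_AS, metadata, key),
)
```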
views/overview.py
CHANGED
@@ -10,7 +10,7 @@ from utils import needed_field
 from views.metadata import handle_metadata_change
 from views.metadata import MetadataEvent

-_NON_RELEVANT_METADATA = ["
+_NON_RELEVANT_METADATA = ["ctx", "name", "distribution", "record_sets"]

 _INFO_TEXT = """Croissant files are composed of three layers:

@@ -98,7 +98,7 @@ def render_overview():
     if user_started_editing:
         warning = ""
         try:
-            issues = metadata.to_canonical().issues
+            issues = metadata.to_canonical().ctx.issues
             if issues.errors:
                 warning += "**Errors**\n"
                 for error in issues.errors:
views/record_sets.py
CHANGED
@@ -44,11 +44,9 @@ class _Result(TypedDict):
 @st.cache_data(
     show_spinner="Generating the dataset...",
     hash_funcs={
-        "
-
-
-        "mlcroissant.FileSet": hash,
-        "mlcroissant.RecordSet": hash,
+        "core.state.RecordSet": lambda record_set: hash(
+            (record_set.name, record_set.description)
+        ),
     },
 )
 def _generate_data_with_timeout(record_set: RecordSet) -> _Result:
@@ -385,11 +383,13 @@ def _render_left_panel():
         "⚠️",
         key=f"idea-{prefix}",
         on_click=lambda: _generate_data_with_timeout.clear(),
-        help=textwrap.dedent(
+        help=textwrap.dedent(
+            f"""**Error**:
 ```
 {exception}
 ```
-            """
+            """
+        ),
     )
     right.markdown("No preview is possible.")

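`st.cache_data` has to hash its arguments to build a cache key, and the editor's `RecordSet` dataclass is not hashable on its own, so `hash_funcs` maps the type (referenced here by its dotted name) to a reducer; only the name and description decide whether a cached preview can be reused. A minimal sketch of the same mechanism with a hypothetical class and function — the fields picked for the key are only an example:

```python
import dataclasses

import streamlit as st


@dataclasses.dataclass
class RecordSet:  # hypothetical stand-in for core.state.RecordSet
    name: str = ""
    description: str | None = None
    data: list | None = None  # unhashable payload we keep out of the cache key


@st.cache_data(
    show_spinner="Generating the dataset...",
    hash_funcs={
        # The key may be the type itself or its fully-qualified name as a string.
        RecordSet: lambda record_set: hash((record_set.name, record_set.description)),
    },
)
def generate_preview(record_set: RecordSet) -> str:
    # Re-computed only when (name, description) changes.
    return f"Preview of {record_set.name}"
```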
views/record_sets_test.py
CHANGED
@@ -19,10 +19,12 @@ def test_find_joins():
             references=mlc.Source(uid="some_other_record_set/some_other_field"),
         ),
     ]
-    assert _find_joins(fields) == set(
-
-
-        (
-
-
-
+    assert _find_joins(fields) == set(
+        [
+            (("some_csv", "some_column"), ("some_record_set", "some_field")),
+            (
+                ("some_record_set", "some_field"),
+                ("some_other_record_set", "some_other_field"),
+            ),
+        ]
+    )
views/splash.py
CHANGED
@@ -65,11 +65,11 @@ def render_splash():
     with st.expander("**Load an existing dataset**", expanded=True):

         def create_example(dataset: str):
-            base = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/{dataset.lower()}"
+            base = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/1.0/{dataset.lower()}"
             url = f"{base}/metadata.json"
             try:
                 json = requests.get(url).json()
-                metadata = mlc.Metadata.from_json(mlc.
+                metadata = mlc.Metadata.from_json(mlc.Context(), json)
                 st.session_state[Metadata] = Metadata.from_canonical(metadata)
                 save_current_project()
                 # Write supplementary files.
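Besides pointing at the `datasets/1.0/` folder, the loader now passes an explicit `mlc.Context()` as the first argument of `mlc.Metadata.from_json`, matching the context-based API adopted in core/state.py. A condensed sketch of the same load path outside Streamlit; the dataset name is illustrative and the error handling from `create_example` is omitted:

```python
import mlcroissant as mlc
import requests

dataset = "titanic"  # illustrative; the editor builds this from the user's selection
base = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/1.0/{dataset}"
json = requests.get(f"{base}/metadata.json").json()

# Same call as in create_example: an explicit Context carries parsing state and issues.
metadata = mlc.Metadata.from_json(mlc.Context(), json)
print(metadata.name)
```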