marcenacp committed
Commit edf454b
1 Parent(s): 6a31b9a

Deploy (see actual commits on https://github.com/mlcommons/croissant).

app.py CHANGED
@@ -20,7 +20,6 @@ col1.header("Croissant Editor")
 init_state()
 
 user = get_cached_user()
-print("USER", user)
 
 if OAUTH_CLIENT_ID and not user:
     query_params = st.experimental_get_query_params()
core/constants.py CHANGED
@@ -33,5 +33,5 @@ DF_HEIGHT = 150
 OVERVIEW = "Overview"
 METADATA = "Metadata"
 RESOURCES = "Resources"
-RECORD_SETS = "RecordSets"
+RECORD_SETS = "Record Sets"
 TABS = [OVERVIEW, METADATA, RESOURCES, RECORD_SETS]
core/state.py CHANGED
@@ -168,7 +168,7 @@ class RecordSet:
     """Record Set analogue for editor"""
 
     name: str = ""
-    data: Any = None
+    data: list[Any] | None = None
     description: str | None = None
     is_enumeration: bool | None = None
     key: str | list[str] | None = None
@@ -208,9 +208,14 @@ class Metadata:
         """Renames a RecordSet by changing all the references to this RecordSet."""
         for i, record_set in enumerate(self.record_sets):
            for j, field in enumerate(record_set.fields):
+                possible_uid = f"{old_name}/"
                 # Update source
                 source = field.source
-                if source and source.uid and source.uid.startswith(old_name):
+                if (
+                    source
+                    and source.uid
+                    and (source.uid.startswith(possible_uid) or source.uid == old_name)
+                ):
                     new_uid = source.uid.replace(old_name, new_name, 1)
                     self.record_sets[i].fields[j].source.uid = new_uid
                 # Update references
@@ -218,7 +223,10 @@
                 if (
                     references
                     and references.uid
-                    and references.uid.startswith(old_name)
+                    and (
+                        references.uid.startswith(possible_uid)
+                        or references.uid == old_name
+                    )
                 ):
                     new_uid = references.uid.replace(old_name, new_name, 1)
                     self.record_sets[i].fields[j].references.uid = new_uid
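
Note on the rename logic above: the new `possible_uid` check only matches UIDs that are exactly the old RecordSet name or that start with it followed by `/`, so renaming `train` no longer touches a UID like `train_2/label`. A minimal sketch of that matching rule, with hypothetical UIDs, assuming the `record_set/field` UID convention used in the diff:

```python
def uid_refers_to(uid: str | None, old_name: str) -> bool:
    """True if `uid` is the RecordSet `old_name` itself or one of its fields."""
    possible_uid = f"{old_name}/"
    return bool(uid) and (uid == old_name or uid.startswith(possible_uid))


assert uid_refers_to("train", "train")              # the RecordSet itself
assert uid_refers_to("train/label", "train")        # a field of the RecordSet
assert not uid_refers_to("train_2/label", "train")  # a plain startswith() would match this
```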
deploy_to_hf.sh CHANGED
@@ -3,12 +3,15 @@ echo "Deleting $HF_REPO..."
 rm -rf ${HF_REPO}
 git clone git@hf.co:spaces/marcenacp/croissant-editor ${HF_REPO}
 echo "Copying files from $PWD to $HF_REPO..."
-rsync -aP --exclude="README.md" --exclude="*node_modules*" --exclude="*__pycache__*" . ${HF_REPO}
+rsync -aP --exclude="README.md" --exclude="*node_modules*" --exclude="cypress/*" --exclude="*__pycache__*" . ${HF_REPO}
 cd ${HF_REPO}
-echo "Now push with: 'cd $HF_REPO && git add && git commit && git push'."
+git add .
+git commit -m "Deploy (see actual commits on https://github.com/mlcommons/croissant)."
+echo "Now push with: 'cd $HF_REPO && git push'."
 echo "Warning: if it fails, you may need to follow https://huggingface.co/docs/hub/security-git-ssh#generating-a-new-ssh-keypair"
 echo "On Hugging Face Spaces, you might have to set the following environment variables:"
 echo "- REDIRECT_URI"
 echo "- OAUTH_STATE"
 echo "- OAUTH_CLIENT_ID"
 echo "- OAUTH_CLIENT_SECRET"
+echo "Visit: https://huggingface.co/spaces/marcenacp/croissant-editor"
events/record_sets.py CHANGED
@@ -13,6 +13,8 @@ class RecordSetEvent(enum.Enum):
     NAME = "NAME"
     DESCRIPTION = "DESCRIPTION"
     IS_ENUMERATION = "IS_ENUMERATION"
+    HAS_DATA = "HAS_DATA"
+    CHANGE_DATA = "CHANGE_DATA"
 
 
 def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key: str):
@@ -28,4 +30,16 @@ def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key:
         record_set.description = value
     elif event == RecordSetEvent.IS_ENUMERATION:
         record_set.is_enumeration = value
+    elif event == RecordSetEvent.HAS_DATA:
+        if value:
+            record_set.data = []
+        else:
+            record_set.data = None
+    elif event == RecordSetEvent.CHANGE_DATA:
+        for index, new_value in value["edited_rows"].items():
+            record_set.data[index] = {**record_set.data[index], **new_value}
+        for row in value["added_rows"]:
+            record_set.data.append(row)
+        for row in value["deleted_rows"]:
+            del record_set.data[row]
     expand_record_set(record_set=record_set)
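
For context, `value` here is the payload that `st.data_editor` stores in `st.session_state[key]`: a dict with `edited_rows`, `added_rows` and `deleted_rows`. A minimal, self-contained sketch of applying such a payload to an in-line `data` list (the rows and payload below are made up; deleting in reverse index order is one way to keep the remaining indices stable):

```python
# Made-up in-line data and edit payload, mirroring the st.data_editor session state format.
data = [{"name": "a", "size": 1}, {"name": "b", "size": 2}, {"name": "c", "size": 3}]
payload = {
    "edited_rows": {0: {"size": 10}},          # row 0: size 1 -> 10
    "added_rows": [{"name": "d", "size": 4}],  # one appended row
    "deleted_rows": [1],                       # drop row "b"
}

for index, new_value in payload["edited_rows"].items():
    data[index] = {**data[index], **new_value}
for row in payload["added_rows"]:
    data.append(row)
for row in sorted(payload["deleted_rows"], reverse=True):
    # Reverse order so earlier deletions do not shift the later indices.
    del data[row]

print(data)  # [{'name': 'a', 'size': 10}, {'name': 'c', 'size': 3}, {'name': 'd', 'size': 4}]
```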
views/foo.py.py ADDED
@@ -0,0 +1,36 @@
+import multiprocessing
+import time
+from typing import TypedDict
+
+
+class _Result(TypedDict):
+    bar: int
+
+
+def bar(result):
+    while True:
+        time.sleep(1)
+        result["bar"] += 1
+        print(result["bar"])
+        if result["bar"] > 5:
+            return
+
+
+def foo():
+    """Generates the data and waits at most _TIMEOUT_SECONDS."""
+    with multiprocessing.Manager() as manager:
+        result: _Result = manager.dict(bar=0)
+        process = multiprocessing.Process(target=bar, args=(result,))
+        process.start()
+        if not process.is_alive():
+            return result
+        time.sleep(3)
+        if process.is_alive():
+            process.kill()
+            result["exception"] = TimeoutError(
+                "The generation took too long and was killed."
+            )
+        return _Result(**result)
+
+
+print("FINAL RESULT", foo().get("bar"))
views/overview.py CHANGED
@@ -1,3 +1,4 @@
+import dataclasses
 from typing import Any
 
 import streamlit as st
@@ -8,12 +9,22 @@ from utils import needed_field
 from views.metadata import handle_metadata_change
 from views.metadata import MetadataEvent
 
+_NON_RELEVANT_METADATA = ["name", "distribution", "record_sets", "rdf"]
 
-def _plural(array: list[Any]):
-    if array:
-        return "s"
-    else:
-        return ""
+_INFO_TEXT = """Croissant files are composed of three layers:
+
+- **Metadata** about the dataset covering Responsible AI, licensing and attributes of
+  [sc\:Dataset](https://schema.org/Dataset).
+- **Resources**: The contents of a dataset as the underlying files
+  ([`FileObject`](https://github.com/mlcommons/croissant/blob/main/docs/croissant-spec.md#fileobject))
+  and/or sets of files ([`FileSet`](https://github.com/mlcommons/croissant/blob/main/docs/croissant-spec.md#fileset)).
+- **RecordSets**: the sets of structured records obtained from one or more resources
+  (typically a file or set of files) and the structure of these records,
+  expressed as a set of fields (e.g., the columns of a table).
+
+The next three tabs will guide you through filling those layers. The errors if any will
+be displayed on this page. Once you are ready, you can download the dataset by clicking
+the export button in the upper right corner."""
 
 
 def render_overview():
@@ -21,7 +32,7 @@ def render_overview():
     col1, col2 = st.columns([1, 1], gap="medium")
     with col1:
         key = "metadata-name"
-        st.text_input(
+        name = st.text_input(
             label=needed_field("Name"),
             key=key,
             value=metadata.name,
@@ -29,8 +40,10 @@ def render_overview():
             on_change=handle_metadata_change,
             args=(MetadataEvent.NAME, metadata, key),
         )
+        if not name:
+            st.stop()
         key = "metadata-url"
-        st.text_input(
+        url = st.text_input(
             label=needed_field("URL"),
             key=key,
             value=metadata.url,
@@ -38,6 +51,8 @@ def render_overview():
             on_change=handle_metadata_change,
             args=(MetadataEvent.URL, metadata, key),
         )
+        if not url:
+            st.stop()
         key = "metadata-description"
         st.text_area(
             label="Description",
@@ -47,29 +62,35 @@ def render_overview():
             on_change=handle_metadata_change,
             args=(MetadataEvent.DESCRIPTION, metadata, key),
         )
-
-        st.subheader(
-            f"{len(metadata.distribution)} File" + _plural(metadata.distribution)
-        )
-        st.subheader(
-            f"{len(metadata.record_sets)} Record Set" + _plural(metadata.distribution)
-        )
+        st.divider()
+        left, middle, right = st.columns([1, 1, 1])
+        fields = [
+            field
+            for field, value in dataclasses.asdict(metadata).items()
+            if value and field not in _NON_RELEVANT_METADATA
+        ]
+        left.metric("Number of metadata", len(fields))
+        middle.metric("Number of resources", len(metadata.distribution))
+        right.metric("Number of RecordSets", len(metadata.record_sets))
     with col2:
         user_started_editing = metadata.record_sets or metadata.distribution
         if user_started_editing:
-            st.subheader("Croissant File Validation")
+            warning = ""
             try:
                 issues = metadata.to_canonical().issues
                 if issues.errors:
-                    st.markdown("##### Errors:")
+                    warning += "**Errors**\n"
                     for error in issues.errors:
-                        st.write(error)
+                        warning += f"{error}\n"
                 if issues.warnings:
-                    st.markdown("##### Warnings:")
+                    warning += "**Warnings**\n"
                     for warning in issues.warnings:
-                        st.write(warning)
-                if not issues.errors and not issues.warnings:
-                    st.write("No validation issues detected!")
+                        warning += f"{warning}\n"
             except mlc.ValidationError as exception:
-                st.markdown("##### Errors:")
-                st.write(str(exception))
+                warning += "**Errors**\n"
+                warning += f"{str(exception)}\n"
+            if warning:
+                st.warning(warning, icon="⚠️")
+            else:
+                st.success("No validation issues detected!", icon="✅")
+        st.info(_INFO_TEXT, icon="💡")
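
The new "Number of metadata" metric counts the dataclass fields that are filled in, minus the ones listed in `_NON_RELEVANT_METADATA`. A minimal sketch of that counting logic on a toy dataclass (the field names here are illustrative, not the editor's actual `Metadata` class):

```python
import dataclasses


@dataclasses.dataclass
class ToyMetadata:
    # Hypothetical fields, for illustration only.
    name: str = ""
    url: str = ""
    license: str = ""
    citation: str = ""


_NON_RELEVANT_METADATA = ["name"]

metadata = ToyMetadata(name="my-dataset", url="https://example.com", license="CC-BY 4.0")
fields = [
    field
    for field, value in dataclasses.asdict(metadata).items()
    if value and field not in _NON_RELEVANT_METADATA
]
print(len(fields))  # 2 -> only `url` and `license` are both filled in and relevant
```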
views/record_sets.py CHANGED
@@ -1,4 +1,7 @@
-from typing import Any
+import multiprocessing
+import textwrap
+import time
+from typing import TypedDict
 
 import numpy as np
 import pandas as pd
@@ -28,6 +31,65 @@ DATA_TYPES = [
     mlc.DataType.URL,
 ]
 
+_NUM_RECORDS = 3
+_TIMEOUT_SECONDS = 1
+
+
+class _Result(TypedDict):
+    df: pd.DataFrame | None
+    exception: Exception | None
+
+
+@st.cache_data(show_spinner="Generating the dataset...")
+def _generate_data_with_timeout(record_set: RecordSet) -> _Result:
+    """Generates the data and waits at most _TIMEOUT_SECONDS."""
+    with multiprocessing.Manager() as manager:
+        result: _Result = manager.dict(df=None, exception=None)
+        args = (record_set, result)
+        process = multiprocessing.Process(target=_generate_data, args=args)
+        process.start()
+        if not process.is_alive():
+            return _Result(**result)
+        time.sleep(_TIMEOUT_SECONDS)
+        if process.is_alive():
+            process.kill()
+            result["exception"] = TimeoutError(
+                "The generation took too long and was killed. Please, use the CLI as"
+                " described in"
+                " https://github.com/mlcommons/croissant/tree/main/python/mlcroissant#verifyload-a-croissant-dataset."
+            )
+        return _Result(**result)
+
+
+def _generate_data(record_set: RecordSet, result: _Result) -> pd.DataFrame | None:
+    """Generates the first _NUM_RECORDS records."""
+    try:
+        metadata: Metadata = st.session_state[Metadata]
+        if not metadata:
+            raise ValueError(
+                "The dataset is still incomplete. Please, go to the overview to see"
+                " errors."
+            )
+        croissant = metadata.to_canonical()
+        if croissant:
+            dataset = mlc.Dataset.from_metadata(croissant)
+            records = iter(dataset.records(record_set=record_set.name))
+            df = []
+            for i, record in enumerate(iter(records)):
+                if i >= _NUM_RECORDS:
+                    break
+                # Decode bytes as str:
+                for key, value in record.items():
+                    if isinstance(value, bytes):
+                        try:
+                            record[key] = value.decode("utf-8")
+                        except:
+                            pass
+                df.append(record)
+            result["df"] = pd.DataFrame(df)
+    except Exception as exception:
+        result["exception"] = exception
+
 
 def _handle_close_fields():
     st.session_state[SelectedRecordSet] = None
@@ -116,23 +178,22 @@ def _handle_fields_change(record_set_key: int, record_set: RecordSet):
             name=added_row.get(FieldDataFrame.NAME),
             description=added_row.get(FieldDataFrame.DESCRIPTION),
             data_types=[added_row.get(FieldDataFrame.DATA_TYPE)],
-            source=mlc.Source(
-                uid="foo",
-                node_type="distribution",
-                extract=mlc.Extract(column=""),
-            ),
+            source=mlc.Source(),
             references=mlc.Source(),
         )
         st.session_state[Metadata].add_field(record_set_key, field)
     for field_key in result["deleted_rows"]:
         st.session_state[Metadata].remove_field(record_set_key, field_key)
+    # Reset the in-line data if it exists.
+    if record_set.data:
+        record_set.data = []
 
 
 class FieldDataFrame:
     """Names of the columns in the pd.DataFrame for `fields`."""
 
-    NAME = "Name"
-    DESCRIPTION = "Description"
+    NAME = "Field name"
+    DESCRIPTION = "Field description"
     DATA_TYPE = "Data type"
     SOURCE_UID = "Source"
     SOURCE_EXTRACT = "Source extract"
@@ -144,17 +205,14 @@ class FieldDataFrame:
 def render_record_sets():
     col1, col2 = st.columns([1, 1])
     with col1:
-        _render_left_panel()
+        with st.spinner("Generating the dataset..."):
+            _render_left_panel()
     with col2:
         _render_right_panel()
 
 
 def _render_left_panel():
     """Left panel: visualization of all RecordSets as expandable forms."""
-    distribution = st.session_state[Metadata].distribution
-    if not distribution:
-        st.markdown("Please add resources first.")
-        return
     record_sets = st.session_state[Metadata].record_sets
     record_set: RecordSet
     for record_set_key, record_set in enumerate(record_sets):
@@ -188,12 +246,20 @@ def _render_left_panel():
                 on_change=handle_record_set_change,
                 args=(RecordSetEvent.IS_ENUMERATION, record_set, key),
             )
+            key = f"{prefix}-has-data"
+            st.checkbox(
+                "Whether the RecordSet has in-line data",
+                key=key,
+                value=bool(record_set.data),
+                on_change=handle_record_set_change,
+                args=(RecordSetEvent.HAS_DATA, record_set, key),
+            )
 
             joins = _find_joins(record_set.fields)
             has_join = st.checkbox(
-                "Whether the RecordSet contains joins. To add a new join, add a"
-                f" field with a source in `{record_set.name}` and a reference to"
-                " another RecordSet or FileSet/FileObject.",
+                "Whether the RecordSet contains joins. To add a new join, add a field"
+                " with a source in `RecordSet`/`FileSet`/`FileObject` and a reference"
+                " to another `RecordSet`/`FileSet`/`FileObject`.",
                 key=f"{prefix}-has-joins",
                 value=bool(joins),
                 disabled=True,
@@ -248,8 +314,7 @@ def _render_left_panel():
             )
             st.data_editor(
                 fields,
-                # There is a bug with `st.data_editor` when the df is empty.
-                use_container_width=not fields.empty,
+                use_container_width=True,
                 num_rows="dynamic",
                 key=data_editor_key,
                 column_config={
@@ -273,6 +338,26 @@ def _render_left_panel():
                 on_change=_handle_fields_change,
                 args=(record_set_key, record_set),
             )
+            result: _Result = _generate_data_with_timeout(record_set)
+            df, exception = result.get("df"), result.get("exception")
+            if exception is None and df is not None and not df.empty:
+                st.markdown("Previsualize the data:")
+                st.dataframe(df, use_container_width=True)
+            # The generation is not triggered if record_set has in-line `data`.
+            elif not record_set.data:
+                left, right = st.columns([1, 10])
+                if exception:
+                    left.button(
+                        "⚠️",
+                        key=f"idea-{prefix}",
+                        disabled=True,
+                        help=textwrap.dedent(f"""**Error**:
+                        ```
+                        {exception}
+                        ```
+                        """),
+                    )
+                right.markdown("No preview is possible.")
 
             st.button(
                 "Edit fields details",
@@ -297,56 +382,80 @@ def _render_right_panel():
     record_set = selected.record_set
     record_set_key = selected.record_set_key
     with st.expander("**Fields**", expanded=True):
-        for field_key, field in enumerate(record_set.fields):
-            prefix = f"{record_set_key}-{field.name}-{field_key}"
-            col1, col2, col3 = st.columns([1, 1, 1])
-
-            key = f"{prefix}-name"
-            col1.text_input(
-                needed_field("Name"),
-                placeholder="Name without special character.",
-                key=key,
-                value=field.name,
-                on_change=handle_field_change,
-                args=(FieldEvent.NAME, field, key),
+        if isinstance(record_set.data, list):
+            st.markdown(
+                f"{needed_field('Data')}. This RecordSet is marked as having in-line"
+                " data. Please, list the data below:"
             )
-            key = f"{prefix}-description"
-            col2.text_input(
-                "Description",
-                placeholder="Provide a clear description of the RecordSet.",
+            key = f"{record_set_key}-fields-data"
+            columns = [field.name for field in record_set.fields]
+            st.data_editor(
+                pd.DataFrame(record_set.data, columns=columns),
+                use_container_width=True,
+                num_rows="dynamic",
                 key=key,
-                on_change=handle_field_change,
-                value=field.description,
-                args=(FieldEvent.DESCRIPTION, field, key),
+                column_config={
+                    field.name: st.column_config.TextColumn(
+                        field.name,
+                        help=field.description,
+                        required=True,
+                    )
+                    for field in record_set.fields
+                },
+                on_change=handle_record_set_change,
+                args=(RecordSetEvent.CHANGE_DATA, record_set, key),
            )
-            if field.data_types:
-                data_type = field.data_types[0]
-                if isinstance(data_type, str):
-                    data_type = term.URIRef(data_type)
-                if data_type in DATA_TYPES:
-                    data_type_index = DATA_TYPES.index(data_type)
+        else:
+            for field_key, field in enumerate(record_set.fields):
+                prefix = f"{record_set_key}-{field.name}-{field_key}"
+                col1, col2, col3 = st.columns([1, 1, 1])
+
+                key = f"{prefix}-name"
+                col1.text_input(
+                    needed_field("Name"),
+                    placeholder="Name without special character.",
+                    key=key,
+                    value=field.name,
+                    on_change=handle_field_change,
+                    args=(FieldEvent.NAME, field, key),
+                )
+                key = f"{prefix}-description"
+                col2.text_input(
+                    "Description",
+                    placeholder="Provide a clear description of the RecordSet.",
+                    key=key,
+                    on_change=handle_field_change,
+                    value=field.description,
+                    args=(FieldEvent.DESCRIPTION, field, key),
+                )
+                if field.data_types:
+                    data_type = field.data_types[0]
+                    if isinstance(data_type, str):
+                        data_type = term.URIRef(data_type)
+                    if data_type in DATA_TYPES:
+                        data_type_index = DATA_TYPES.index(data_type)
+                    else:
+                        data_type_index = None
                 else:
                     data_type_index = None
-            else:
-                data_type_index = None
-            key = f"{prefix}-datatypes"
-            col3.selectbox(
-                needed_field("Data type"),
-                index=data_type_index,
-                options=DATA_TYPES,
-                key=key,
-                on_change=handle_field_change,
-                args=(FieldEvent.DATA_TYPE, field, key),
-            )
-            possible_sources = _get_possible_sources(metadata)
-            render_source(
-                record_set_key, record_set, field, field_key, possible_sources
-            )
-            render_references(
-                record_set_key, record_set, field, field_key, possible_sources
-            )
-
-            st.divider()
+                key = f"{prefix}-datatypes"
+                col3.selectbox(
+                    needed_field("Data type"),
+                    index=data_type_index,
+                    options=DATA_TYPES,
+                    key=key,
+                    on_change=handle_field_change,
+                    args=(FieldEvent.DATA_TYPE, field, key),
+                )
+                possible_sources = _get_possible_sources(metadata)
+                render_source(
+                    record_set_key, record_set, field, field_key, possible_sources
+                )
+                render_references(
+                    record_set_key, record_set, field, field_key, possible_sources
+                )
+
+                st.divider()
 
     st.button(
         "Close",