marcenacp committed on
Commit
dc92053
1 Parent(s): 0c5b67f
app.py CHANGED
@@ -5,7 +5,8 @@ import streamlit as st
5
  from core.constants import OAUTH_CLIENT_ID
6
  from core.constants import OAUTH_STATE
7
  from core.constants import REDIRECT_URI
8
- from core.state import CurrentStep
 
9
  from core.state import get_cached_user
10
  from core.state import User
11
  from utils import init_state
@@ -48,27 +49,28 @@ if OAUTH_CLIENT_ID and not user:
48
 
49
  def _back_to_menu():
50
  """Sends the user back to the menu."""
 
51
  init_state(force=True)
52
 
53
 
54
  def _logout():
55
  """Logs the user out."""
56
  st.cache_data.clear()
 
57
 
58
 
59
  if OAUTH_CLIENT_ID:
60
  col2.write("\n") # Vertical box to shift the lgout menu
61
  col2.button("Log out", on_click=_logout)
62
 
 
63
 
64
- if st.session_state[CurrentStep] != CurrentStep.splash:
65
  col3.write("\n") # Vertical box to shift the button menu
66
  col3.button("Menu", on_click=_back_to_menu)
67
 
68
 
69
- if st.session_state[CurrentStep] == CurrentStep.splash:
70
- render_splash()
71
- elif st.session_state[CurrentStep] == CurrentStep.editor:
72
  render_editor()
73
  else:
74
- st.warning("invalid unhandled app state")
 
5
  from core.constants import OAUTH_CLIENT_ID
6
  from core.constants import OAUTH_STATE
7
  from core.constants import REDIRECT_URI
8
+ from core.query_params import get_project_timestamp
9
+ from core.state import CurrentProject
10
  from core.state import get_cached_user
11
  from core.state import User
12
  from utils import init_state
 
49
 
50
def _back_to_menu():
    """Returns the user to the menu by resetting the URL and the session state."""
    # Calling the setter without arguments clears every query param.
    st.experimental_set_query_params()
    # `force=True` re-initializes the state even if it is already populated.
    init_state(force=True)
54
 
55
 
56
def _logout():
    """Logs the user out: clears cached data, then goes back to the menu."""
    st.cache_data.clear()
    _back_to_menu()
60
 
61
 
62
  if OAUTH_CLIENT_ID:
63
  col2.write("\n") # Vertical box to shift the lgout menu
64
  col2.button("Log out", on_click=_logout)
65
 
66
+ timestamp = get_project_timestamp()
67
 
68
+ if timestamp:
69
  col3.write("\n") # Vertical box to shift the button menu
70
  col3.button("Menu", on_click=_back_to_menu)
71
 
72
 
73
+ if st.session_state.get(CurrentProject):
 
 
74
  render_editor()
75
  else:
76
+ render_splash()
core/constants.py CHANGED
@@ -28,3 +28,10 @@ def PAST_PROJECTS_PATH(user) -> epath.Path:
28
  PROJECT_FOLDER_PATTERN = "%Y%m%d%H%M%S%f"
29
 
30
  DF_HEIGHT = 150
 
 
 
 
 
 
 
 
28
  PROJECT_FOLDER_PATTERN = "%Y%m%d%H%M%S%f"
29
 
30
  DF_HEIGHT = 150
31
+
32
+ # Tabs
33
+ OVERVIEW = "Overview"
34
+ METADATA = "Metadata"
35
+ RESOURCES = "Resources"
36
+ RECORD_SETS = "RecordSets"
37
+ TABS = [OVERVIEW, METADATA, RESOURCES, RECORD_SETS]
core/past_projects.py CHANGED
@@ -5,6 +5,7 @@ from etils import epath
5
  import streamlit as st
6
 
7
  from core.constants import PAST_PROJECTS_PATH
 
8
  from core.state import CurrentProject
9
  from core.state import get_cached_user
10
  from core.state import Metadata
@@ -28,6 +29,7 @@ def save_current_project():
28
  project = CurrentProject.create_new()
29
  st.session_state[CurrentProject] = project
30
  project.path.mkdir(parents=True, exist_ok=True)
 
31
  with _pickle_file(project.path).open("wb") as file:
32
  try:
33
  pickle.dump(metadata, file)
 
5
  import streamlit as st
6
 
7
  from core.constants import PAST_PROJECTS_PATH
8
+ from core.query_params import set_project
9
  from core.state import CurrentProject
10
  from core.state import get_cached_user
11
  from core.state import Metadata
 
29
  project = CurrentProject.create_new()
30
  st.session_state[CurrentProject] = project
31
  project.path.mkdir(parents=True, exist_ok=True)
32
+ set_project(project)
33
  with _pickle_file(project.path).open("wb") as file:
34
  try:
35
  pickle.dump(metadata, file)
core/query_params.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Module to manipulate query params."""
2
+
3
+ from typing import Any
4
+
5
+ import streamlit as st
6
+
7
+ from core.constants import TABS
8
+ from core.state import CurrentProject
9
+ from core.state import RecordSet
10
+
11
+
12
class QueryParams:
    """Names of the URL query params read and written by this module."""

    # Set from the name of the project folder (see `set_project`).
    OPEN_PROJECT = "project"
    # Compared against a record set's name (see `is_record_set_expanded`).
    OPEN_RECORD_SET = "recordSet"
    # Holds the index of a tab (see `set_tab` and `go_to_tab`).
    OPEN_TAB = "tab"
18
+
19
+
20
+ def _get_query_param(params: dict[str, Any], name: str) -> str | None:
21
+ """Gets query param with the name `name`."""
22
+ if name in params:
23
+ param = params[name]
24
+ if isinstance(param, list) and len(param) > 0:
25
+ return param[0]
26
+ return None
27
+
28
+
29
def _set_query_param(param: str, new_value: Any) -> None:
    """Sets the query param `param` to `new_value`, keeping the other params.

    Args:
        param: Name of the query param to set.
        new_value: Value to store. Typed `Any` because callers in this module
            pass both strings (names) and integers (tab indices).
    """
    params = st.experimental_get_query_params()
    # Drop the previous value first so that `param` only holds `new_value`.
    new_params = {k: v for k, v in params.items() if k != param}
    new_params[param] = new_value
    st.experimental_set_query_params(**new_params)
34
+
35
+
36
def go_to_tab(tabs: list[str]):
    """Clicks on the tab whose index is stored in the `tab` query param.

    Does nothing when the param is missing, is not an integer, or is out of
    range for `tabs`.

    Args:
        tabs: The rendered tabs. Only the length is used, to bounds-check the
            requested index.
    """
    params = st.experimental_get_query_params()
    if QueryParams.OPEN_TAB not in params:
        return
    try:
        # Only the parsing can legitimately fail; keep the `try` body minimal.
        tab = int(params[QueryParams.OPEN_TAB][0])
    except (ValueError, IndexError):
        return
    if not 0 <= tab < len(tabs):
        return
    # NOTE(review): the id prefix is hard-coded to match the DOM ids of the tab
    # buttons; presumably it depends on the front-end version -- confirm when
    # upgrading Streamlit.
    tab_id = f"tabs-bui3-tab-{tab}"
    # Click on the tab. The element may not exist in the parent document, so
    # guard against `null` instead of throwing inside the component.
    js = f"""
<script>
  const tab = window.parent.document.getElementById('{tab_id}');
  if (tab) tab.click();
</script>
"""
    st.components.v1.html(js)
53
+
54
+
55
def set_tab(tab: str):
    """Stores the position of `tab` within `TABS` in the `tab` query param.

    Names that are not in `TABS` are ignored.
    """
    try:
        index = TABS.index(tab)
    except ValueError:
        # Unknown tab: leave the query params untouched.
        return
    _set_query_param(QueryParams.OPEN_TAB, index)
60
+
61
+
62
def is_record_set_expanded(record_set: RecordSet) -> bool:
    """Tells whether the `recordSet` query param names `record_set`."""
    current_params = st.experimental_get_query_params()
    requested_name = _get_query_param(current_params, QueryParams.OPEN_RECORD_SET)
    if not requested_name:
        # No (or empty) param: no record set is expanded.
        return False
    return requested_name == record_set.name
68
+
69
+
70
def expand_record_set(record_set: RecordSet) -> None:
    """Writes the name of `record_set` to the `recordSet` query param."""
    name = record_set.name
    _set_query_param(QueryParams.OPEN_RECORD_SET, name)
72
+
73
+
74
def get_project_timestamp() -> str | None:
    """Reads the `project` query param from the URL, or None if it is not set."""
    return _get_query_param(
        st.experimental_get_query_params(), QueryParams.OPEN_PROJECT
    )
77
+
78
+
79
def set_project(project: CurrentProject):
    """Writes the folder name of `project` to the `project` query param."""
    folder_name = project.path.name
    _set_query_param(QueryParams.OPEN_PROJECT, folder_name)
core/state.py CHANGED
@@ -88,13 +88,6 @@ def get_cached_user():
88
  return st.session_state.get(User)
89
 
90
 
91
- class CurrentStep:
92
- """Holds all major state variables for the application."""
93
-
94
- splash = "splash"
95
- editor = "editor"
96
-
97
-
98
  @dataclasses.dataclass
99
  class CurrentProject:
100
  """The selected project."""
@@ -104,6 +97,10 @@ class CurrentProject:
104
  @classmethod
105
  def create_new(cls) -> CurrentProject | None:
106
  timestamp = datetime.datetime.now().strftime(PROJECT_FOLDER_PATTERN)
 
 
 
 
107
  user = get_cached_user()
108
  if user is None and OAUTH_CLIENT_ID:
109
  return None
 
88
  return st.session_state.get(User)
89
 
90
 
 
 
 
 
 
 
 
91
  @dataclasses.dataclass
92
  class CurrentProject:
93
  """The selected project."""
 
97
  @classmethod
98
  def create_new(cls) -> CurrentProject | None:
99
  timestamp = datetime.datetime.now().strftime(PROJECT_FOLDER_PATTERN)
100
+ return cls.from_timestamp(timestamp)
101
+
102
+ @classmethod
103
+ def from_timestamp(cls, timestamp: str) -> CurrentProject | None:
104
  user = get_cached_user()
105
  if user is None and OAUTH_CLIENT_ID:
106
  return None
cypress/downloads/croissant-titanic.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"@context": {"@language": "en", "@vocab": "https://schema.org/", "column": "ml:column", "data": {"@id": "ml:data", "@type": "@json"}, "dataType": {"@id": "ml:dataType", "@type": "@vocab"}, "extract": "ml:extract", "field": "ml:field", "fileProperty": "ml:fileProperty", "format": "ml:format", "includes": "ml:includes", "isEnumeration": "ml:isEnumeration", "jsonPath": "ml:jsonPath", "ml": "http://mlcommons.org/schema/", "parentField": "ml:parentField", "path": "ml:path", "recordSet": "ml:recordSet", "references": "ml:references", "regex": "ml:regex", "repeated": "ml:repeated", "replace": "ml:replace", "sc": "https://schema.org/", "separator": "ml:separator", "source": "ml:source", "subField": "ml:subField", "transform": "ml:transform", "wd": "https://www.wikidata.org/wiki/"}, "@type": "sc:Dataset", "name": "Titanic", "description": "The original Titanic dataset, describing the status of individual passengers on the Titanic.\n\n The titanic data does not contain information from the crew, but it does contain actual ages of half of the passengers. \n\n For more information about how this dataset was constructed: \nhttps://web.archive.org/web/20200802155940/http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3info.txt\n\nOther useful information (useful for prices description for example):\nhttp://campus.lakeforest.edu/frank/FILES/MLFfiles/Bio150/Titanic/TitanicMETA.pdf\n\n Also see the following article describing shortcomings of the dataset data:\nhttps://emma-stiefel.medium.com/plugging-holes-in-kaggles-titanic-dataset-an-introduction-to-combining-datasets-with-fuzzywuzzy-60a686699da7\n", "citation": "The principal source for data about Titanic passengers is the Encyclopedia Titanica (http://www.encyclopedia-titanica.org/). The datasets used here were begun by a variety of researchers. 
One of the original sources is Eaton & Haas (1994) Titanic: Triumph and Tragedy, Patrick Stephens Ltd, which includes a passenger list created by many researchers and edited by Michael A. Findlay.\n\nThomas Cason of UVa has greatly updated and improved the titanic data frame using the Encyclopedia Titanica and created the dataset here. Some duplicate passengers have been dropped, many errors corrected, many missing ages filled in, and new variables created.\n", "license": "Public", "url": "https://www.openml.org/d/40945", "distribution": [{"@type": "sc:FileObject", "name": "passengers.csv", "contentSize": "117743 B", "contentUrl": "https://www.openml.org/data/get_csv/16826755/phpMYEkMl", "encodingFormat": "text/csv", "sha256": "c617db2c7470716250f6f001be51304c76bcc8815527ab8bae734bdca0735737"}, {"@type": "sc:FileObject", "name": "genders.csv", "description": "Maps gender values (\"male\", \"female\") to semantic URLs.", "contentSize": "117743 B", "contentUrl": "data/genders.csv", "encodingFormat": "text/csv", "sha256": "c617db2c7470716250f6f001be51304c76bcc8815527ab8bae734bdca0735737"}, {"@type": "sc:FileObject", "name": "embarkation_ports.csv", "description": "Maps Embarkation port initial to labeled values.", "contentSize": "117743 B", "contentUrl": "data/embarkation_ports.csv", "encodingFormat": "text/csv", "sha256": "c617db2c7470716250f6f001be51304c76bcc8815527ab8bae734bdca0735737"}], "recordSet": [{"@type": "ml:RecordSet", "name": "genders", "description": "Maps gender labels to semantic definitions.", "isEnumeration": true, "key": "label", "field": [{"@type": "ml:Field", "name": "label", "description": "One of {\"male\", \"female\"}", "dataType": ["sc:Text", "sc:name"], "source": {"distribution": "genders.csv", "extract": {"column": "label"}}}, {"@type": "ml:Field", "name": "url", "description": "Corresponding WikiData URL", "dataType": ["sc:URL", "wd:Q48277"], "source": {"distribution": "genders.csv", "extract": {"column": "url"}}}]}, {"@type": 
"ml:RecordSet", "name": "embarkation_ports", "description": "Maps Embarkation port initial to labeled values.", "isEnumeration": true, "key": "key", "field": [{"@type": "ml:Field", "name": "key", "description": "C, Q, S or ?", "dataType": "sc:Text", "source": {"distribution": "embarkation_ports.csv", "extract": {"column": "key"}}}, {"@type": "ml:Field", "name": "label", "description": "Human-readable label", "dataType": ["sc:Text", "sc:name"], "source": {"distribution": "embarkation_ports.csv", "extract": {"column": "label"}}}, {"@type": "ml:Field", "name": "url", "description": "Corresponding WikiData URL", "dataType": ["sc:URL", "wd:Q515"], "source": {"distribution": "embarkation_ports.csv", "extract": {"column": "url"}}}]}, {"@type": "ml:RecordSet", "name": "passengers", "description": "The list of passengers. Does not include crew members.", "field": [{"@type": "ml:Field", "name": "name", "description": "Name of the passenger", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "name"}}}, {"@type": "ml:Field", "name": "gender", "description": "Gender of passenger (male or female)", "dataType": "sc:Text", "references": {"field": "genders/label"}, "source": {"distribution": "passengers.csv", "extract": {"column": "sex"}}}, {"@type": "ml:Field", "name": "age", "description": "Age of passenger at time of death. 
It's a string, because some values can be `?`.", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "age"}}}, {"@type": "ml:Field", "name": "survived", "description": "Survival status of passenger (0: Lost, 1: Saved)", "dataType": "sc:Integer", "source": {"distribution": "passengers.csv", "extract": {"column": "survived"}}}, {"@type": "ml:Field", "name": "pclass", "description": "Passenger Class (1st/2nd/3rd)", "dataType": "sc:Integer", "source": {"distribution": "passengers.csv", "extract": {"column": "pclass"}}}, {"@type": "ml:Field", "name": "cabin", "description": "Passenger cabin.", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "cabin"}}}, {"@type": "ml:Field", "name": "embarked", "description": "Port of Embarkation (C: Cherbourg, Q: Queenstown, S: Southampton, ?: Unknown).", "dataType": "sc:Text", "references": {"field": "embarkation_ports/key"}, "source": {"distribution": "passengers.csv", "extract": {"column": "embarked"}}}, {"@type": "ml:Field", "name": "fare", "description": "Passenger Fare (British pound). 
It's a string, because some values can be `?`.", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "fare"}}}, {"@type": "ml:Field", "name": "home_destination", "description": "Home and destination", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "home.dest"}}}, {"@type": "ml:Field", "name": "ticket", "description": "Ticket Number, may include a letter.", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "ticket"}}}, {"@type": "ml:Field", "name": "num_parents_children", "description": "Number of Parents/Children Aboard", "dataType": "sc:Integer", "source": {"distribution": "passengers.csv", "extract": {"column": "parch"}}}, {"@type": "ml:Field", "name": "num_siblings_spouses", "description": "Number of Siblings/Spouses Aboard", "dataType": "sc:Integer", "source": {"distribution": "passengers.csv", "extract": {"column": "sibsp"}}}, {"@type": "ml:Field", "name": "boat", "description": "Lifeboat used by passenger", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "boat"}}}, {"@type": "ml:Field", "name": "body", "description": "Body Identification Number", "dataType": "sc:Text", "source": {"distribution": "passengers.csv", "extract": {"column": "body"}}}]}]}
cypress/screenshots/renameDistribution.cy.js/Renaming of FileObjectsFileSetsRecordSetsFields -- should rename the FileObjectFileSet everywhere (failed).png ADDED
cypress/screenshots/uploadCsv.cy.js/Editor loads a local CSV as a resource -- should display the form Overview, Metadata, Resources, & Record Sets (failed).png CHANGED
events/fields.py CHANGED
@@ -3,6 +3,8 @@ from typing import Any
3
 
4
  import streamlit as st
5
 
 
 
6
  from core.state import Field
7
  from core.state import Metadata
8
  import mlcroissant as mlc
@@ -77,6 +79,7 @@ def handle_field_change(
77
  key: str,
78
  **kwargs,
79
  ):
 
80
  value = st.session_state[key]
81
  if change == FieldEvent.NAME:
82
  old_name = field.name
 
3
 
4
  import streamlit as st
5
 
6
+ from core.constants import RECORD_SETS
7
+ from core.query_params import set_tab
8
  from core.state import Field
9
  from core.state import Metadata
10
  import mlcroissant as mlc
 
79
  key: str,
80
  **kwargs,
81
  ):
82
+ set_tab(RECORD_SETS)
83
  value = st.session_state[key]
84
  if change == FieldEvent.NAME:
85
  old_name = field.name
events/metadata.py CHANGED
@@ -2,6 +2,8 @@ import enum
2
 
3
  import streamlit as st
4
 
 
 
5
  from core.state import Metadata
6
 
7
 
@@ -16,6 +18,7 @@ class MetadataEvent(enum.Enum):
16
 
17
 
18
  def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
 
19
  if event == MetadataEvent.NAME:
20
  metadata.name = st.session_state[key]
21
  elif event == MetadataEvent.DESCRIPTION:
 
2
 
3
  import streamlit as st
4
 
5
+ from core.constants import METADATA
6
+ from core.query_params import set_tab
7
  from core.state import Metadata
8
 
9
 
 
18
 
19
 
20
  def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
21
+ set_tab(METADATA)
22
  if event == MetadataEvent.NAME:
23
  metadata.name = st.session_state[key]
24
  elif event == MetadataEvent.DESCRIPTION:
events/record_sets.py CHANGED
@@ -2,6 +2,9 @@ import enum
2
 
3
  import streamlit as st
4
 
 
 
 
5
  from core.state import Metadata
6
  from core.state import RecordSet
7
 
@@ -15,6 +18,7 @@ class RecordSetEvent(enum.Enum):
15
 
16
 
17
  def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key: str):
 
18
  value = st.session_state[key]
19
  if event == RecordSetEvent.NAME:
20
  old_name = record_set.name
@@ -27,3 +31,4 @@ def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key:
27
  record_set.description = value
28
  elif event == RecordSetEvent.IS_ENUMERATION:
29
  record_set.is_enumeration = value
 
 
2
 
3
  import streamlit as st
4
 
5
+ from core.constants import RECORD_SETS
6
+ from core.query_params import expand_record_set
7
+ from core.query_params import set_tab
8
  from core.state import Metadata
9
  from core.state import RecordSet
10
 
 
18
 
19
 
20
  def handle_record_set_change(event: RecordSetEvent, record_set: RecordSet, key: str):
21
+ set_tab(RECORD_SETS)
22
  value = st.session_state[key]
23
  if event == RecordSetEvent.NAME:
24
  old_name = record_set.name
 
31
  record_set.description = value
32
  elif event == RecordSetEvent.IS_ENUMERATION:
33
  record_set.is_enumeration = value
34
+ expand_record_set(record_set=record_set)
events/resources.py CHANGED
@@ -2,6 +2,8 @@ import enum
2
 
3
  import streamlit as st
4
 
 
 
5
  from core.state import FileObject
6
  from core.state import FileSet
7
  from core.state import Metadata
@@ -21,6 +23,7 @@ class ResourceEvent(enum.Enum):
21
 
22
 
23
  def handle_resource_change(event: ResourceEvent, resource: Resource, key: str):
 
24
  value = st.session_state[key]
25
  if event == ResourceEvent.NAME:
26
  old_name = resource.name
 
2
 
3
  import streamlit as st
4
 
5
+ from core.constants import RESOURCES
6
+ from core.query_params import set_tab
7
  from core.state import FileObject
8
  from core.state import FileSet
9
  from core.state import Metadata
 
23
 
24
 
25
  def handle_resource_change(event: ResourceEvent, resource: Resource, key: str):
26
+ set_tab(RESOURCES)
27
  value = st.session_state[key]
28
  if event == ResourceEvent.NAME:
29
  old_name = resource.name
utils.py CHANGED
@@ -1,7 +1,8 @@
1
  import streamlit as st
2
 
 
 
3
  from core.state import CurrentProject
4
- from core.state import CurrentStep
5
  from core.state import Metadata
6
  from core.state import SelectedRecordSet
7
  from core.state import SelectedResource
@@ -12,33 +13,34 @@ def needed_field(text: str) -> str:
12
  return f"{text}:red[*]"
13
 
14
 
15
- def jump_to(step: str):
16
- """Maintains the user's location within the editor."""
17
- if step is not None:
18
- st.session_state[CurrentStep] = step
19
-
20
-
21
  def init_state(force=False):
22
  """Initializes the session state. `force=True` to force re-initializing it."""
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  if Metadata not in st.session_state or force:
25
  st.session_state[Metadata] = Metadata()
26
 
27
  if mlc.Dataset not in st.session_state or force:
28
  st.session_state[mlc.Dataset] = None
29
 
30
- if CurrentStep not in st.session_state or force:
31
- st.session_state[CurrentStep] = CurrentStep.splash
32
-
33
  if SelectedResource not in st.session_state or force:
34
  st.session_state[SelectedResource] = None
35
 
36
  if SelectedResource not in st.session_state or force:
37
  st.session_state[SelectedRecordSet] = None
38
 
39
- if CurrentProject not in st.session_state or force:
40
- st.session_state[CurrentProject] = CurrentProject.create_new()
41
-
42
  # Uncomment those lines if you work locally in order to avoid clicks at each reload.
43
  # And comment all previous lines in `init_state`.
44
  # if mlc.Dataset not in st.session_state or force:
@@ -47,7 +49,5 @@ def init_state(force=False):
47
  # st.session_state[Metadata] = Metadata.from_canonical(
48
  # st.session_state[mlc.Dataset].metadata
49
  # )
50
- # if CurrentStep not in st.session_state or force:
51
- # st.session_state[CurrentStep] = CurrentStep.editor
52
  # if CurrentProject not in st.session_state or force:
53
  # st.session_state[CurrentProject] = CurrentProject.create_new()
 
1
  import streamlit as st
2
 
3
+ from core.past_projects import open_project
4
+ from core.query_params import get_project_timestamp
5
  from core.state import CurrentProject
 
6
  from core.state import Metadata
7
  from core.state import SelectedRecordSet
8
  from core.state import SelectedResource
 
13
  return f"{text}:red[*]"
14
 
15
 
 
 
 
 
 
 
16
  def init_state(force=False):
17
  """Initializes the session state. `force=True` to force re-initializing it."""
18
 
19
+ timestamp = get_project_timestamp()
20
+ if timestamp and not force:
21
+ project = CurrentProject.from_timestamp(timestamp)
22
+ if (
23
+ project
24
+ and CurrentProject not in st.session_state
25
+ and Metadata not in st.session_state
26
+ ):
27
+ st.session_state[CurrentProject] = project
28
+ st.session_state[Metadata] = open_project(project.path)
29
+ else:
30
+ st.session_state[CurrentProject] = None
31
+
32
  if Metadata not in st.session_state or force:
33
  st.session_state[Metadata] = Metadata()
34
 
35
  if mlc.Dataset not in st.session_state or force:
36
  st.session_state[mlc.Dataset] = None
37
 
 
 
 
38
  if SelectedResource not in st.session_state or force:
39
  st.session_state[SelectedResource] = None
40
 
41
  if SelectedResource not in st.session_state or force:
42
  st.session_state[SelectedRecordSet] = None
43
 
 
 
 
44
  # Uncomment those lines if you work locally in order to avoid clicks at each reload.
45
  # And comment all previous lines in `init_state`.
46
  # if mlc.Dataset not in st.session_state or force:
 
49
  # st.session_state[Metadata] = Metadata.from_canonical(
50
  # st.session_state[mlc.Dataset].metadata
51
  # )
 
 
52
  # if CurrentProject not in st.session_state or force:
53
  # st.session_state[CurrentProject] = CurrentProject.create_new()
views/files.py CHANGED
@@ -1,16 +1,15 @@
1
- import enum
2
-
3
  import streamlit as st
4
 
5
  from components.tree import render_tree
6
  from core.constants import DF_HEIGHT
 
7
  from core.files import file_from_form
8
  from core.files import file_from_upload
9
  from core.files import file_from_url
10
  from core.files import FILE_OBJECT
11
- from core.files import FILE_SET
12
  from core.files import FILE_TYPES
13
  from core.files import RESOURCE_TYPES
 
14
  from core.record_sets import infer_record_sets
15
  from core.state import FileObject
16
  from core.state import FileSet
@@ -70,6 +69,7 @@ def _render_resources_panel(files: list[Resource]) -> Resource | None:
70
  if not name:
71
  return None
72
  file = filename_to_file[name]
 
73
  return file
74
 
75
 
 
 
 
1
  import streamlit as st
2
 
3
  from components.tree import render_tree
4
  from core.constants import DF_HEIGHT
5
+ from core.constants import RESOURCES
6
  from core.files import file_from_form
7
  from core.files import file_from_upload
8
  from core.files import file_from_url
9
  from core.files import FILE_OBJECT
 
10
  from core.files import FILE_TYPES
11
  from core.files import RESOURCE_TYPES
12
+ from core.query_params import set_tab
13
  from core.record_sets import infer_record_sets
14
  from core.state import FileObject
15
  from core.state import FileSet
 
69
  if not name:
70
  return None
71
  file = filename_to_file[name]
72
+ # set_tab(RESOURCES)
73
  return file
74
 
75
 
views/load.py CHANGED
@@ -5,13 +5,12 @@ import streamlit as st
5
 
6
  from core.constants import EDITOR_CACHE
7
  from core.past_projects import save_current_project
8
- from core.state import CurrentStep
9
  from core.state import Metadata
10
  import mlcroissant as mlc
11
- from utils import jump_to
12
 
13
 
14
  def _on_file_upload(key):
 
15
  file = st.session_state[key]
16
  file_cont = file.read()
17
  # TODO(marcenacp): The Python library should support loading from an open file/dict.
@@ -22,8 +21,7 @@ def _on_file_upload(key):
22
  try:
23
  dataset = mlc.Dataset(newfile_name)
24
  st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
25
- jump_to(CurrentStep.editor)
26
- save_current_project()
27
  except mlc.ValidationError as e:
28
  st.warning(e)
29
  st.toast(body="Invalid Croissant File!", icon="🔥")
 
5
 
6
  from core.constants import EDITOR_CACHE
7
  from core.past_projects import save_current_project
 
8
  from core.state import Metadata
9
  import mlcroissant as mlc
 
10
 
11
 
12
  def _on_file_upload(key):
13
+ """Triggers when a new file gets uploaded to load the Croissant metadata."""
14
  file = st.session_state[key]
15
  file_cont = file.read()
16
  # TODO(marcenacp): The Python library should support loading from an open file/dict.
 
21
  try:
22
  dataset = mlc.Dataset(newfile_name)
23
  st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
24
+ # save_current_project()
 
25
  except mlc.ValidationError as e:
26
  st.warning(e)
27
  st.toast(body="Invalid Croissant File!", icon="🔥")
views/previous_files.py CHANGED
@@ -6,16 +6,16 @@ import streamlit as st
6
  from core.constants import PROJECT_FOLDER_PATTERN
7
  from core.past_projects import load_past_projects_paths
8
  from core.past_projects import open_project
 
9
  from core.state import CurrentProject
10
- from core.state import CurrentStep
11
  from core.state import Metadata
12
- from utils import jump_to
13
 
14
 
15
  def _load_croissant(metadata: Metadata, path: epath.Path) -> None:
16
  st.session_state[Metadata] = metadata
17
- st.session_state[CurrentProject] = CurrentProject(path)
18
- jump_to(CurrentStep.editor)
 
19
 
20
 
21
  def _remove_croissant(path: epath.Path) -> None:
@@ -24,29 +24,30 @@ def _remove_croissant(path: epath.Path) -> None:
24
 
25
  def render_previous_files():
26
  paths = load_past_projects_paths()
27
- if not paths:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  st.write("No past project to load. Create one on the left!")
29
- else:
30
- for index, path in enumerate(paths):
31
- try:
32
- metadata = open_project(path)
33
- timestamp = datetime.datetime.strptime(
34
- path.name, PROJECT_FOLDER_PATTERN
35
- ).strftime("%Y/%m/%d %H:%M")
36
- label = f"{metadata.name or 'Unnamed dataset'} - {timestamp}"
37
- col1, col2 = st.columns([10, 1])
38
- col1.button(
39
- label,
40
- key=f"splash-{index}-load",
41
- on_click=_load_croissant,
42
- args=(metadata, path),
43
- )
44
- col2.button(
45
- "✖️",
46
- help="Warning: this will delete the project.",
47
- key=f"splash-{index}-remove",
48
- on_click=_remove_croissant,
49
- args=(path,),
50
- )
51
- except:
52
- pass
 
6
  from core.constants import PROJECT_FOLDER_PATTERN
7
  from core.past_projects import load_past_projects_paths
8
  from core.past_projects import open_project
9
+ from core.query_params import set_project
10
  from core.state import CurrentProject
 
11
  from core.state import Metadata
 
12
 
13
 
14
def _load_croissant(metadata: Metadata, path: epath.Path) -> None:
    """Makes the project stored at `path` the current one.

    Args:
        metadata: Already-loaded metadata of the project.
        path: Folder of the project.
    """
    st.session_state[Metadata] = metadata
    selected = CurrentProject(path)
    st.session_state[CurrentProject] = selected
    # Also record the selection in the URL query params.
    set_project(selected)
19
 
20
 
21
  def _remove_croissant(path: epath.Path) -> None:
 
24
 
25
def render_previous_files():
    """Renders a load button and a delete button for each past project.

    Projects that fail to open, or whose folder name does not match
    `PROJECT_FOLDER_PATTERN`, are skipped and logged. When no project could be
    rendered, a hint is written instead.
    """
    # Local import so that this block does not depend on the file-level imports.
    import logging

    paths = load_past_projects_paths()
    has_no_project = True
    for index, path in enumerate(paths):
        try:
            metadata = open_project(path)
            timestamp = datetime.datetime.strptime(
                path.name, PROJECT_FOLDER_PATTERN
            ).strftime("%Y/%m/%d %H:%M")
            label = f"{metadata.name or 'Unnamed dataset'} - {timestamp}"
            col1, col2 = st.columns([10, 1])
            col1.button(
                label,
                key=f"splash-{index}-load",
                on_click=_load_croissant,
                args=(metadata, path),
            )
            col2.button(
                "✖️",
                help="Warning: this will delete the project.",
                key=f"splash-{index}-remove",
                on_click=_remove_croissant,
                args=(path,),
            )
            has_no_project = False
        except Exception:
            # Best effort: one broken project must not hide the others. Catch
            # `Exception` rather than using a bare `except` so that
            # KeyboardInterrupt/SystemExit (and any other BaseException used
            # for control flow) still propagate, and log the reason.
            logging.exception("Could not render past project %s", path)
    if has_no_project:
        st.write("No past project to load. Create one on the left!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
views/record_sets.py CHANGED
@@ -1,8 +1,12 @@
 
 
1
  import numpy as np
2
  import pandas as pd
3
  from rdflib import term
4
  import streamlit as st
5
 
 
 
6
  from core.state import Field
7
  from core.state import Metadata
8
  from core.state import RecordSet
@@ -88,7 +92,10 @@ def _handle_create_record_set():
88
  metadata.add_record_set(RecordSet(name="new-record-set", description=""))
89
 
90
 
91
- def _handle_fields_change(record_set_key: int, record_set: RecordSet):
 
 
 
92
  data_editor_key = _data_editor_key(record_set_key, record_set)
93
  result = st.session_state[data_editor_key]
94
  # `result` has the following structure:
@@ -155,7 +162,7 @@ def _render_left_panel():
155
  for record_set_key, record_set in enumerate(record_sets):
156
  title = f"**{record_set.name or '-'}** ({len(record_set.fields)} fields)"
157
  prefix = f"record-set-{record_set_key}"
158
- with st.expander(title, expanded=False):
159
  col1, col2 = st.columns([1, 3])
160
  key = f"{prefix}-name"
161
  col1.text_input(
@@ -200,25 +207,25 @@ def _render_left_panel():
200
  "Left join",
201
  disabled=True,
202
  value=left[0],
203
- key=f"{prefix}-left-join-{left}",
204
  )
205
  col2.text_input(
206
  "Left key",
207
  disabled=True,
208
  value=left[1],
209
- key=f"{prefix}-left-key-{left}",
210
  )
211
  col4.text_input(
212
  "Right join",
213
  disabled=True,
214
  value=right[0],
215
- key=f"{prefix}-right-join-{right}",
216
  )
217
  col5.text_input(
218
  "Right key",
219
  disabled=True,
220
  value=right[1],
221
- key=f"{prefix}-right-key-{right}",
222
  )
223
  names = [field.name for field in record_set.fields]
224
  descriptions = [field.description for field in record_set.fields]
 
1
+ from typing import Any
2
+
3
  import numpy as np
4
  import pandas as pd
5
  from rdflib import term
6
  import streamlit as st
7
 
8
+ from core.query_params import expand_record_set
9
+ from core.query_params import is_record_set_expanded
10
  from core.state import Field
11
  from core.state import Metadata
12
  from core.state import RecordSet
 
92
  metadata.add_record_set(RecordSet(name="new-record-set", description=""))
93
 
94
 
95
+ def _handle_fields_change(
96
+ record_set_key: int, record_set: RecordSet, params: dict[str, Any]
97
+ ):
98
+ expand_record_set(record_set=record_set)
99
  data_editor_key = _data_editor_key(record_set_key, record_set)
100
  result = st.session_state[data_editor_key]
101
  # `result` has the following structure:
 
162
  for record_set_key, record_set in enumerate(record_sets):
163
  title = f"**{record_set.name or '-'}** ({len(record_set.fields)} fields)"
164
  prefix = f"record-set-{record_set_key}"
165
+ with st.expander(title, expanded=is_record_set_expanded(record_set)):
166
  col1, col2 = st.columns([1, 3])
167
  key = f"{prefix}-name"
168
  col1.text_input(
 
207
  "Left join",
208
  disabled=True,
209
  value=left[0],
210
+ key=f"{prefix}-left-join-{left[0]}-{left[1]}",
211
  )
212
  col2.text_input(
213
  "Left key",
214
  disabled=True,
215
  value=left[1],
216
+ key=f"{prefix}-left-key-{left[0]}-{left[1]}",
217
  )
218
  col4.text_input(
219
  "Right join",
220
  disabled=True,
221
  value=right[0],
222
+ key=f"{prefix}-right-join-{right[0]}-{right[1]}",
223
  )
224
  col5.text_input(
225
  "Right key",
226
  disabled=True,
227
  value=right[1],
228
+ key=f"{prefix}-right-key-{right[0]}-{right[1]}",
229
  )
230
  names = [field.name for field in record_set.fields]
231
  descriptions = [field.description for field in record_set.fields]
views/splash.py CHANGED
@@ -4,14 +4,12 @@ import requests
4
  import streamlit as st
5
 
6
  from core.constants import OAUTH_CLIENT_ID
 
7
  from core.state import CurrentProject
8
- from core.state import CurrentStep
9
  from core.state import Metadata
10
  import mlcroissant as mlc
11
- from utils import jump_to
12
  from views.load import render_load
13
  from views.previous_files import render_previous_files
14
- from views.side_buttons import jump_to
15
 
16
 
17
  def render_splash():
@@ -30,8 +28,9 @@ def render_splash():
30
 
31
  def create_new_croissant():
32
  st.session_state[Metadata] = Metadata()
33
- st.session_state[CurrentProject] = CurrentProject.create_new()
34
- jump_to(CurrentStep.editor)
 
35
 
36
  st.button(
37
  "Create",
@@ -46,8 +45,9 @@ def render_splash():
46
  json = requests.get(url).json()
47
  metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
48
  st.session_state[Metadata] = Metadata.from_canonical(metadata)
49
- st.session_state[CurrentProject] = CurrentProject.create_new()
50
- jump_to(CurrentStep.editor)
 
51
  except Exception as exception:
52
  logging.error(exception)
53
  st.error(
 
4
  import streamlit as st
5
 
6
  from core.constants import OAUTH_CLIENT_ID
7
+ from core.query_params import set_project
8
  from core.state import CurrentProject
 
9
  from core.state import Metadata
10
  import mlcroissant as mlc
 
11
  from views.load import render_load
12
  from views.previous_files import render_previous_files
 
13
 
14
 
15
  def render_splash():
 
28
 
29
  def create_new_croissant():
30
  st.session_state[Metadata] = Metadata()
31
+ project = CurrentProject.create_new()
32
+ st.session_state[CurrentProject] = project
33
+ set_project(project)
34
 
35
  st.button(
36
  "Create",
 
45
  json = requests.get(url).json()
46
  metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
47
  st.session_state[Metadata] = Metadata.from_canonical(metadata)
48
+ project = CurrentProject.create_new()
49
+ st.session_state[CurrentProject] = project
50
+ set_project(project)
51
  except Exception as exception:
52
  logging.error(exception)
53
  st.error(
views/wizard.py CHANGED
@@ -3,7 +3,12 @@ import json
3
  import streamlit as st
4
  import streamlit_nested_layout # Do not remove this allows nesting columns.
5
 
 
6
  from core.past_projects import save_current_project
 
 
 
 
7
  from core.state import Metadata
8
  import mlcroissant as mlc
9
  from views.files import render_files
@@ -26,16 +31,10 @@ def render_export_button(col):
26
  col.download_button("Export", disabled=True, data="", help=str(exception))
27
 
28
 
29
- OVERVIEW = "Overview"
30
- METADATA = "Metadata"
31
- RESOURCES = "Resources"
32
- RECORD_SETS = "RecordSets"
33
-
34
-
35
  def render_editor():
36
  col1, col2 = st.columns([10, 1])
37
  render_export_button(col2)
38
- tab1, tab2, tab3, tab4 = col1.tabs([OVERVIEW, METADATA, RESOURCES, RECORD_SETS])
39
 
40
  with tab1:
41
  render_overview()
@@ -46,3 +45,4 @@ def render_editor():
46
  with tab4:
47
  render_record_sets()
48
  save_current_project()
 
 
3
  import streamlit as st
4
  import streamlit_nested_layout # Do not remove this allows nesting columns.
5
 
6
+ from core.constants import TABS
7
  from core.past_projects import save_current_project
8
+ from core.query_params import get_project_timestamp
9
+ from core.query_params import go_to_tab
10
+ from core.query_params import QueryParams
11
+ from core.query_params import set_tab
12
  from core.state import Metadata
13
  import mlcroissant as mlc
14
  from views.files import render_files
 
31
  col.download_button("Export", disabled=True, data="", help=str(exception))
32
 
33
 
 
 
 
 
 
 
34
  def render_editor():
35
  col1, col2 = st.columns([10, 1])
36
  render_export_button(col2)
37
+ tab1, tab2, tab3, tab4 = col1.tabs(TABS)
38
 
39
  with tab1:
40
  render_overview()
 
45
  with tab4:
46
  render_record_sets()
47
  save_current_project()
48
+ go_to_tab(TABS)