Integration with compatible-libraries and other commands
app.py
CHANGED
@@ -3,30 +3,37 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 import nbformat as nbf
 from huggingface_hub import HfApi
 from httpx import Client
+import logging
+

 """
 TODOs:
-- Handle erros
 - Add more commands to the notebook
 - Parametrize the commands (Move to another file)
-- Use an LLM to suggest commands
+- Let user choose the framework and get if from /compatible-libraries
+- Use an LLM to suggest commands by column types
 - Add commands for auto training
-- Improve logs
 - Enable 'generate notebook' button only if dataset is available and supports library
 """

+# Configuration
 BASE_DATASETS_SERVER_URL = "https://datasets-server.huggingface.co"
-client = Client(headers=
+HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
+client = Client(headers=HEADERS)
+
+logging.basicConfig(level=logging.INFO)


 def get_compatible_libraries(dataset: str):
+    try:
+        resp = client.get(
+            f"{BASE_DATASETS_SERVER_URL}/compatible-libraries?dataset={dataset}"
+        )
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as err:
+        logging.error(f"Failed to fetch compatible libraries: {err}")
+        return None


 def create_notebook_file(cell_commands, notebook_name):
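The hunk above wires get_compatible_libraries to the datasets-server /compatible-libraries endpoint and simply returns resp.json(). The code in the next hunk only relies on that JSON exposing a "libraries" list whose entries carry a "library" name and "loading_codes" with a ready-to-run "code" string. A minimal sketch of that assumed shape and the selection pattern (the field values below are illustrative, not real API output):

# Illustrative response shape assumed by generate_notebook; values are made up.
example_response = {
    "libraries": [
        {
            "library": "pandas",
            "loading_codes": [
                {"code": "import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/<dataset_id>/...')"}
            ],
        }
    ]
}

# Same selection pattern as in generate_notebook: take the pandas entry if present.
pandas_library = next(
    (lib for lib in example_response.get("libraries", []) if lib["library"] == "pandas"),
    None,
)
if pandas_library:
    print(pandas_library["loading_codes"][0]["code"])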
@@ -35,41 +42,56 @@ def create_notebook_file(cell_commands, notebook_name):

     with open(notebook_name, "w") as f:
         nbf.write(nb, f)
+    logging.info(f"Notebook {notebook_name} created successfully")


 def push_notebook(file_path, dataset_id, token):
     notebook_name = "dataset_analysis.ipynb"
     api = HfApi(token=token)
+    try:
+        api.upload_file(
+            path_or_fileobj=file_path,
+            path_in_repo=notebook_name,
+            repo_id=dataset_id,
+            repo_type="dataset",
+        )
+        link = f"https://huggingface.co/datasets/{dataset_id}/blob/main/{notebook_name}"
+        return gr.HTML(
+            value=f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">See notebook</a>',
+            visible=True,
+        )
+    except Exception as err:
+        logging.error(f"Failed to push notebook: {err}")
+        return gr.HTML(value="Failed to push notebook", visible=True)


 def generate_notebook(dataset_id):
     first_code = f"import pandas as pd\n\ndf = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')"
-        print(f"Error: {err}")
+    libraries = get_compatible_libraries(dataset_id)
+
+    if not libraries:
         return gr.File(visible=False), gr.Row.update(visible=False)
+
+    pandas_library = next(
+        (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
+        None,
+    )
+    if pandas_library:
         first_code = pandas_library["loading_codes"][0]["code"]
     else:
         return gr.File(visible=False), gr.Row.update(visible=False)

+    html_code = f"<iframe src='https://huggingface.co/datasets/{dataset_id}/embed/viewer' width='80%' height='560px'></iframe>"
     commands = [
         "!pip install pandas",
         first_code,
         "df.head()",
+        f'from IPython.display import HTML\n\ndisplay(HTML("{html_code}"))',
+        "print(df.shape)",
+        "df.columns",
+        "df.describe()",
+        "df.info()",
+        # TODO: Generate more commands according to column types for EDA and then for auto training?
     ]
     notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
     create_notebook_file(commands, notebook_name=notebook_name)
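The body of create_notebook_file is untouched by this diff and not shown here. As a rough sketch only (an assumption, not the Space's actual implementation), such a helper can turn the commands list built in generate_notebook into code cells with nbformat:

import nbformat as nbf

def create_notebook_file_sketch(cell_commands, notebook_name):
    # Hypothetical sketch: one code cell per command string, saved as a v4 notebook.
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]
    with open(notebook_name, "w") as f:
        nbf.write(nb, f)

# Example usage with the kind of commands generate_notebook assembles.
create_notebook_file_sketch(["!pip install pandas", "df.head()"], "example.ipynb")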
@@ -103,7 +125,7 @@ with gr.Blocks() as demo:
     download_link = gr.File(label="Download notebook", visible=False)
     with gr.Row(visible=False) as auth_page:
         with gr.Column():
+            gr.Markdown(
                 "Want to push to hub? Enter your token ([settings](https://huggingface.co/settings/tokens)):"
             )
             token_box = gr.Textbox(