"""Gradio app that generates a starter analysis notebook for a Hub dataset.

The user searches for a dataset id, generates a small Jupyter notebook that
loads the dataset with pandas, and can then push that notebook to the dataset
repository using a Hugging Face token.
"""

import html

import gradio as gr
from gradio_huggingfacehub_search import HuggingfaceHubSearch
import nbformat as nbf
from huggingface_hub import HfApi

# Single source of truth for the uploaded file name, so the upload target and
# the link shown to the user cannot drift apart.
NOTEBOOK_PATH_IN_REPO = "dataset_analysis.ipynb"


def create_notebook_file(cell_commands, notebook_name="generated_notebook.ipynb"):
    """Write a notebook containing one code cell per entry of *cell_commands*.

    Args:
        cell_commands: Iterable of source strings, one per code cell.
        notebook_name: Path of the ``.ipynb`` file to create or overwrite.
    """
    nb = nbf.v4.new_notebook()
    nb["cells"] = [nbf.v4.new_code_cell(command) for command in cell_commands]

    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding.
    with open(notebook_name, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Notebook '{notebook_name}' created successfully.")


def push_notebook(file_path, dataset_id, token):
    """Upload a notebook file to a dataset repo and return an HTML link to it.

    Args:
        file_path: Local path of the notebook to upload (the value of the
            "Download Notebook" file component).
        dataset_id: Id of the target dataset repository.
        token: Hugging Face token used for the upload.

    Returns:
        An HTML anchor pointing at the uploaded notebook.

    Raises:
        gr.Error: If no notebook has been generated or no dataset is selected.
    """
    if not file_path:
        raise gr.Error("Generate a notebook before pushing it to the Hub.")
    if not dataset_id:
        raise gr.Error("Select a dataset before pushing the notebook.")

    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=NOTEBOOK_PATH_IN_REPO,
        repo_id=dataset_id,
        repo_type="dataset",
    )
    print("Notebook uploaded to Huggingface Hub.")

    link = (
        f"https://huggingface.co/datasets/{dataset_id}/blob/main/"
        f"{NOTEBOOK_PATH_IN_REPO}"
    )
    # The dataset id is user-supplied text; escape it before it goes into an
    # HTML attribute.
    return f'<a href="{html.escape(link, quote=True)}" target="_blank">See notebook</a>'


def generate_notebook(dataset_id, token=None):
    """Create a starter notebook for *dataset_id* and return its file name.

    Args:
        dataset_id: Id of the dataset the notebook should load.
        token: Optional Hugging Face token. The generate button passes only
            the dataset id, so this must be optional.
            NOTE(review): with ``None``, ``HfApi`` presumably falls back to
            locally configured credentials -- confirm.

    Returns:
        The local file name of the generated notebook.

    Raises:
        gr.Error: If no dataset is selected.
    """
    if not dataset_id:
        raise gr.Error("Select a dataset before generating a notebook.")

    api = HfApi(token=token)
    # TODO: Handle auth error

    # TODO: Get first config and split? or generate a dataframe per each split maybe?
    commands = [
        "!pip install pandas",
        "import pandas as pd",
        f"df = pd.read_parquet('hf://datasets/{dataset_id}/data/train-00000-of-00001.parquet')",
        "df.head()",
    ]
    notebook_name = f"{dataset_id.replace('/', '-')}.ipynb"
    create_notebook_file(commands, notebook_name=notebook_name)

    # NOTE(review): the target repo is hard-coded and ignores dataset_id;
    # confirm whether this upload is still wanted now that a dedicated push
    # button exists.
    api.upload_file(
        path_or_fileobj=notebook_name,
        path_in_repo=NOTEBOOK_PATH_IN_REPO,
        repo_id="asoria/en-text",
        repo_type="dataset",
    )
    # TODO: Handle permission error
    print("Notebook uploaded to Huggingface Hub.")
    return notebook_name


with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Dataset auto analyst creator 🕵️")
    dataset_name = HuggingfaceHubSearch(
        label="Hub Dataset ID",
        placeholder="Search for dataset id on Huggingface",
        search_type="dataset",
        value="",
    )

    @gr.render(inputs=dataset_name)
    def embed(name):
        """Render a placeholder when no dataset is selected, else an HTML block."""
        if not name:
            return gr.Markdown("### No dataset provided")
        # NOTE(review): this markup is effectively empty and does not use
        # `name`; confirm whether embed HTML is missing here.
        html_code = f""" """
        return gr.HTML(value=html_code)

    generate_btn = gr.Button("Generate notebook and push to repo", visible=True)
    download_link = gr.File(label="Download Notebook")
    generate_btn.click(
        generate_notebook,
        inputs=[dataset_name],
        outputs=[download_link],
    )

    with gr.Row() as auth_page:
        with gr.Column():
            auth_title = gr.Markdown(
                "Enter your token ([settings](https://huggingface.co/settings/tokens)):"
            )
            token_box = gr.Textbox(
                "", label="token", placeholder="hf_xxx", type="password"
            )
            auth_error = gr.Markdown("", visible=False)

    def auth(token):
        """Show the push button only while the token box is non-empty."""
        return {
            auth_error: gr.Markdown(value="", visible=False),
            # push_btn is a Button, so it must be updated with a Button.
            push_btn: gr.Button(visible=bool(token)),
        }

    push_btn = gr.Button("Push notebook to repo", visible=False)
    token_box.change(
        auth,
        inputs=token_box,
        outputs=[auth_error, push_btn],
    )
    output_lbl = gr.HTML(value="")

    push_btn.click(
        push_notebook,
        inputs=[download_link, dataset_name, token_box],
        outputs=[output_lbl],
    )

demo.launch()