WebDatasets

Runtime error

App Files Community

awacke1 committed on Dec 9, 2023

Commit

acbd0bd

1 Parent(s): 9e2fd8c

Update backup.py

Browse files

Files changed (1) hide show

backup.py +29 -22

backup.py CHANGED Viewed

@@ -8,8 +8,12 @@ import hashlib
 import json
 EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
-# Create a history.json file if it doesn't exist yet
 if not os.path.exists("history.json"):
     with open("history.json", "w") as f:
         json.dump({}, f)
@@ -35,12 +39,10 @@ def download_html_and_files(url, subdir):
         file_url = urllib.parse.urljoin(base_url, link.get('href'))
         local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
-        # Skip if the local filename is a directory
         if not local_filename.endswith('/') and local_filename != subdir:
             link['href'] = local_filename
             download_file(file_url, local_filename)
-    # Save the modified HTML content
     with open(os.path.join(subdir, "index.html"), "w") as file:
         file.write(str(soup))
@@ -48,24 +50,28 @@ def list_files(directory_path='.'):
     files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
     return [f for f in files if f not in EXCLUDED_FILES]
 def show_file_operations(file_path):
     st.write(f"File: {os.path.basename(file_path)}")
-    # Edit button
-    if st.button(f"✏️ Edit {os.path.basename(file_path)}"):
-        with open(file_path, "r") as f:
-            file_content = f.read()
-        file_content = st.text_area("Edit the file content:", value=file_content, height=250)
-        if st.button(f"💾 Save {os.path.basename(file_path)}"):
             with open(file_path, "w") as f:
                 f.write(file_content)
-            st.success(f"File {os.path.basename(file_path)} saved!")
-    # Delete button
-    if st.button(f"🗑️ Delete {os.path.basename(file_path)}"):
-        os.remove(file_path)
-        st.markdown(f"🎉 File {os.path.basename(file_path)} deleted!")
 def show_download_links(subdir):
     st.write(f'Files for {subdir}:')
@@ -77,7 +83,6 @@ def show_download_links(subdir):
         else:
             st.write(f"File not found: {file}")
 def get_download_link(file):
     with open(file, "rb") as f:
         bytes = f.read()
@@ -85,16 +90,19 @@ def get_download_link(file):
         href = f'<a href="data:file/octet-stream;base64,{b64}" download=\'{os.path.basename(file)}\'>Click to download {os.path.basename(file)}</a>'
     return href
 def main():
     st.sidebar.title('Web Datasets Bulk Downloader')
-    url = st.sidebar.text_input('Please enter a Web URL to bulk download text and files')
-    # Load history
     with open("history.json", "r") as f:
         history = json.load(f)
-    # Save the history of URL entered as a json file
     if url:
         subdir = hashlib.md5(url.encode()).hexdigest()
         if not os.path.exists(subdir):
@@ -112,7 +120,6 @@ def main():
         for subdir in history.values():
             show_download_links(subdir)
-    # Display history as markdown
     with st.expander("URL History and Downloaded Files"):
         for url, subdir in history.items():
             st.markdown(f"#### {url}")

 import json
 EXCLUDED_FILES = ['app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'README.md','.gitattributes', "backup.py","Dockerfile"]
+URLS = {
+    "Chordify - Play Along Chords": "https://chordify.net/",
+    "National Guitar Academy - Guitar Learning": "https://www.guitaracademy.com/",
+    "Ultimate Guitar - Massive Song Database": "https://www.ultimate-guitar.com/",
+}
 if not os.path.exists("history.json"):
     with open("history.json", "w") as f:
         json.dump({}, f)
         file_url = urllib.parse.urljoin(base_url, link.get('href'))
         local_filename = os.path.join(subdir, urllib.parse.urlparse(file_url).path.split('/')[-1])
         if not local_filename.endswith('/') and local_filename != subdir:
             link['href'] = local_filename
             download_file(file_url, local_filename)
     with open(os.path.join(subdir, "index.html"), "w") as file:
         file.write(str(soup))
     files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
     return [f for f in files if f not in EXCLUDED_FILES]
 def show_file_operations(file_path):
     st.write(f"File: {os.path.basename(file_path)}")
+    unique_key = hashlib.md5(file_path.encode()).hexdigest()
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        if st.button(f"✏️ Edit", key=f"edit_{unique_key}"):
+            file_content = ""
+            with open(file_path, "r") as f:
+                file_content = f.read()
+            file_content = st.text_area("Edit the file content:", value=file_content, height=250, key=f"text_area_{unique_key}")
+    with col2:
+        if st.button(f"💾 Save", key=f"save_{unique_key}"):
             with open(file_path, "w") as f:
                 f.write(file_content)
+            st.success(f"File saved!")
+    with col3:
+        if st.button(f"🗑️ Delete", key=f"delete_{unique_key}"):
+            os.remove(file_path)
+            st.markdown(f"File deleted!")
 def show_download_links(subdir):
     st.write(f'Files for {subdir}:')
         else:
             st.write(f"File not found: {file}")
 def get_download_link(file):
     with open(file, "rb") as f:
         bytes = f.read()
         href = f'<a href="data:file/octet-stream;base64,{b64}" download=\'{os.path.basename(file)}\'>Click to download {os.path.basename(file)}</a>'
     return href
 def main():
     st.sidebar.title('Web Datasets Bulk Downloader')
+    url_input_method = st.sidebar.radio("Choose URL Input Method", ["Enter URL", "Select from List"])
+    url = ""
+    if url_input_method == "Enter URL":
+        url = st.sidebar.text_input('Please enter a Web URL to bulk download text and files')
+    else:
+        selected_site = st.sidebar.selectbox("Select a Website", list(URLS.keys()))
+        url = URLS[selected_site]
     with open("history.json", "r") as f:
         history = json.load(f)
     if url:
         subdir = hashlib.md5(url.encode()).hexdigest()
         if not os.path.exists(subdir):
         for subdir in history.values():
             show_download_links(subdir)
     with st.expander("URL History and Downloaded Files"):
         for url, subdir in history.items():
             st.markdown(f"#### {url}")