Spaces:

training-transformers-together
/

Dashboard

Runtime error

App Files Files Community

SaulLu committed on Nov 21, 2021

Commit

cf4f63b

•

1 Parent(s): 3046477

start to add bubble animation

Browse files

Files changed (5) hide show

Makefile +15 -0
app.py +17 -0
dashboard_utils/bubbles.py +122 -0
requirements-dev.txt +3 -0
requirements.txt +1 -0

Makefile ADDED Viewed

	@@ -0,0 +1,15 @@

.PHONY: quality style test test-examples

# Options shared by the checking and the rewriting invocations of black.
BLACK_OPTS = --line-length 119 --target-version py38

# Verify formatting, import order and lint rules without modifying any file.
quality:
	python -m black --check $(BLACK_OPTS) .
	python -m isort --check-only .
	python -m flake8 --max-line-length 119

# Rewrite the source tree in place with black and isort.
style:
	python -m black $(BLACK_OPTS) .
	python -m isort .

app.py CHANGED Viewed

@@ -1,4 +1,21 @@
 import streamlit as st
 st.title("Training transformers together dashboard")
 st.write("test")

+import json
 import streamlit as st
+from streamlit_observable import observable
+from dashboard_utils.bubbles import get_new_bubble_data
 st.title("Training transformers together dashboard")
 st.write("test")
+serialized_data, profiles = get_new_bubble_data()
+observers = observable(
+    "Participants",
+    notebook="d/9ae236a507f54046",  # "@huggingface/participants-bubbles-chart",
+    targets=["c_noaws"],
+    # observe=["selectedCounties"]
+    redefine={"serializedData": serialized_data, "profileSimple": profiles},
+)

dashboard_utils/bubbles.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import datetime
+from urllib import parse
+import requests
+import wandb
+URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
+WANDB_REPO = "learning-at-home/Worker_logs"
def get_new_bubble_data():
    """Collect everything the bubble chart needs in one call.

    Returns:
        A ``(serialized_data, profiles)`` tuple: the aggregated chart payload
        produced by ``get_serialized_data`` and the profile records produced
        by ``get_profiles`` for the same set of participants.
    """
    data_points, newest_timestamp = get_serialized_data_points()
    # The tuple is evaluated left to right, so the aggregation still runs before the profile lookups.
    return get_serialized_data(data_points, newest_timestamp), get_profiles(data_points)
def get_profiles(serialized_data_points):
    """Build one profile record (name, avatar, profile link) per participant.

    Every key of ``serialized_data_points`` is treated as a Hugging Face
    username and looked up through the quicksearch endpoint; the avatar of
    the first returned user is used. The lookup is best effort: if the request
    fails, the response is not JSON, or no avatar is present, a default
    avatar is used instead of aborting the whole dashboard.

    Args:
        serialized_data_points: mapping whose keys are participant names.
            Only the keys are read.

    Returns:
        A list of dicts with the keys ``"id"``, ``"name"``, ``"src"`` (avatar
        URL) and ``"url"`` (profile page), in the iteration order of the input.
    """
    default_avatar = "/avatars/57584cb934354663ac65baa04e6829bf.svg"
    profiles = []
    for username in serialized_data_points:
        new_url = URL_QUICKSEARCH + parse.urlencode({"type": "user", "q": username})
        try:
            # The timeout keeps a single stalled request from freezing the page render.
            response = requests.get(new_url, timeout=10).json()
            avatar_url = response["users"][0]["avatarUrl"]
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Network failure, non-JSON body, unknown user or unexpected payload shape.
            avatar_url = default_avatar
        if not isinstance(avatar_url, str):
            # A JSON null (or any non-string) avatar cannot be turned into a URL.
            avatar_url = default_avatar
        if avatar_url.startswith("/avatars/"):
            # Site-relative avatar paths must be made absolute for the embedded chart.
            avatar_url = f"https://huggingface.co{avatar_url}"
        profiles.append(
            {"id": username, "name": username, "src": avatar_url, "url": f"https://huggingface.co/{username}"}
        )
    return profiles
def get_serialized_data_points():
    """Fetch all runs of ``WANDB_REPO`` and group their summaries by run name.

    Each run contributes one record built from its summary (``_step``,
    ``_runtime``, ``train/loss``, ``_timestamp``). Runs whose summary lacks
    one of these fields, or whose values cannot be converted (for example a
    zero runtime), are skipped.

    Returns:
        A ``(serialized_data_points, latest_timestamp)`` tuple.
        ``serialized_data_points`` maps a run name to
        ``{"profileId": run_name, "Runs": [record, ...]}``.
        ``latest_timestamp`` is the newest ``_timestamp`` among the kept runs
        as a naive UTC ``datetime``, or ``None`` when no run could be used.
    """
    api = wandb.Api()
    runs = api.runs(WANDB_REPO)

    serialized_data_points = {}
    latest_timestamp = None
    print("**start api call")
    for run in runs:
        run_summary = run.summary._json_dict
        run_name = run.name
        try:
            timestamp = run_summary["_timestamp"]
            run_record = {
                "batches": run_summary["_step"],
                "runtime": run_summary["_runtime"],
                "loss": run_summary["train/loss"],
                "velocity": run_summary["_step"] / run_summary["_runtime"],
                "date": datetime.datetime.utcfromtimestamp(timestamp),
            }
        except (KeyError, TypeError, ValueError, ZeroDivisionError, OverflowError, OSError):
            # Incomplete or malformed summary: leave this run out rather than fail the whole fetch.
            continue

        # The record is complete, so it is now safe to create the participant entry on first sight.
        entry = serialized_data_points.setdefault(run_name, {"profileId": run_name, "Runs": []})
        entry["Runs"].append(run_record)
        if latest_timestamp is None or timestamp > latest_timestamp:
            latest_timestamp = timestamp

    # With no usable run there is nothing to convert; callers receive None.
    if latest_timestamp is not None:
        latest_timestamp = datetime.datetime.utcfromtimestamp(latest_timestamp)
    print("**finish api call")
    return serialized_data_points, latest_timestamp
def get_serialized_data(serialized_data_points, latest_timestamp):
    """Aggregate the per-participant run records into the bubble-chart payload.

    A run is "active" when its ``"date"`` equals ``latest_timestamp``. For
    every participant the result holds the total batches and runtime over all
    runs, the mean loss and summed velocity over the active runs, and the
    active runs themselves with their date rendered as an ISO-8601 string.
    The input records are not modified.

    Args:
        serialized_data_points: mapping of participant name to a dict whose
            ``"Runs"`` list contains records with the keys ``"batches"``,
            ``"runtime"``, ``"loss"``, ``"velocity"`` and ``"date"``.
        latest_timestamp: ``datetime`` of the newest run. It is only used
            when at least one participant is present.

    Returns:
        ``{"points": [[item, ...]], "maxVelocity": 1}`` with one item per
        participant, in the iteration order of the input.
    """
    # NOTE(review): never updated from the data; confirm the chart really expects a constant.
    max_velocity = 1
    aggregated_points = []
    for run_name, serialized_data_point in serialized_data_points.items():
        all_runs = serialized_data_point["Runs"]
        # Copy each active run so the ISO conversion does not overwrite the caller's datetime objects.
        active_runs = [
            {**run, "date": run["date"].isoformat()} for run in all_runs if run["date"] == latest_timestamp
        ]
        # The mean is taken once over all active runs (0 when none is active).
        loss = sum(run["loss"] for run in active_runs) / len(active_runs) if active_runs else 0
        aggregated_points.append(
            {
                "date": latest_timestamp.isoformat(),
                "profileId": run_name,
                "batches": sum(run["batches"] for run in all_runs),
                "runtime": sum(run["runtime"] for run in all_runs),
                "loss": loss,
                "velocity": sum(run["velocity"] for run in active_runs),
                "activeRuns": active_runs,
            }
        )
    return {"points": [aggregated_points], "maxVelocity": max_velocity}

requirements-dev.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+black
+isort
+flake8

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	streamlit


1	streamlit
2	+ streamlit-observable