bkb2135 committed on
Commit c22f824 · 1 Parent(s): c9cda2b

Clean up repo more

.gitignore CHANGED
@@ -159,4 +159,6 @@ cython_debug/
 testing/
 core
 app.config.js
-.vscode
+.vscode
+wandb/
+_saved_runs.csv
common/__init__.py DELETED
File without changes
common/middlewares.py DELETED
@@ -1,42 +0,0 @@
-import os
-import json
-import bittensor as bt
-from aiohttp.web import Request, Response, middleware
-
-EXPECTED_ACCESS_KEY = os.environ.get("EXPECTED_ACCESS_KEY")
-
-
-@middleware
-async def api_key_middleware(request: Request, handler):
-    if request.path.startswith("/docs") or request.path.startswith("/static/swagger"):
-        # Skip checks when accessing OpenAPI documentation.
-        return await handler(request)
-
-    # Logging the request
-    bt.logging.info(f"Handling {request.method} request to {request.path}")
-
-    # Check access key
-    access_key = request.headers.get("api_key")
-    if EXPECTED_ACCESS_KEY is not None and access_key != EXPECTED_ACCESS_KEY:
-        bt.logging.error(f"Invalid access key: {access_key}")
-        return Response(status=401, reason="Invalid access key")
-
-    # Continue to the next handler if the API key is valid
-    return await handler(request)
-
-
-@middleware
-async def json_parsing_middleware(request: Request, handler):
-    if request.path.startswith("/docs") or request.path.startswith("/static/swagger"):
-        # Skip checks when accessing OpenAPI documentation.
-        return await handler(request)
-
-    try:
-        # Parsing JSON data from the request
-        request["data"] = await request.json()
-    except json.JSONDecodeError as e:
-        bt.logging.error(f"Invalid JSON data: {str(e)}")
-        return Response(status=400, text="Invalid JSON")
-
-    # Continue to the next handler if JSON is successfully parsed
-    return await handler(request)
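For context, a minimal sketch of how the two deleted middlewares would typically have been wired into an aiohttp application. Everything except the middleware functions themselves (the echo handler, make_app, the route and port) is an illustrative assumption, not code from this repo; the import refers to the module as it existed before this commit.

# Hypothetical wiring of the middlewares above -- illustrative only.
from aiohttp import web
from common.middlewares import api_key_middleware, json_parsing_middleware

async def echo(request: web.Request) -> web.Response:
    # json_parsing_middleware stores the parsed JSON body under request["data"].
    return web.json_response(request["data"])

def make_app() -> web.Application:
    # Middlewares run in list order: the API-key check first, then JSON parsing.
    app = web.Application(middlewares=[api_key_middleware, json_parsing_middleware])
    app.router.add_post("/echo", echo)
    return app

if __name__ == "__main__":
    web.run_app(make_app(), port=8080)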
common/schemas.py DELETED
@@ -1,29 +0,0 @@
-from marshmallow import Schema, fields
-
-
-class QueryChatSchema(Schema):
-    k = fields.Int(description="The number of miners from which to request responses.")
-    exclude = fields.List(fields.Str(), description="A list of roles or agents to exclude from querying.")
-    roles = fields.List(fields.Str(), required=True, description="The roles of the agents to query.")
-    messages = fields.List(fields.Str(), required=True, description="The messages to be sent to the network.")
-    timeout = fields.Int(description="The time in seconds to wait for a response.")
-    prefer = fields.Str(description="The preferred response format, can be either 'longest' or 'shortest'.")
-    sampling_mode = fields.Str(
-        description="The mode of sampling to use, defaults to 'random'. Can be either 'random' or 'top_incentive'.")
-
-
-class StreamChunkSchema(Schema):
-    delta = fields.Str(required=True, description="The new chunk of response received.")
-    finish_reason = fields.Str(description="The reason for the response completion, if applicable.")
-    accumulated_chunks = fields.List(fields.Str(), description="All accumulated chunks of responses.")
-    accumulated_chunks_timings = fields.List(fields.Float(), description="Timing for each chunk received.")
-    timestamp = fields.Str(required=True, description="The timestamp at which the chunk was processed.")
-    sequence_number = fields.Int(required=True, description="A sequential identifier for the response part.")
-    selected_uid = fields.Int(required=True, description="The identifier for the selected response source.")
-
-
-class StreamErrorSchema(Schema):
-    error = fields.Str(required=True, description="Description of the error occurred.")
-    timestamp = fields.Str(required=True, description="The timestamp of the error.")
-    sequence_number = fields.Int(required=True, description="A sequential identifier for the error.")
-    finish_reason = fields.Str(default="error", description="Indicates an error completion.")
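As a reference for the deleted schemas, a small sketch of validating a query payload with marshmallow. The payload values are invented and the import refers to the module as it existed before this commit.

# Hypothetical validation of a chat query payload -- illustrative only.
from marshmallow import ValidationError
from common.schemas import QueryChatSchema

payload = {
    "roles": ["user"],
    "messages": ["Summarize the latest block header."],
    "k": 3,
    "timeout": 10,
}

try:
    # load() validates the payload and returns the deserialized dict;
    # omitting a required field (roles, messages) raises ValidationError.
    data = QueryChatSchema().load(payload)
    print(data)
except ValidationError as err:
    # err.messages maps each offending field to a list of error strings.
    print(err.messages)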
common/utils.py DELETED
@@ -1,159 +0,0 @@
-import re
-import asyncio
-import bittensor as bt
-from aiohttp import web
-from collections import Counter
-from prompting.rewards import DateRewardModel, FloatDiffModel
-from validators.streamer import AsyncResponseDataStreamer
-
-UNSUCCESSFUL_RESPONSE_PATTERNS = [
-    "I'm sorry",
-    "unable to",
-    "I cannot",
-    "I can't",
-    "I am unable",
-    "I am sorry",
-    "I can not",
-    "don't know",
-    "not sure",
-    "don't understand",
-    "not capable",
-]
-
-reward_models = {
-    "date_qa": DateRewardModel(),
-    "math": FloatDiffModel(),
-}
-
-
-def completion_is_valid(completion: str):
-    """
-    Get the completion statuses from the completions.
-    """
-    if not completion.strip():
-        return False
-
-    patt = re.compile(
-        r"\b(?:" + "|".join(UNSUCCESSFUL_RESPONSE_PATTERNS) + r")\b", re.IGNORECASE
-    )
-    if not len(re.findall(r"\w+", completion)) or patt.search(completion):
-        return False
-    return True
-
-
-def ensemble_result(completions: list, task_name: str, prefer: str = "longest"):
-    """
-    Ensemble completions from multiple models.
-    # TODO: Measure agreement
-    # TODO: Figure out how to mitigate the cabal effect (large groups will appear to be more credible)
-    # TODO: Reward pipeline
-    """
-    if not completions:
-        return None
-
-    answer = None
-    if task_name in ("qa", "summarization"):
-        # No special handling for QA or summarization
-        supporting_completions = completions
-
-    elif task_name == "date_qa":
-        # filter the completions to be the ones that contain valid dates and if there are multiple dates, select the most common one (with support > 1)
-        dates = list(map(reward_models[task_name].parse_dates_from_text, completions))
-        bt.logging.info(f"Unprocessed dates: {dates}")
-        valid_date_indices = [i for i, d in enumerate(dates) if d]
-        valid_completions = [completions[i] for i in valid_date_indices]
-        valid_dates = [dates[i] for i in valid_date_indices]
-        dates = [f"{d[0].strftime('%-d %B')} {d[1]}" for d in valid_dates]
-        if not dates:
-            return None
-
-        counter = Counter(dates)
-        most_common, count = counter.most_common()[0]
-        answer = most_common
-        if count == 1:
-            supporting_completions = valid_completions
-        else:
-            supporting_completions = [
-                c for i, c in enumerate(valid_completions) if dates[i] == most_common
-            ]
-
-    elif task_name == "math":
-        # filter the completions to be the ones that contain valid numbers and if there are multiple values, select the most common one (with support > 1)
-        # TODO: use the median instead of the most common value
-        vals = list(map(reward_models[task_name].extract_number, completions))
-        vals = [val for val in vals if val]
-        if not vals:
-            return None
-
-        most_common, count = Counter(vals).most_common()[0]
-        bt.logging.info(f"Most common value: {most_common}, count: {count}")
-        answer = most_common
-        if count == 1:
-            supporting_completions = completions
-        else:
-            supporting_completions = [
-                c for i, c in enumerate(completions) if vals[i] == most_common
-            ]
-
-    bt.logging.info(f"Supporting completions: {supporting_completions}")
-    if prefer == "longest":
-        preferred_completion = sorted(supporting_completions, key=len)[-1]
-    elif prefer == "shortest":
-        preferred_completion = sorted(supporting_completions, key=len)[0]
-    elif prefer == "most_common":
-        preferred_completion = max(
-            set(supporting_completions), key=supporting_completions.count
-        )
-    else:
-        raise ValueError(f"Unknown ensemble preference: {prefer}")
-
-    return {
-        "completion": preferred_completion,
-        "accepted_answer": answer,
-        "support": len(supporting_completions),
-        "support_indices": [completions.index(c) for c in supporting_completions],
-        "method": f'Selected the {prefer.replace("_", " ")} completion',
-    }
-
-
-def guess_task_name(challenge: str):
-    # TODO: use a pre-trained classifier to guess the task name
-    categories = {
-        "summarization": re.compile("summar|quick rundown|overview"),
-        "date_qa": re.compile(
-            "exact date|tell me when|on what date|on what day|was born?|died?"
-        ),
-        "math": re.compile(
-            "math|solve|solution| sum |problem|geometric|vector|calculate|degrees|decimal|factorial"
-        ),
-    }
-    for task_name, patt in categories.items():
-        if patt.search(challenge):
-            return task_name
-
-    return "qa"
-
-
-# Simulate the stream synapse for the echo endpoint
-class EchoAsyncIterator:
-    def __init__(self, message: str, k: int, delay: float):
-        self.message = message
-        self.k = k
-        self.delay = delay
-
-    async def __aiter__(self):
-        for _ in range(self.k):
-            for word in self.message.split():
-                yield [word]
-                await asyncio.sleep(self.delay)
-
-
-async def echo_stream(request: web.Request) -> web.StreamResponse:
-    request_data = request["data"]
-    k = request_data.get("k", 1)
-    message = "\n\n".join(request_data["messages"])
-
-    echo_iterator = EchoAsyncIterator(message, k, delay=0.3)
-    streamer = AsyncResponseDataStreamer(echo_iterator, selected_uid=0, delay=0.3)
-
-    return await streamer.stream(request)
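The heart of the deleted ensemble_result is a majority vote over the answers parsed from each completion, with ties broken by a length preference. A simplified, self-contained sketch of that selection step (illustrative names and data, not the original implementation):

# Simplified sketch of the majority-vote selection used by ensemble_result.
from collections import Counter

def select_completion(completions, answers, prefer="longest"):
    # The most common parsed answer wins; if every answer is unique,
    # all completions count as support (mirroring the count == 1 branch above).
    most_common, count = Counter(answers).most_common(1)[0]
    if count == 1:
        supporting = completions
    else:
        supporting = [c for c, a in zip(completions, answers) if a == most_common]
    pick = {"longest": max, "shortest": min}[prefer]
    return {
        "completion": pick(supporting, key=len),
        "accepted_answer": most_common,
        "support": len(supporting),
    }

print(select_completion(
    ["It was 14 July 1789.", "14 July 1789.", "Probably 1790."],
    ["14 July 1789", "14 July 1789", "1790"],
))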
requirements.txt CHANGED
@@ -1,7 +1,3 @@
-aiohttp
-deprecated
-aiohttp_apispec>=2.2.3
-aiofiles
 streamlit
 plotly
 wandb