Spaces:

piealamodewhitebread
/

openai_api_key_status

Sleeping

App Files Files Community

sasaki-saku committed on Feb 22

Commit

1e981a9

•

1 Parent(s): 850b679

Update api_usage.py

Browse files

Files changed (1) hide show

api_usage.py +174 -22

api_usage.py CHANGED Viewed

@@ -1,8 +1,11 @@
 import requests
 import os
 import anthropic
 from datetime import datetime
-import json
 BASE_URL = 'https://api.openai.com/v1'
 GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
@@ -87,14 +90,17 @@ def get_subscription(key, org_list):
             "rpm": rpm,
             "tpm": tpm,
             "quota": quota}
-def format_status(list_models_avai, headers):
-    rpm = []
-    tpm = []
-    quota = ""
-    for model in list_models_avai:
-        req_body = {"model": model, "messages": [{'role':'user', 'content': ''}], "max_tokens": -0}
-        r = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
         result = r.json()
         if "error" in result:
             e = result.get("error", {}).get("code", "")
@@ -105,8 +111,8 @@ def format_status(list_models_avai, headers):
                 _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
                 _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
                 _tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
-                rpm.append(f"{_rpm} ({model})")
-                tpm.append(f"{_tpm} ({_tpm_left} left, {model})")
                 dictCount = 0
                 dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
@@ -118,15 +124,33 @@ def format_status(list_models_avai, headers):
                         else:
                             dictCount+=1
                             if dictCount == dictLength:
-                                quota = "yes | custom-tier"
-                elif model == GPT_TYPES[0] and quota == "":
-                    quota = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
-                else:
-                    continue
             else:
-                rpm.append(f"0 ({model})")
-                tpm.append(f"0 ({model})")
-                quota = e
     rpm_str = ""
     tpm_str = ""
     for i in range(len(rpm)):
@@ -177,7 +201,7 @@ def check_key_ant_availability(ant):
         #print(e.__cause__)  # an underlying Exception, likely raised within httpx.
         return False, "Error: The server could not be reached", ""
     except anthropic.RateLimitError as e:
-        return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed).", ""
     except anthropic.APIStatusError as e:
         err_msg = e.response.json().get('error', {}).get('message', '')
         return False, f"Error: {e.status_code}, {err_msg}", ""
@@ -295,7 +319,7 @@ def get_azure_status(endpoint, api_key, deployments_list):
             has_turbo = True
     if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
-        return "No GPT model to check.", has_32k, has_gpt4turbo, has_gpt4, has_turbo
     else:
         if has_gpt4:
             has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
@@ -335,7 +359,7 @@ def check_key_mistral_availability(key):
             return False
         return True
     except:
-        return "Error while making request."
 def check_mistral_quota(key):
     try:
@@ -353,6 +377,134 @@ def check_mistral_quota(key):
     except:
         return "Error while making request."
 if __name__ == "__main__":
     key = os.getenv("OPENAI_API_KEY")
     key_ant = os.getenv("ANTHROPIC_API_KEY")

 import requests
+import json
 import os
 import anthropic
 from datetime import datetime
+import boto3
+import botocore.exceptions
+import concurrent.futures
 BASE_URL = 'https://api.openai.com/v1'
 GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
             "rpm": rpm,
             "tpm": tpm,
             "quota": quota}
+def send_oai_completions(oai_stuff):
+    session = oai_stuff[0]
+    headers = oai_stuff[1]
+    model = oai_stuff[2]
+    try:
+        req_body = {"model": model, "max_tokens": 1}
+        rpm_string = ""
+        tpm_string = ""
+        quota_string = ""
+        r = session.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
         result = r.json()
         if "error" in result:
             e = result.get("error", {}).get("code", "")
                 _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
                 _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
                 _tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
+                rpm_string = f"{_rpm} ({model})"
+                tpm_string = f"{_tpm} ({_tpm_left} left, {model})"
                 dictCount = 0
                 dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
                         else:
                             dictCount+=1
                             if dictCount == dictLength:
+                                quota_string = "yes | custom-tier"
+                elif model == GPT_TYPES[0] and quota_string == "":
+                    quota_string = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
             else:
+                rpm_string = f"0 ({model})"
+                tpm_string = f"0 ({model})"
+                quota_string = e
+        return rpm_string, tpm_string, quota_string
+    except Exception as e:
+        #print(e)
+        return "", "", ""
+def helper_oai(oai_stuff):
+    """Pass oai_stuff straight to send_oai_completions and return its result.
+
+    format_status hands this function to the thread pool's map() with one
+    (session, headers, model) tuple per model.
+    """
+    return send_oai_completions(oai_stuff)
+def format_status(list_models_avai, headers):
+    rpm = []
+    tpm = []
+    quota = ""
+    r = requests.Session()
+    args = [(r, headers, model) for model in list_models_avai]
+    with concurrent.futures.ThreadPoolExecutor() as executer:
+        for result in executer.map(helper_oai, args):
+            rpm.append(result[0])
+            tpm.append(result[1])
+            if result[2]:
+                quota = result[2]
     rpm_str = ""
     tpm_str = ""
     for i in range(len(rpm)):
         #print(e.__cause__)  # an underlying Exception, likely raised within httpx.
         return False, "Error: The server could not be reached", ""
     except anthropic.RateLimitError as e:
+        return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed)", ""
     except anthropic.APIStatusError as e:
         err_msg = e.response.json().get('error', {}).get('message', '')
         return False, f"Error: {e.status_code}, {err_msg}", ""
             has_turbo = True
     if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
+        return "No GPT deployment to check", has_32k, has_gpt4turbo, has_gpt4, has_turbo
     else:
         if has_gpt4:
             has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
             return False
         return True
     except:
+        return "Error while making request"
 def check_mistral_quota(key):
     try:
     except:
         return "Error while making request."
+def check_key_replicate_availability(key):
+    try:
+        url = 'https://api.replicate.com/v1/account'
+        headers = {'Authorization': f'Token {key}'}
+        rq = requests.get(url, headers=headers)
+        info = rq.json()
+        if rq.status_code == 401:
+            return False, "", ""
+        url = 'https://api.replicate.com/v1/hardware'
+        rq = requests.get(url, headers=headers)
+        result = rq.json()
+        hardware = []
+        if result:
+            hardware = [res['name'] for res in result]
+        return True, info, hardware
+    except:
+        return "Unknown", "", "Error while making request"
+def check_key_aws_availability(key):
+    access_id = key.split(':')[0]
+    access_secret = key.split(':')[1]
+    root = False
+    admin = False
+    billing = False
+    quarantine = False
+    iam_users_perm = False
+    iam_policies_perm = False
+    session = boto3.Session(
+        aws_access_key_id=access_id,
+        aws_secret_access_key=access_secret
+    )
+    iam = session.client('iam')
+    username = check_username(session)
+    #print(username)
+    if not username[0]:
+        return False, "", "", "", "", username[1], ""
+    if username[0] == 'root':
+        root = True
+        admin = True
+    if not root:
+        policies = check_policy(iam, username[0])
+        if policies[0]:
+            for policy in policies[1]:
+                if policy['PolicyName'] == 'AdministratorAccess':
+                    admin = True
+                if policy['PolicyName'] == 'AWSCompromisedKeyQuarantineV2':
+                    quarantine = True
+    enable_region = check_bedrock_invoke(session)
+    cost = check_aws_billing(session)
+    if enable_region:
+        return True, username[0], root, admin, quarantine, enable_region, cost
+    if root or admin:
+        return True, username[0], root, admin, quarantine, "No region has claude enabled yet", cost
+    return True, username[0], root, admin, quarantine, "Not enough permission to activate claude bedrock", cost
+def check_username(session):
+    try:
+        sts = session.client('sts')
+        sts_iden = sts.get_caller_identity()
+        if len(sts_iden['Arn'].split('/')) > 1:
+            return sts_iden['Arn'].split('/')[1], "Valid"
+        return sts_iden['Arn'].split(':')[5], "Valid"
+    except botocore.exceptions.ClientError as error:
+        return False, error.response['Error']['Code']
+def check_policy(iam, username):
+    try:
+        iam_policies = iam.list_attached_user_policies(UserName=username)
+        return True, iam_policies['AttachedPolicies']
+    except botocore.exceptions.ClientError as error:
+        return False, error.response['Error']['Code']
+def invoke_claude(session, region):
+    try:
+        bedrock_runtime = session.client("bedrock-runtime", region_name=region)
+        body = json.dumps({
+            "prompt": "\n\nHuman:\n\nAssistant:",
+            "max_tokens_to_sample": 0
+        })
+        response = bedrock_runtime.invoke_model(body=body, modelId="anthropic.claude-v2:1")
+    except bedrock_runtime.exceptions.ValidationException as error:
+        #print(error.response['Error'])
+        return region
+    except bedrock_runtime.exceptions.AccessDeniedException as error:
+        #print(error.response['Error'])
+        return
+    except bedrock_runtime.exceptions.ResourceNotFoundException as error:
+        #print(error.response['Error'])
+        return
+    except Exception as e:
+        #print(e)
+        return
+def check_bedrock_invoke(session):
+    regions = ['us-east-1', 'us-west-2', 'eu-central-1', 'ap-southeast-1', 'ap-northeast-1']
+    enable_region = []
+    with concurrent.futures.ThreadPoolExecutor() as executer:
+        futures = [executer.submit(invoke_claude, session, region) for region in regions]
+        for future in concurrent.futures.as_completed(futures):
+            if future.result():
+                enable_region.append(future.result())
+    return enable_region
+def check_aws_billing(session):
+    try:
+        ce = session.client('ce')
+        now = datetime.now()
+        start_date = now.replace(day=1).strftime('%Y-%m-%d')
+        end_date = (now.replace(day=1, month=now.month % 12 + 1, year=now.year + (now.month // 12)).strftime('%Y-%m-%d'))
+        ce_cost = ce.get_cost_and_usage(
+            TimePeriod={ 'Start': start_date, 'End': end_date },
+            Granularity='MONTHLY',
+            Metrics=['BlendedCost']
+        )
+        return ce_cost['ResultsByTime']
+    except botocore.exceptions.ClientError as error:
+        return error.response['Error']['Message']
 if __name__ == "__main__":
     key = os.getenv("OPENAI_API_KEY")
     key_ant = os.getenv("ANTHROPIC_API_KEY")