sasaki-saku
committed on
Commit
•
1e981a9
1
Parent(s):
850b679
Update api_usage.py
Browse files- api_usage.py +174 -22
api_usage.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
import requests
|
|
|
2 |
import os
|
3 |
import anthropic
|
4 |
from datetime import datetime
|
5 |
-
import
|
|
|
|
|
6 |
|
7 |
BASE_URL = 'https://api.openai.com/v1'
|
8 |
GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
|
@@ -87,14 +90,17 @@ def get_subscription(key, org_list):
|
|
87 |
"rpm": rpm,
|
88 |
"tpm": tpm,
|
89 |
"quota": quota}
|
90 |
-
|
91 |
-
def
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
req_body = {"model": model, "
|
97 |
-
|
|
|
|
|
|
|
98 |
result = r.json()
|
99 |
if "error" in result:
|
100 |
e = result.get("error", {}).get("code", "")
|
@@ -105,8 +111,8 @@ def format_status(list_models_avai, headers):
|
|
105 |
_rpm = '{:,}'.format(rpm_num).replace(',', ' ')
|
106 |
_tpm = '{:,}'.format(tpm_num).replace(',', ' ')
|
107 |
_tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
|
108 |
-
|
109 |
-
|
110 |
dictCount = 0
|
111 |
dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
|
112 |
|
@@ -118,15 +124,33 @@ def format_status(list_models_avai, headers):
|
|
118 |
else:
|
119 |
dictCount+=1
|
120 |
if dictCount == dictLength:
|
121 |
-
|
122 |
-
elif model == GPT_TYPES[0] and
|
123 |
-
|
124 |
-
else:
|
125 |
-
continue
|
126 |
else:
|
127 |
-
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
rpm_str = ""
|
131 |
tpm_str = ""
|
132 |
for i in range(len(rpm)):
|
@@ -177,7 +201,7 @@ def check_key_ant_availability(ant):
|
|
177 |
#print(e.__cause__) # an underlying Exception, likely raised within httpx.
|
178 |
return False, "Error: The server could not be reached", ""
|
179 |
except anthropic.RateLimitError as e:
|
180 |
-
return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed)
|
181 |
except anthropic.APIStatusError as e:
|
182 |
err_msg = e.response.json().get('error', {}).get('message', '')
|
183 |
return False, f"Error: {e.status_code}, {err_msg}", ""
|
@@ -295,7 +319,7 @@ def get_azure_status(endpoint, api_key, deployments_list):
|
|
295 |
has_turbo = True
|
296 |
|
297 |
if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
|
298 |
-
return "No GPT
|
299 |
else:
|
300 |
if has_gpt4:
|
301 |
has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
|
@@ -335,7 +359,7 @@ def check_key_mistral_availability(key):
|
|
335 |
return False
|
336 |
return True
|
337 |
except:
|
338 |
-
return "Error while making request
|
339 |
|
340 |
def check_mistral_quota(key):
|
341 |
try:
|
@@ -353,6 +377,134 @@ def check_mistral_quota(key):
|
|
353 |
except:
|
354 |
return "Error while making request."
|
355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
if __name__ == "__main__":
|
357 |
key = os.getenv("OPENAI_API_KEY")
|
358 |
key_ant = os.getenv("ANTHROPIC_API_KEY")
|
|
|
1 |
import requests
|
2 |
+
import json
|
3 |
import os
|
4 |
import anthropic
|
5 |
from datetime import datetime
|
6 |
+
import boto3
|
7 |
+
import botocore.exceptions
|
8 |
+
import concurrent.futures
|
9 |
|
10 |
BASE_URL = 'https://api.openai.com/v1'
|
11 |
GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
|
|
|
90 |
"rpm": rpm,
|
91 |
"tpm": tpm,
|
92 |
"quota": quota}
|
93 |
+
|
94 |
+
def send_oai_completions(oai_stuff):
|
95 |
+
session = oai_stuff[0]
|
96 |
+
headers = oai_stuff[1]
|
97 |
+
model = oai_stuff[2]
|
98 |
+
try:
|
99 |
+
req_body = {"model": model, "max_tokens": 1}
|
100 |
+
rpm_string = ""
|
101 |
+
tpm_string = ""
|
102 |
+
quota_string = ""
|
103 |
+
r = session.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
|
104 |
result = r.json()
|
105 |
if "error" in result:
|
106 |
e = result.get("error", {}).get("code", "")
|
|
|
111 |
_rpm = '{:,}'.format(rpm_num).replace(',', ' ')
|
112 |
_tpm = '{:,}'.format(tpm_num).replace(',', ' ')
|
113 |
_tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
|
114 |
+
rpm_string = f"{_rpm} ({model})"
|
115 |
+
tpm_string = f"{_tpm} ({_tpm_left} left, {model})"
|
116 |
dictCount = 0
|
117 |
dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
|
118 |
|
|
|
124 |
else:
|
125 |
dictCount+=1
|
126 |
if dictCount == dictLength:
|
127 |
+
quota_string = "yes | custom-tier"
|
128 |
+
elif model == GPT_TYPES[0] and quota_string == "":
|
129 |
+
quota_string = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
|
|
|
|
|
130 |
else:
|
131 |
+
rpm_string = f"0 ({model})"
|
132 |
+
tpm_string = f"0 ({model})"
|
133 |
+
quota_string = e
|
134 |
+
return rpm_string, tpm_string, quota_string
|
135 |
+
except Exception as e:
|
136 |
+
#print(e)
|
137 |
+
return "", "", ""
|
138 |
+
|
139 |
+
def helper_oai(oai_stuff):
    """Thread-pool entry point for probing a single model.

    ``oai_stuff`` is the ``(session, headers, model)`` tuple built by the
    caller; it is passed through untouched to ``send_oai_completions`` and
    that function's result is returned as-is.
    """
    outcome = send_oai_completions(oai_stuff)
    return outcome
|
141 |
+
|
142 |
+
def format_status(list_models_avai, headers):
|
143 |
+
rpm = []
|
144 |
+
tpm = []
|
145 |
+
quota = ""
|
146 |
+
r = requests.Session()
|
147 |
+
args = [(r, headers, model) for model in list_models_avai]
|
148 |
+
with concurrent.futures.ThreadPoolExecutor() as executer:
|
149 |
+
for result in executer.map(helper_oai, args):
|
150 |
+
rpm.append(result[0])
|
151 |
+
tpm.append(result[1])
|
152 |
+
if result[2]:
|
153 |
+
quota = result[2]
|
154 |
rpm_str = ""
|
155 |
tpm_str = ""
|
156 |
for i in range(len(rpm)):
|
|
|
201 |
#print(e.__cause__) # an underlying Exception, likely raised within httpx.
|
202 |
return False, "Error: The server could not be reached", ""
|
203 |
except anthropic.RateLimitError as e:
|
204 |
+
return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed)", ""
|
205 |
except anthropic.APIStatusError as e:
|
206 |
err_msg = e.response.json().get('error', {}).get('message', '')
|
207 |
return False, f"Error: {e.status_code}, {err_msg}", ""
|
|
|
319 |
has_turbo = True
|
320 |
|
321 |
if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
|
322 |
+
return "No GPT deployment to check", has_32k, has_gpt4turbo, has_gpt4, has_turbo
|
323 |
else:
|
324 |
if has_gpt4:
|
325 |
has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
|
|
|
359 |
return False
|
360 |
return True
|
361 |
except:
|
362 |
+
return "Error while making request"
|
363 |
|
364 |
def check_mistral_quota(key):
|
365 |
try:
|
|
|
377 |
except:
|
378 |
return "Error while making request."
|
379 |
|
380 |
+
def check_key_replicate_availability(key):
    """Check a Replicate API token and list the hardware it can see.

    The ``/v1/account`` endpoint is queried first; a 401 answer means the
    token was rejected. Otherwise ``/v1/hardware`` is queried and the
    ``name`` of every returned entry is collected.

    Args:
        key: Replicate API token, sent as ``Authorization: Token <key>``.

    Returns:
        A 3-tuple:

        * ``(True, account_info, hardware_names)`` when the token is accepted,
        * ``(False, "", "")`` when the account endpoint answers 401,
        * ``("Unknown", "", "Error while making request")`` when a request
          or the parsing of its response fails.
    """
    headers = {'Authorization': f'Token {key}'}
    try:
        # Bounded wait so an unresponsive endpoint cannot hang the checker
        # (same 10 s budget the OpenAI probe in this file uses).
        rq = requests.get('https://api.replicate.com/v1/account', headers=headers, timeout=10)
        # Look at the status before parsing: a rejected token must be
        # reported as invalid even if the error body is not JSON.
        if rq.status_code == 401:
            return False, "", ""
        info = rq.json()

        rq = requests.get('https://api.replicate.com/v1/hardware', headers=headers, timeout=10)
        result = rq.json()
        hardware = [res['name'] for res in result] if result else []
        return True, info, hardware
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return "Unknown", "", "Error while making request"
|
399 |
+
|
400 |
+
def check_key_aws_availability(key):
    """Inspect an AWS credential given as ``ACCESS_KEY_ID:SECRET_ACCESS_KEY``.

    Args:
        key: The access key id and secret joined by a colon.

    Returns:
        A 7-tuple ``(valid, username, root, admin, quarantine, region_info,
        cost)``:

        * ``valid`` is ``False`` when the key is malformed or
          ``check_username`` could not resolve an identity; in that case the
          sixth element carries the reason and the other fields are ``""``.
        * ``root`` is ``True`` when the resolved name is ``'root'`` (which
          also implies ``admin``).
        * ``admin`` / ``quarantine`` reflect whether the attached user
          policies include ``AdministratorAccess`` /
          ``AWSCompromisedKeyQuarantineV2``.
        * ``region_info`` is the list returned by ``check_bedrock_invoke``
          when it is non-empty, otherwise an explanatory string.
        * ``cost`` is whatever ``check_aws_billing`` returns.
    """
    # A key without a colon used to crash with IndexError; report it as an
    # invalid key in the same shape as the "identity lookup failed" result.
    parts = key.split(':') if key else []
    if len(parts) < 2:
        return False, "", "", "", "", "Invalid key format", ""
    access_id, access_secret = parts[0], parts[1]

    session = boto3.Session(
        aws_access_key_id=access_id,
        aws_secret_access_key=access_secret
    )

    username = check_username(session)
    if not username[0]:
        # username[1] holds the error code reported by the identity lookup.
        return False, "", "", "", "", username[1], ""

    root = username[0] == 'root'
    admin = root
    quarantine = False

    if not root:
        iam = session.client('iam')
        policies = check_policy(iam, username[0])
        # policies[0] is False when the policy listing itself was denied.
        if policies[0]:
            for policy in policies[1]:
                if policy['PolicyName'] == 'AdministratorAccess':
                    admin = True
                if policy['PolicyName'] == 'AWSCompromisedKeyQuarantineV2':
                    quarantine = True

    enable_region = check_bedrock_invoke(session)
    cost = check_aws_billing(session)

    if enable_region:
        region_info = enable_region
    elif root or admin:
        region_info = "No region has claude enabled yet"
    else:
        region_info = "Not enough permission to activate claude bedrock"
    return True, username[0], root, admin, quarantine, region_info, cost
|
443 |
+
|
444 |
+
def check_username(session):
    """Resolve the name of the identity behind *session* through STS.

    Args:
        session: Object exposing ``client('sts')`` (a boto3 session in this
            file).

    Returns:
        ``(name, "Valid")`` on success, or ``(False, error_code)`` when the
        STS call raises ``botocore.exceptions.ClientError``.

        ``name`` is derived from the caller ARN:

        * no ``/`` in the ARN (e.g. ``...:root``): the sixth colon-separated
          field,
        * an IAM user ARN (``...:user/...``): the last ``/`` segment, so that
          users created with a path (``user/team/alice``) yield ``alice``
          rather than the first path component,
        * any other ARN containing ``/``: the segment right after the first
          ``/`` (unchanged from the previous behaviour).
    """
    try:
        sts = session.client('sts')
        arn = sts.get_caller_identity()['Arn']
    except botocore.exceptions.ClientError as error:
        return False, error.response['Error']['Code']

    segments = arn.split('/')
    if len(segments) == 1:
        return arn.split(':')[5], "Valid"
    if ':user/' in arn:
        # IAM user names never contain '/', so the last segment is the name
        # even when the user lives under a path.
        return segments[-1], "Valid"
    return segments[1], "Valid"
|
454 |
+
|
455 |
+
def check_policy(iam, username):
    """List the managed policies attached directly to *username*.

    Args:
        iam: Object exposing ``list_attached_user_policies`` (an IAM client
            in this file).
        username: Value passed as ``UserName``.

    Returns:
        ``(True, attached_policies)`` on success, or ``(False, error_code)``
        when the call raises ``botocore.exceptions.ClientError``.
    """
    try:
        response = iam.list_attached_user_policies(UserName=username)
    except botocore.exceptions.ClientError as error:
        code = error.response['Error']['Code']
        return False, code
    attached = response['AttachedPolicies']
    return True, attached
|
461 |
+
|
462 |
+
def invoke_claude(session, region):
    """Probe whether ``anthropic.claude-v2:1`` can be invoked in *region*.

    A request with ``max_tokens_to_sample`` set to 0 is sent to the
    ``bedrock-runtime`` client for the region.
    NOTE(review): presumably the zero token budget is deliberately invalid so
    that an account with model access answers with ``ValidationException``
    instead of running a completion — confirm.

    Args:
        session: Object exposing ``client(service, region_name=...)``.
        region: Region name to probe.

    Returns:
        *region* when the service raises ``ValidationException``; ``None``
        in every other case (client creation failure, access denied,
        model not found, any other error, or a call that does not raise).
    """
    # Create the client outside the invoke try-block: previously a failure
    # here left ``bedrock_runtime`` unbound, and evaluating the
    # ``except bedrock_runtime.exceptions...`` clauses then raised
    # UnboundLocalError instead of reporting "not available".
    try:
        bedrock_runtime = session.client("bedrock-runtime", region_name=region)
    except Exception:
        return None

    body = json.dumps({
        "prompt": "\n\nHuman:\n\nAssistant:",
        "max_tokens_to_sample": 0,
    })
    try:
        bedrock_runtime.invoke_model(body=body, modelId="anthropic.claude-v2:1")
    except bedrock_runtime.exceptions.ValidationException:
        return region
    except Exception:
        # AccessDenied, ResourceNotFound and anything else all mean the
        # region is not reported as enabled.
        return None
    return None
|
482 |
+
|
483 |
+
def check_bedrock_invoke(session):
    """Probe a fixed set of regions concurrently with ``invoke_claude``.

    One ``invoke_claude(session, region)`` task is submitted per region to a
    thread pool; every truthy result is collected in completion order.

    Returns:
        List of the truthy values returned by ``invoke_claude`` (empty when
        none of the probes returned one).
    """
    candidate_regions = (
        'us-east-1',
        'us-west-2',
        'eu-central-1',
        'ap-southeast-1',
        'ap-northeast-1',
    )
    enabled = []
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(invoke_claude, session, name) for name in candidate_regions]
        for done in concurrent.futures.as_completed(pending):
            outcome = done.result()
            if outcome:
                enabled.append(outcome)
    return enabled
|
492 |
+
|
493 |
+
def check_aws_billing(session):
    """Fetch the current calendar month's blended cost from Cost Explorer.

    The queried period runs from the first day of the current month (local
    ``datetime.now()``) to the first day of the following month, with
    ``MONTHLY`` granularity and the ``BlendedCost`` metric.

    Args:
        session: Object exposing ``client('ce')``.

    Returns:
        The ``ResultsByTime`` entry of the response, or the error message
        string when the call raises ``botocore.exceptions.ClientError``.
    """
    try:
        cost_explorer = session.client('ce')
        today = datetime.now()
        month_start = today.replace(day=1)
        # Roll over to January of next year when the current month is December.
        if today.month == 12:
            next_month_start = month_start.replace(year=today.year + 1, month=1)
        else:
            next_month_start = month_start.replace(month=today.month + 1)
        period = {
            'Start': month_start.strftime('%Y-%m-%d'),
            'End': next_month_start.strftime('%Y-%m-%d'),
        }
        report = cost_explorer.get_cost_and_usage(
            TimePeriod=period,
            Granularity='MONTHLY',
            Metrics=['BlendedCost']
        )
        return report['ResultsByTime']
    except botocore.exceptions.ClientError as error:
        return error.response['Error']['Message']
|
507 |
+
|
508 |
if __name__ == "__main__":
|
509 |
key = os.getenv("OPENAI_API_KEY")
|
510 |
key_ant = os.getenv("ANTHROPIC_API_KEY")
|