sasaki-saku committed on
Commit
1e981a9
1 Parent(s): 850b679

Update api_usage.py

Browse files
Files changed (1) hide show
  1. api_usage.py +174 -22
api_usage.py CHANGED
@@ -1,8 +1,11 @@
1
  import requests
 
2
  import os
3
  import anthropic
4
  from datetime import datetime
5
- import json
 
 
6
 
7
  BASE_URL = 'https://api.openai.com/v1'
8
  GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
@@ -87,14 +90,17 @@ def get_subscription(key, org_list):
87
  "rpm": rpm,
88
  "tpm": tpm,
89
  "quota": quota}
90
-
91
- def format_status(list_models_avai, headers):
92
- rpm = []
93
- tpm = []
94
- quota = ""
95
- for model in list_models_avai:
96
- req_body = {"model": model, "messages": [{'role':'user', 'content': ''}], "max_tokens": -0}
97
- r = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
 
 
 
98
  result = r.json()
99
  if "error" in result:
100
  e = result.get("error", {}).get("code", "")
@@ -105,8 +111,8 @@ def format_status(list_models_avai, headers):
105
  _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
106
  _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
107
  _tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
108
- rpm.append(f"{_rpm} ({model})")
109
- tpm.append(f"{_tpm} ({_tpm_left} left, {model})")
110
  dictCount = 0
111
  dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
112
 
@@ -118,15 +124,33 @@ def format_status(list_models_avai, headers):
118
  else:
119
  dictCount+=1
120
  if dictCount == dictLength:
121
- quota = "yes | custom-tier"
122
- elif model == GPT_TYPES[0] and quota == "":
123
- quota = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
124
- else:
125
- continue
126
  else:
127
- rpm.append(f"0 ({model})")
128
- tpm.append(f"0 ({model})")
129
- quota = e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  rpm_str = ""
131
  tpm_str = ""
132
  for i in range(len(rpm)):
@@ -177,7 +201,7 @@ def check_key_ant_availability(ant):
177
  #print(e.__cause__) # an underlying Exception, likely raised within httpx.
178
  return False, "Error: The server could not be reached", ""
179
  except anthropic.RateLimitError as e:
180
- return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed).", ""
181
  except anthropic.APIStatusError as e:
182
  err_msg = e.response.json().get('error', {}).get('message', '')
183
  return False, f"Error: {e.status_code}, {err_msg}", ""
@@ -295,7 +319,7 @@ def get_azure_status(endpoint, api_key, deployments_list):
295
  has_turbo = True
296
 
297
  if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
298
- return "No GPT model to check.", has_32k, has_gpt4turbo, has_gpt4, has_turbo
299
  else:
300
  if has_gpt4:
301
  has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
@@ -335,7 +359,7 @@ def check_key_mistral_availability(key):
335
  return False
336
  return True
337
  except:
338
- return "Error while making request."
339
 
340
  def check_mistral_quota(key):
341
  try:
@@ -353,6 +377,134 @@ def check_mistral_quota(key):
353
  except:
354
  return "Error while making request."
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  if __name__ == "__main__":
357
  key = os.getenv("OPENAI_API_KEY")
358
  key_ant = os.getenv("ANTHROPIC_API_KEY")
 
1
  import requests
2
+ import json
3
  import os
4
  import anthropic
5
  from datetime import datetime
6
+ import boto3
7
+ import botocore.exceptions
8
+ import concurrent.futures
9
 
10
  BASE_URL = 'https://api.openai.com/v1'
11
  GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
 
90
  "rpm": rpm,
91
  "tpm": tpm,
92
  "quota": quota}
93
+
94
+ def send_oai_completions(oai_stuff):
95
+ session = oai_stuff[0]
96
+ headers = oai_stuff[1]
97
+ model = oai_stuff[2]
98
+ try:
99
+ req_body = {"model": model, "max_tokens": 1}
100
+ rpm_string = ""
101
+ tpm_string = ""
102
+ quota_string = ""
103
+ r = session.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
104
  result = r.json()
105
  if "error" in result:
106
  e = result.get("error", {}).get("code", "")
 
111
  _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
112
  _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
113
  _tpm_left = '{:,}'.format(tpm_left).replace(',', ' ')
114
+ rpm_string = f"{_rpm} ({model})"
115
+ tpm_string = f"{_tpm} ({_tpm_left} left, {model})"
116
  dictCount = 0
117
  dictLength = len(TOKEN_LIMIT_PER_TIER_GPT4)
118
 
 
124
  else:
125
  dictCount+=1
126
  if dictCount == dictLength:
127
+ quota_string = "yes | custom-tier"
128
+ elif model == GPT_TYPES[0] and quota_string == "":
129
+ quota_string = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
 
 
130
  else:
131
+ rpm_string = f"0 ({model})"
132
+ tpm_string = f"0 ({model})"
133
+ quota_string = e
134
+ return rpm_string, tpm_string, quota_string
135
+ except Exception as e:
136
+ #print(e)
137
+ return "", "", ""
138
+
139
def helper_oai(oai_stuff):
    """Module-level wrapper around send_oai_completions.

    Exists so the per-model OpenAI probe can be handed to executor.map as a
    single-argument callable; oai_stuff is a (session, headers, model) tuple.
    """
    result = send_oai_completions(oai_stuff)
    return result
141
+
142
+ def format_status(list_models_avai, headers):
143
+ rpm = []
144
+ tpm = []
145
+ quota = ""
146
+ r = requests.Session()
147
+ args = [(r, headers, model) for model in list_models_avai]
148
+ with concurrent.futures.ThreadPoolExecutor() as executer:
149
+ for result in executer.map(helper_oai, args):
150
+ rpm.append(result[0])
151
+ tpm.append(result[1])
152
+ if result[2]:
153
+ quota = result[2]
154
  rpm_str = ""
155
  tpm_str = ""
156
  for i in range(len(rpm)):
 
201
  #print(e.__cause__) # an underlying Exception, likely raised within httpx.
202
  return False, "Error: The server could not be reached", ""
203
  except anthropic.RateLimitError as e:
204
+ return True, "Error: 429, rate limited; we should back off a bit(retry 5 times failed)", ""
205
  except anthropic.APIStatusError as e:
206
  err_msg = e.response.json().get('error', {}).get('message', '')
207
  return False, f"Error: {e.status_code}, {err_msg}", ""
 
319
  has_turbo = True
320
 
321
  if not list_model: #has_32k == False and has_gpt4 == False and has_turbo == False:
322
+ return "No GPT deployment to check", has_32k, has_gpt4turbo, has_gpt4, has_turbo
323
  else:
324
  if has_gpt4:
325
  has_gpt4turbo = check_gpt4turbo(endpoint, api_key, list_model['gpt-4'])
 
359
  return False
360
  return True
361
  except:
362
+ return "Error while making request"
363
 
364
  def check_mistral_quota(key):
365
  try:
 
377
  except:
378
  return "Error while making request."
379
 
380
def check_key_replicate_availability(key):
    """Validate a Replicate API key.

    Returns a tuple (available, account_info, hardware):
      - (False, "", "") when the key is rejected (HTTP 401),
      - (True, account_json, [hardware names]) when the key is valid,
      - ("Unknown", "", "Error while making request") on any request failure.
    """
    try:
        headers = {'Authorization': f'Token {key}'}

        # The account endpoint doubles as an authentication check.
        rq = requests.get('https://api.replicate.com/v1/account',
                          headers=headers, timeout=10)
        info = rq.json()
        if rq.status_code == 401:
            return False, "", ""

        # List the hardware tiers this account can run models on.
        rq = requests.get('https://api.replicate.com/v1/hardware',
                          headers=headers, timeout=10)
        result = rq.json()
        hardware = [res['name'] for res in result] if result else []
        return True, info, hardware
    except Exception:
        # Network failure, timeout, non-JSON body, or unexpected schema.
        return "Unknown", "", "Error while making request"
399
+
400
def check_key_aws_availability(key):
    """Check an AWS credential given as the string "ACCESS_ID:SECRET".

    Returns a 7-tuple:
      (valid, username, is_root, is_admin, quarantined, region_info, cost_info)
    where region_info is either the list of regions with claude enabled or a
    human-readable status string, and cost_info is the current month's billing
    data (or an AWS error message).
    """
    # Guard the split: the original indexing raised IndexError on keys
    # without a ':'; report that in the same tuple shape as invalid creds.
    parts = key.split(':')
    if len(parts) < 2:
        return False, "", "", "", "", "Invalid key format, expected ACCESS_ID:SECRET", ""
    access_id = parts[0]
    access_secret = parts[1]

    root = False
    admin = False
    quarantine = False

    session = boto3.Session(
        aws_access_key_id=access_id,
        aws_secret_access_key=access_secret
    )

    iam = session.client('iam')

    username = check_username(session)
    if not username[0]:
        # Credentials rejected; username[1] carries the AWS error code.
        return False, "", "", "", "", username[1], ""

    if username[0] == 'root':
        root = True
        admin = True

    if not root:
        # Inspect attached managed policies for admin / quarantine markers.
        policies = check_policy(iam, username[0])
        if policies[0]:
            for policy in policies[1]:
                if policy['PolicyName'] == 'AdministratorAccess':
                    admin = True
                if policy['PolicyName'] == 'AWSCompromisedKeyQuarantineV2':
                    quarantine = True

    enable_region = check_bedrock_invoke(session)
    cost = check_aws_billing(session)
    if enable_region:
        return True, username[0], root, admin, quarantine, enable_region, cost
    if root or admin:
        return True, username[0], root, admin, quarantine, "No region has claude enabled yet", cost
    return True, username[0], root, admin, quarantine, "Not enough permission to activate claude bedrock", cost
443
+
444
def check_username(session):
    """Resolve the identity behind the session's credentials via STS.

    Returns (name, "Valid") on success — the IAM user name when the ARN has a
    path component, otherwise the last ':'-field of the ARN (e.g. "root") —
    or (False, error_code) when the credentials are rejected.
    """
    try:
        identity = session.client('sts').get_caller_identity()
        arn_segments = identity['Arn'].split('/')
        if len(arn_segments) > 1:
            # arn:aws:iam::<acct>:user/<name> -> take the user name.
            return arn_segments[1], "Valid"
        # No '/' in the ARN (e.g. arn:aws:iam::<acct>:root).
        return identity['Arn'].split(':')[5], "Valid"
    except botocore.exceptions.ClientError as error:
        return False, error.response['Error']['Code']
454
+
455
def check_policy(iam, username):
    """List the managed policies attached to *username* via the IAM client.

    Returns (True, attached_policies) on success, or (False, error_code)
    when the caller lacks permission or the API call fails.
    """
    try:
        response = iam.list_attached_user_policies(UserName=username)
        attached = response['AttachedPolicies']
        return True, attached
    except botocore.exceptions.ClientError as error:
        return False, error.response['Error']['Code']
461
+
462
def invoke_claude(session, region):
    """Probe whether claude-v2:1 on Bedrock is invocable in *region*.

    Sends a deliberately invalid request (max_tokens_to_sample=0): a
    ValidationException means the model is reachable and the key may invoke
    it, so the region name is returned. Any other outcome returns None.
    """
    # Build the client outside the invoke try-block: the original caught
    # exceptions via `bedrock_runtime.exceptions.*`, which raised NameError
    # if client construction itself failed.
    try:
        bedrock_runtime = session.client("bedrock-runtime", region_name=region)
    except Exception:
        return
    body = json.dumps({
        "prompt": "\n\nHuman:\n\nAssistant:",
        "max_tokens_to_sample": 0
    })
    try:
        bedrock_runtime.invoke_model(body=body, modelId="anthropic.claude-v2:1")
    except bedrock_runtime.exceptions.ValidationException:
        # Invalid max_tokens -> the model exists and is enabled for this key.
        return region
    except bedrock_runtime.exceptions.AccessDeniedException:
        return
    except bedrock_runtime.exceptions.ResourceNotFoundException:
        return
    except Exception:
        return
482
+
483
def check_bedrock_invoke(session):
    """Probe a fixed set of AWS regions in parallel for claude on Bedrock.

    Returns the list of region names where invoke_claude reports the model
    as enabled (in completion order).
    """
    regions = ['us-east-1', 'us-west-2', 'eu-central-1', 'ap-southeast-1', 'ap-northeast-1']
    enable_region = []
    with concurrent.futures.ThreadPoolExecutor() as executer:
        futures = [executer.submit(invoke_claude, session, region) for region in regions]
        for future in concurrent.futures.as_completed(futures):
            # Fetch the result once instead of calling future.result() twice.
            region = future.result()
            if region:
                enable_region.append(region)
    return enable_region
492
+
493
def check_aws_billing(session):
    """Fetch the current calendar month's blended cost via Cost Explorer.

    Returns the 'ResultsByTime' list on success, or the AWS error message
    string when the call fails (e.g. missing ce permissions).
    """
    try:
        cost_explorer = session.client('ce')
        today = datetime.now()
        month_start = today.replace(day=1)
        # First day of next month, rolling the year over in December.
        if today.month == 12:
            next_month_start = month_start.replace(year=today.year + 1, month=1)
        else:
            next_month_start = month_start.replace(month=today.month + 1)
        response = cost_explorer.get_cost_and_usage(
            TimePeriod={
                'Start': month_start.strftime('%Y-%m-%d'),
                'End': next_month_start.strftime('%Y-%m-%d'),
            },
            Granularity='MONTHLY',
            Metrics=['BlendedCost'],
        )
        return response['ResultsByTime']
    except botocore.exceptions.ClientError as error:
        return error.response['Error']['Message']
507
+
508
  if __name__ == "__main__":
509
  key = os.getenv("OPENAI_API_KEY")
510
  key_ant = os.getenv("ANTHROPIC_API_KEY")