superdup95 committed on
Commit
0f10e18
1 Parent(s): 04be8e0

Update api_usage.py

Browse files
Files changed (1) hide show
  1. api_usage.py +49 -40
api_usage.py CHANGED
@@ -10,16 +10,29 @@ GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
10
  # "gpt-4": 500,
11
  # "gpt-4-32k": 1000
12
  #}
 
 
 
 
 
 
 
 
 
 
 
 
13
  TOKEN_LIMIT_PER_TIER_TURBO = {
14
- "free": 20000,
15
- "tier-1": 40000,
16
  "tier-1(old?)": 90000,
17
  "tier-2": 80000,
18
  "tier-3": 160000,
19
- "tier-4-5": 1000000
 
20
  }
21
  TOKEN_LIMIT_PER_TIER_GPT4 = {
22
- "tier-free-1": 10000,
23
  "tier-2": 40000,
24
  "tier-3": 80000,
25
  "tier-4-5": 300000
@@ -102,29 +115,30 @@ def format_status(list_models_avai, headers):
102
  tpm = []
103
  quota = ""
104
  for model in list_models_avai:
105
- req_body = {"model": model, "messages": [{'role':'user', 'content': ''}], "max_tokens": 1}
106
  r = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
107
- result = r.json()
108
- if "id" in result:
109
- rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
110
- tpm_num = int(r.headers.get('x-ratelimit-limit-tokens', 0))
111
- _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
112
- _tpm = '{:,}'.format(tpm_num).replace(',', ' ')
113
- _tpm_left = '{:,}'.format(int(r.headers.get('x-ratelimit-remaining-tokens', 0))).replace(',', ' ')
114
- rpm.append(f"{_rpm} ({model})")
115
- tpm.append(f"{_tpm} ({_tpm_left} left, {model})")
116
- if model == GPT_TYPES[1]:
117
- quota = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_GPT4, headers)
118
- elif model == GPT_TYPES[0] and len(list_models_avai) == 1:
119
- quota = check_key_tier(rpm_num, tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
120
- else:
121
- continue
122
- else:
123
  e = result.get("error", {}).get("code", "")
124
- rpm.append(f"0 ({model})")
125
- tpm.append(f"0 ({model})")
126
- quota = e
127
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  rpm_str = ""
129
  tpm_str = ""
130
  for i in range(len(rpm)):
@@ -132,24 +146,19 @@ def format_status(list_models_avai, headers):
132
  tpm_str += tpm[i] + (", " if i < len(rpm)-1 else "")
133
  return rpm_str, tpm_str, quota
134
 
135
- def check_key_tier(rpm, tpm, dict, headers):
136
  dictItemsCount = len(dict)
137
  dictCount = 0
138
  for k, v in dict.items():
139
- if tpm == v:
140
- if k == "tier-free-1":
141
- if rpm == 500:
142
- return f"yes | tier-1"
143
- else:
144
- return f"yes | free"
145
- if k == "tier-4-5":
146
- req_body = {"model": "text-embedding-ada-002", "input": "hiii"}
147
- r = requests.post(f"{BASE_URL}/embeddings", headers=headers, json=req_body, timeout=10)
148
- tpm_num = int(r.headers.get('x-ratelimit-limit-tokens', 0))
149
- if tpm_num == 5000000:
150
- return f"yes | tier-4"
151
- else:
152
- return f"yes | tier-5"
153
  return f"yes | {k}"
154
  dictCount+=1
155
  if (dictCount == dictItemsCount):
 
10
  # "gpt-4": 500,
11
  # "gpt-4-32k": 1000
12
  #}
13
+ #RATE_LIMIT_PER_TIER_TURBO = {
14
+ # "free": 200,
15
+ # "tier-1-2-3": 3500,
16
+ # "tier-2-3": 5000,
17
+ # "tier-4-5": 10000
18
+ #}
19
+ #RATE_LIMIT_PER_TIER_GPT4 = {
20
+ # "tier-1": 500,
21
+ # "tier-2-3": 5000,
22
+ # "tier-4-5": 10000
23
+ #}
24
+
25
  TOKEN_LIMIT_PER_TIER_TURBO = {
26
+ "free": 40000,
27
+ "tier-1": 60000,
28
  "tier-1(old?)": 90000,
29
  "tier-2": 80000,
30
  "tier-3": 160000,
31
+ "tier-4": 1000000,
32
+ "tier-5": 2000000
33
  }
34
  TOKEN_LIMIT_PER_TIER_GPT4 = {
35
+ "tier-1": 10000,
36
  "tier-2": 40000,
37
  "tier-3": 80000,
38
  "tier-4-5": 300000
 
115
  tpm = []
116
  quota = ""
117
  for model in list_models_avai:
118
+ req_body = {"model": model, "messages": [{'role':'user', 'content': ''}], "max_tokens": -0}
119
  r = requests.post(f"{BASE_URL}/chat/completions", headers=headers, json=req_body, timeout=10)
120
+ result = r.json()
121
+ if "error" in result:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  e = result.get("error", {}).get("code", "")
123
+ if e == None:
124
+ #print(r.headers)
125
+ rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
126
+ tpm_num = int(r.headers.get("x-ratelimit-limit-tokens_usage_based", 0))
127
+ tpm_left = int(r.headers.get("x-ratelimit-remaining-tokens_usage_based", 0))
128
+ rpm.append(f"{rpm_num} ({model})")
129
+ tpm.append(f"{tpm_num} ({tpm_left} left, {model})")
130
+ if model == GPT_TYPES[0]:
131
+ quota = check_key_tier(tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
132
+ #if model == GPT_TYPES[1]:
133
+ # quota = check_key_tier(tpm_num, TOKEN_LIMIT_PER_TIER_GPT4, headers)
134
+ #elif model == GPT_TYPES[0] and len(list_models_avai) == 1:
135
+ # quota = check_key_tier(tpm_num, TOKEN_LIMIT_PER_TIER_TURBO, headers)
136
+ #else:
137
+ # continue
138
+ else:
139
+ rpm.append(f"0 ({model})")
140
+ tpm.append(f"0 ({model})")
141
+ quota = e
142
  rpm_str = ""
143
  tpm_str = ""
144
  for i in range(len(rpm)):
 
146
  tpm_str += tpm[i] + (", " if i < len(rpm)-1 else "")
147
  return rpm_str, tpm_str, quota
148
 
149
+ def check_key_tier(rpm, dict, headers):
150
  dictItemsCount = len(dict)
151
  dictCount = 0
152
  for k, v in dict.items():
153
+ if rpm == v:
154
+ #if k == "tier-4-5":
155
+ # req_body = {"model": "whisper-1"}
156
+ # r = requests.post(f"{BASE_URL}/audio/transcriptions", headers=headers, json=req_body, timeout=10)
157
+ # rpm_num = int(r.headers.get('x-ratelimit-limit-requests', 0))
158
+ # if rpm_num == 100:
159
+ # return f"yes | tier-4"
160
+ # else:
161
+ # return f"yes | tier-5"
 
 
 
 
 
162
  return f"yes | {k}"
163
  dictCount+=1
164
  if (dictCount == dictItemsCount):