Spaces:
Running
Running
liujch1998
committed on
Commit
•
0d3c7d8
1
Parent(s):
e979a07
Add IP-based throttle
Browse files
- app.py +46 -11
- constants.py +2 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ import datetime
|
|
3 |
import json
|
4 |
import os
|
5 |
import requests
|
|
|
6 |
from constants import *
|
7 |
|
8 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
@@ -11,17 +12,31 @@ max_size = os.environ.get('max_size', 100)
|
|
11 |
max_threads = os.environ.get('max_threads', 40)
|
12 |
debug = (os.environ.get('debug', 'False') != 'False')
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
16 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
17 |
-
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
18 |
data = {
|
19 |
'timestamp': timestamp,
|
|
|
|
|
20 |
'corpus': corpus,
|
21 |
'query_type': query_type,
|
22 |
'query': query,
|
23 |
}
|
24 |
print(json.dumps(data))
|
|
|
|
|
|
|
|
|
25 |
if API_IPADDR is None:
|
26 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
27 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
@@ -33,18 +48,38 @@ def process(corpus_desc, query_desc, query):
|
|
33 |
print(result)
|
34 |
return result
|
35 |
|
36 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
38 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
39 |
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
40 |
data = {
|
41 |
'timestamp': timestamp,
|
|
|
|
|
42 |
'corpus': corpus,
|
43 |
'query_type': query_type,
|
44 |
'query': query,
|
45 |
'maxnum': maxnum,
|
46 |
}
|
47 |
print(json.dumps(data))
|
|
|
|
|
|
|
|
|
48 |
if API_IPADDR is None:
|
49 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
50 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
@@ -60,7 +95,7 @@ def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum):
|
|
60 |
outputs = outputs[:maxnum]
|
61 |
while len(outputs) < 10:
|
62 |
outputs.append([])
|
63 |
-
return
|
64 |
|
65 |
with gr.Blocks() as demo:
|
66 |
with gr.Column():
|
@@ -281,14 +316,14 @@ If you find this tool useful, please kindly cite our paper:
|
|
281 |
ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
|
282 |
doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
|
283 |
|
284 |
-
count_submit.click(
|
285 |
-
ngram_submit.click(
|
286 |
-
ntd_submit.click(
|
287 |
-
infgram_submit.click(
|
288 |
-
infntd_submit.click(
|
289 |
# ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
|
290 |
-
ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[
|
291 |
-
doc_analysis_submit.click(
|
292 |
|
293 |
def update_query_desc(selection):
|
294 |
return {
|
|
|
3 |
import json
|
4 |
import os
|
5 |
import requests
|
6 |
+
import time
|
7 |
from constants import *
|
8 |
|
9 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
|
|
12 |
max_threads = os.environ.get('max_threads', 40)
|
13 |
debug = (os.environ.get('debug', 'False') != 'False')
|
14 |
|
15 |
+
last_query_time_by_ip = {}
|
16 |
+
|
17 |
+
def process(corpus_desc, query_desc, query, ret_num, request: gr.Request):
|
18 |
+
global last_query_time_by_ip
|
19 |
+
ip = request.client.host if request else ''
|
20 |
+
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
21 |
+
t = time.time()
|
22 |
+
last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
|
23 |
+
blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
|
24 |
+
|
25 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
26 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
|
|
27 |
data = {
|
28 |
'timestamp': timestamp,
|
29 |
+
'ip': ip,
|
30 |
+
'blocked': blocked,
|
31 |
'corpus': corpus,
|
32 |
'query_type': query_type,
|
33 |
'query': query,
|
34 |
}
|
35 |
print(json.dumps(data))
|
36 |
+
if blocked:
|
37 |
+
return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * (ret_num - 1))
|
38 |
+
if ip != '':
|
39 |
+
last_query_time_by_ip[ip] = t
|
40 |
if API_IPADDR is None:
|
41 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
42 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
|
48 |
print(result)
|
49 |
return result
|
50 |
|
51 |
+
def process_1(corpus_desc, query_desc, query, request: gr.Request):
|
52 |
+
return process(corpus_desc, query_desc, query, 1, request)
|
53 |
+
def process_2(corpus_desc, query_desc, query, request: gr.Request):
|
54 |
+
return process(corpus_desc, query_desc, query, 2, request)
|
55 |
+
def process_3(corpus_desc, query_desc, query, request: gr.Request):
|
56 |
+
return process(corpus_desc, query_desc, query, 3, request)
|
57 |
+
|
58 |
+
def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum, request: gr.Request):
|
59 |
+
global last_query_time_by_ip
|
60 |
+
ip = request.client.host if request else ''
|
61 |
+
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
62 |
+
t = time.time()
|
63 |
+
last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
|
64 |
+
blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
|
65 |
+
|
66 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
67 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
68 |
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
69 |
data = {
|
70 |
'timestamp': timestamp,
|
71 |
+
'ip': ip,
|
72 |
+
'blocked': blocked,
|
73 |
'corpus': corpus,
|
74 |
'query_type': query_type,
|
75 |
'query': query,
|
76 |
'maxnum': maxnum,
|
77 |
}
|
78 |
print(json.dumps(data))
|
79 |
+
if blocked:
|
80 |
+
return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * 11)
|
81 |
+
if ip != '':
|
82 |
+
last_query_time_by_ip[ip] = t
|
83 |
if API_IPADDR is None:
|
84 |
raise ValueError(f'API_IPADDR envvar is not set!')
|
85 |
response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
|
|
|
95 |
outputs = outputs[:maxnum]
|
96 |
while len(outputs) < 10:
|
97 |
outputs.append([])
|
98 |
+
return message, output_tokens, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6], outputs[7], outputs[8], outputs[9]
|
99 |
|
100 |
with gr.Blocks() as demo:
|
101 |
with gr.Column():
|
|
|
316 |
ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
|
317 |
doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
|
318 |
|
319 |
+
count_submit.click(process_2, inputs=[corpus_desc, query_desc, count_input], outputs=[count_output, count_output_tokens], api_name=False)
|
320 |
+
ngram_submit.click(process_2, inputs=[corpus_desc, query_desc, ngram_input], outputs=[ngram_output, ngram_output_tokens], api_name=False)
|
321 |
+
ntd_submit.click(process_2, inputs=[corpus_desc, query_desc, ntd_input], outputs=[ntd_output, ntd_output_tokens], api_name=False)
|
322 |
+
infgram_submit.click(process_3, inputs=[corpus_desc, query_desc, infgram_input], outputs=[infgram_output, infgram_output_tokens, infgram_longest_suffix], api_name=False)
|
323 |
+
infntd_submit.click(process_3, inputs=[corpus_desc, query_desc, infntd_input], outputs=[infntd_output, infntd_output_tokens, infntd_longest_suffix], api_name=False)
|
324 |
# ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
|
325 |
+
ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[ard_cnf_multi_output_message, ard_cnf_multi_output_tokens, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9], api_name=False)
|
326 |
+
doc_analysis_submit.click(process_1, inputs=[corpus_desc, query_desc, doc_analysis_input], outputs=[doc_analysis_output], api_name=False)
|
327 |
|
328 |
def update_query_desc(selection):
|
329 |
return {
|
constants.py
CHANGED
@@ -30,3 +30,5 @@ MAX_DIFF_TOKENS = int(os.environ.get('MAX_DIFF_TOKENS', 100))
|
|
30 |
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
31 |
MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
|
32 |
MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
|
|
|
|
|
|
30 |
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
31 |
MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
|
32 |
MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
|
33 |
+
|
34 |
+
MIN_QUERY_INTERVAL_SECONDS = int(os.environ.get('MIN_QUERY_INTERVAL_SECONDS', 5))
|