Spaces:
Running
Running
liujch1998
committed on
Commit
•
4641d03
1
Parent(s):
40c12a7
Improve description and logging
Browse files
- app.py +16 -4
- constants.py +3 -3
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import json
|
3 |
import os
|
4 |
import requests
|
@@ -8,12 +9,13 @@ API_IPADDR = os.environ.get('API_IPADDR', None)
|
|
8 |
default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
|
9 |
max_size = os.environ.get('max_size', 100)
|
10 |
max_threads = os.environ.get('max_threads', 40)
|
11 |
-
debug = os.environ.get('debug', False)
|
12 |
|
13 |
def process(corpus_desc, query_desc, query):
|
14 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
15 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
16 |
-
|
|
|
17 |
data = {
|
18 |
'corpus': corpus,
|
19 |
'query_type': query_type,
|
@@ -26,7 +28,8 @@ def process(corpus_desc, query_desc, query):
|
|
26 |
result = response.json()
|
27 |
else:
|
28 |
raise ValueError(f'Invalid response: {response.status_code}')
|
29 |
-
|
|
|
30 |
return result
|
31 |
|
32 |
with gr.Blocks() as demo:
|
@@ -35,12 +38,13 @@ with gr.Blocks() as demo:
|
|
35 |
'''<h1 text-align="center">Infini-gram: An Engine for n-gram / ∞-gram Language Models with Trillion-Token Corpora</h1>
|
36 |
|
37 |
<p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
|
|
|
38 |
'''
|
39 |
)
|
40 |
with gr.Row():
|
41 |
with gr.Column(scale=1):
|
42 |
corpus_desc = gr.Radio(choices=CORPUS_DESCS, label='Corpus', value=CORPUS_DESCS[0])
|
43 |
-
with gr.Column(scale=
|
44 |
query_desc = gr.Radio(
|
45 |
choices=QUERY_DESCS, label='Query Type', value=QUERY_DESCS[0],
|
46 |
)
|
@@ -171,6 +175,14 @@ with gr.Blocks() as demo:
|
|
171 |
with gr.Column(scale=1):
|
172 |
doc_analysis_output = gr.HTML(value='', label='Analysis')
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
count_clear.add([count_input, count_output, count_output_tokens])
|
175 |
ngram_clear.add([ngram_input, ngram_output, ngram_output_tokens])
|
176 |
a_ntd_clear.add([a_ntd_input, a_ntd_output, a_ntd_output_tokens])
|
|
|
1 |
import gradio as gr
|
2 |
+
import datetime
|
3 |
import json
|
4 |
import os
|
5 |
import requests
|
|
|
9 |
default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
|
10 |
max_size = os.environ.get('max_size', 100)
|
11 |
max_threads = os.environ.get('max_threads', 40)
|
12 |
+
debug = (os.environ.get('debug', 'False') != 'False')
|
13 |
|
14 |
def process(corpus_desc, query_desc, query):
|
15 |
corpus = CORPUS_BY_DESC[corpus_desc]
|
16 |
query_type = QUERY_TYPE_BY_DESC[query_desc]
|
17 |
+
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
|
18 |
+
print(json.dumps({'timestamp': timestamp, 'corpus': corpus, 'query_type': query_type, 'query': query}))
|
19 |
data = {
|
20 |
'corpus': corpus,
|
21 |
'query_type': query_type,
|
|
|
28 |
result = response.json()
|
29 |
else:
|
30 |
raise ValueError(f'Invalid response: {response.status_code}')
|
31 |
+
if debug:
|
32 |
+
print(result)
|
33 |
return result
|
34 |
|
35 |
with gr.Blocks() as demo:
|
|
|
38 |
'''<h1 text-align="center">Infini-gram: An Engine for n-gram / ∞-gram Language Models with Trillion-Token Corpora</h1>
|
39 |
|
40 |
<p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
|
41 |
+
<p style='font-size: 16px;'>The engine is documented in our paper: <a href="">Infini-gram: Scaling Unbounded n-gram Language Models to a Trillion Tokens</a></p>
|
42 |
'''
|
43 |
)
|
44 |
with gr.Row():
|
45 |
with gr.Column(scale=1):
|
46 |
corpus_desc = gr.Radio(choices=CORPUS_DESCS, label='Corpus', value=CORPUS_DESCS[0])
|
47 |
+
with gr.Column(scale=3):
|
48 |
query_desc = gr.Radio(
|
49 |
choices=QUERY_DESCS, label='Query Type', value=QUERY_DESCS[0],
|
50 |
)
|
|
|
175 |
with gr.Column(scale=1):
|
176 |
doc_analysis_output = gr.HTML(value='', label='Analysis')
|
177 |
|
178 |
+
with gr.Row():
|
179 |
+
gr.Markdown('''
|
180 |
+
If you find this tool useful, please kindly cite our paper:
|
181 |
+
```
|
182 |
+
(coming soon)
|
183 |
+
```
|
184 |
+
''')
|
185 |
+
|
186 |
count_clear.add([count_input, count_output, count_output_tokens])
|
187 |
ngram_clear.add([ngram_input, ngram_output, ngram_output_tokens])
|
188 |
a_ntd_clear.add([a_ntd_input, a_ntd_output, a_ntd_output_tokens])
|
constants.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import os
|
2 |
|
3 |
CORPUS_BY_DESC = {
|
4 |
-
'RedPajama (LLaMA tokenizer)': 'v3_rpj_llama_c4',
|
5 |
-
'Pile-val (LLaMA tokenizer)': 'v3_pileval_llama',
|
6 |
-
'Pile-val (GPT-2 tokenizer)': 'v3_pileval',
|
7 |
}
|
8 |
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
9 |
QUERY_TYPE_BY_DESC = {
|
|
|
1 |
import os
|
2 |
|
3 |
CORPUS_BY_DESC = {
|
4 |
+
'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
|
5 |
+
'Pile-val (LLaMA tokenizer), 790M tokens': 'v3_pileval_llama',
|
6 |
+
'Pile-val (GPT-2 tokenizer), 770M tokens': 'v3_pileval',
|
7 |
}
|
8 |
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
9 |
QUERY_TYPE_BY_DESC = {
|