Spaces:
Running
Running
liujch1998
committed on
Commit
β’
e59eb9e
1
Parent(s):
ce324c1
Customizable consts
Browse files- app.py +1 -29
- constants.py +30 -0
app.py
CHANGED
@@ -2,35 +2,7 @@ import gradio as gr
|
|
2 |
import json
|
3 |
import os
|
4 |
import requests
|
5 |
-
|
6 |
-
CORPUS_BY_DESC = {
|
7 |
-
'RedPajama (LLaMA tokenizer)': 'rpj_v3_c4_llama2',
|
8 |
-
'Pile-val (GPT-2 tokenizer)': 'pile_v3_val',
|
9 |
-
}
|
10 |
-
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
11 |
-
QUERY_TYPE_BY_DESC = {
|
12 |
-
'1. Count an n-gram': 'count',
|
13 |
-
'2. Compute the probability of the last token in an n-gram': 'compute_prob',
|
14 |
-
'3. Compute the next-token distribution of an (n-1)-gram': 'get_next_token_distribution_approx',
|
15 |
-
'4. Compute the β-gram probability of the last token': 'compute_infgram_prob',
|
16 |
-
'5. Compute the β-gram next-token distribution': 'get_infgram_next_token_distribution_approx',
|
17 |
-
'6. Searching for document containing n-gram(s)': 'get_a_random_document_from_cnf_query_fast_approx',
|
18 |
-
# '7. Analyze an (AI-generated) document using β-gram': 'analyze_document',
|
19 |
-
}
|
20 |
-
QUERY_DESC_BY_TYPE = {v: k for k, v in QUERY_TYPE_BY_DESC.items()}
|
21 |
-
QUERY_DESCS = list(QUERY_TYPE_BY_DESC.keys())
|
22 |
-
|
23 |
-
MAX_QUERY_CHARS = 1000
|
24 |
-
MAX_INPUT_DOC_TOKENS = 1000
|
25 |
-
MAX_OUTPUT_DOC_TOKENS = 5000 # must be an even number!
|
26 |
-
MAX_CNT_FOR_NTD = 1000
|
27 |
-
MAX_CLAUSE_FREQ = 10000
|
28 |
-
MAX_CLAUSE_FREQ_FAST = 1000000
|
29 |
-
MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD = 50000
|
30 |
-
MAX_DIFF_TOKENS = 100
|
31 |
-
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
32 |
-
MAX_CLAUSES_IN_CNF = 4
|
33 |
-
MAX_TERMS_IN_DISJ_CLAUSE = 4
|
34 |
|
35 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
36 |
default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
|
|
|
2 |
import json
|
3 |
import os
|
4 |
import requests
|
5 |
+
from .constants import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
API_IPADDR = os.environ.get('API_IPADDR', None)
|
8 |
default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
|
constants.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
CORPUS_BY_DESC = {
|
4 |
+
'RedPajama (LLaMA tokenizer)': 'rpj_v3_c4_llama2',
|
5 |
+
'Pile-val (GPT-2 tokenizer)': 'pile_v3_val',
|
6 |
+
}
|
7 |
+
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
8 |
+
QUERY_TYPE_BY_DESC = {
|
9 |
+
'1. Count an n-gram': 'count',
|
10 |
+
'2. Compute the probability of the last token in an n-gram': 'compute_prob',
|
11 |
+
'3. Compute the next-token distribution of an (n-1)-gram': 'get_next_token_distribution_approx',
|
12 |
+
'4. Compute the β-gram probability of the last token': 'compute_infgram_prob',
|
13 |
+
'5. Compute the β-gram next-token distribution': 'get_infgram_next_token_distribution_approx',
|
14 |
+
'6. Searching for document containing n-gram(s)': 'get_a_random_document_from_cnf_query_fast_approx',
|
15 |
+
# '7. Analyze an (AI-generated) document using β-gram': 'analyze_document',
|
16 |
+
}
|
17 |
+
QUERY_DESC_BY_TYPE = {v: k for k, v in QUERY_TYPE_BY_DESC.items()}
|
18 |
+
QUERY_DESCS = list(QUERY_TYPE_BY_DESC.keys())
|
19 |
+
|
20 |
+
MAX_QUERY_CHARS = os.environ.get('MAX_QUERY_CHARS', 1000)
|
21 |
+
MAX_INPUT_DOC_TOKENS = os.environ.get('MAX_INPUT_DOC_TOKENS', 1000)
|
22 |
+
MAX_OUTPUT_DOC_TOKENS = os.environ.get('MAX_OUTPUT_DOC_TOKENS', 5000)
|
23 |
+
MAX_CNT_FOR_NTD = os.environ.get('MAX_CNT_FOR_NTD', 1000)
|
24 |
+
MAX_CLAUSE_FREQ = os.environ.get('MAX_CLAUSE_FREQ', 10000)
|
25 |
+
MAX_CLAUSE_FREQ_FAST = os.environ.get('MAX_CLAUSE_FREQ_FAST', 1000000)
|
26 |
+
MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD = os.environ.get('MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD', 50000)
|
27 |
+
MAX_DIFF_TOKENS = os.environ.get('MAX_DIFF_TOKENS', 100)
|
28 |
+
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
|
29 |
+
MAX_CLAUSES_IN_CNF = os.environ.get('MAX_CLAUSES_IN_CNF', 4)
|
30 |
+
MAX_TERMS_IN_DISJ_CLAUSE = os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4)
|