diff --git a/pyserini/2cr/_base.py b/pyserini/2cr/_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..8225e9c0c3d4026fc19f048b27aafae37bc4b277
--- /dev/null
+++ b/pyserini/2cr/_base.py
@@ -0,0 +1,95 @@
+#
+# Pyserini: Reproducible IR research with sparse and dense representations
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import subprocess
+
+fail_str = '\033[91m[FAIL]\033[0m'
+ok_str = '[OK]'
+okish_str = '\033[94m[OKish]\033[0m'
+
+
+def run_command(cmd):
+    process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    stdout = stdout.decode('utf-8')
+    stderr = stderr.decode('utf-8')
+
+    return stdout, stderr
+
+
+def run_eval_and_return_metric(metric, eval_key, defs, runfile):
+    eval_cmd = f'python -m pyserini.eval.trec_eval {defs} {eval_key} {runfile}'
+    eval_stdout, eval_stderr = run_command(eval_cmd)
+
+    for line in eval_stdout.split('\n'):
+        parts = line.split('\t')
+        if len(parts) == 3 and parts[1] == 'all':
+            return round(float(parts[2]), 4)
+
+    return 0.0
+
+
+def run_dpr_retrieval_eval_and_return_metric(defs, json_file):
+    """Generate DPR retrieval evaluation scores.
+
+    Args:
+        defs: top-k definitions (e.g., '--topk 5 20')
+        json_file: DPR retrieval JSON file
+
+    Returns:
+        topk: a dictionary of top-k accuracy scores, keyed by cutoff (e.g., 'Top5')
+    """
+    eval_cmd = f'python -m pyserini.eval.evaluate_dpr_retrieval --retrieval {json_file} {defs} '
+    eval_stdout, eval_stderr = run_command(eval_cmd)
+    topk = {}
+    for line in eval_stdout.split('\n'):
+        parts = line.split('\t')
+        if len(parts) == 2 and 'accuracy' in parts[1]:
+            topk.update({parts[0]: round(float(parts[1][10:]) * 100, 4)})
+    return topk
+
+
+def convert_trec_run_to_dpr_retrieval_json(topics, index, runfile, output):
+    """Convert a TREC runfile into a DPR retrieval JSON file.
+
+    Args:
+        topics: topics field
+        index: index field
+        runfile: input runfile
+        output: output JSON file
+
+    Returns:
+        The exit status of the conversion command.
+    """
+    cmd = f'python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run --topics {topics} --index {index} --input {runfile} --output {output}'
+    return os.system(cmd)
+
+
+def run_fusion(run_ls, output, k):
+    """Run the fusion command and return its status code.
+
+    Args:
+        run_ls: a list of runfile paths
+        output: output path
+        k: top-k value
+
+    Returns:
+        The exit status of the fusion command.
+    """
+    run_files = ' '.join(run_ls)
+    cmd = f'python -m pyserini.fusion --runs {run_files} --output {output} --k {k}'
+    return os.system(cmd)
diff --git a/pyserini/2cr/miracl.py b/pyserini/2cr/miracl.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3c0bb3223f62bfcc8649e553c0b11b05eaeee0b
--- /dev/null
+++ b/pyserini/2cr/miracl.py
@@ -0,0 +1,447 @@
+#
+# Pyserini: Reproducible IR research with sparse and dense representations
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import math +import os +import sys +import time +import subprocess +import pkg_resources +from collections import defaultdict, OrderedDict +from string import Template + +import yaml + +from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str + +languages = [ + ['ar', 'arabic'], + ['bn', 'bengali'], + ['en', 'english'], + ['es', 'spanish'], + ['fa', 'persian'], + ['fi', 'finnish'], + ['fr', 'french'], + ['hi', 'hindi'], + ['id', 'indonesian'], + ['ja', 'japanese'], + ['ko', 'korean'], + ['ru', 'russian'], + ['sw', 'swahili'], + ['te', 'telugu'], + ['th', 'thai'], + ['zh', 'chinese'], + ['de', 'german'], + ['yo', 'yoruba'] +] + +html_display = OrderedDict() +html_display['bm25'] = 'BM25' +html_display['mdpr-tied-pft-msmarco'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO' +html_display['mdpr-tied-pft-msmarco-ft-all'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO then FT w/ all Mr. TyDi' +html_display['bm25-mdpr-tied-pft-msmarco-hybrid'] = 'Hybrid of `bm25` and `mdpr-tied-pft-msmarco`' +html_display['mdpr-tied-pft-msmarco-ft-miracl'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO then in-lang FT w/ MIRACL' +html_display['mcontriever-tied-pft-msmarco'] = 'mContriever (tied encoders), pre-FT w/ MS MARCO' + +models = list(html_display) + +trec_eval_metric_definitions = { + 'nDCG@10': '-c -M 100 -m ndcg_cut.10', + 'R@100': '-c -m recall.100', +} + + +def format_run_command(raw): + return raw.replace('--lang', '\\\n --lang') \ + .replace('--encoder', '\\\n --encoder') \ + .replace('--topics', '\\\n --topics') \ + .replace('--index', '\\\n --index') \ + .replace('--output ', '\\\n --output ') \ + .replace('--runs', '\\\n --runs ') \ + .replace('--batch ', '\\\n --batch ') \ + .replace('--threads 12', '--threads 12 \\\n ') + + +def format_eval_command(raw): + return raw.replace('-c ', '\\\n -c ') \ + .replace(raw.split()[-1], f'\\\n {raw.split()[-1]}') + + +def read_file(f): + fin = open(f, 'r') + text = fin.read() + fin.close() + + return text + + +def list_conditions(): + print('Conditions:\n-----------') + for condition, _ in html_display.items(): + print(condition) + print('\nLanguages\n---------') + for language in languages: + print(language[0]) + + +def generate_table_rows(table, row_template, commands, eval_commands, table_id, split, metric): + row_cnt = 1 + html_rows = [] + + for model in models: + s = Template(row_template) + + keys = {} + used_langs = 0 + for lang in languages: + keys[lang[0]] = f'{model}.{lang[0]}' + used_langs += 1 if table[keys[lang[0]]][split][metric] != 0 else 0 + + sum = table[keys["ar"]][split][metric] + \ + table[keys["bn"]][split][metric] + \ + table[keys["en"]][split][metric] + \ + table[keys["es"]][split][metric] + \ + table[keys["fa"]][split][metric] + \ + table[keys["fi"]][split][metric] + \ + table[keys["fr"]][split][metric] + \ + table[keys["hi"]][split][metric] + \ + table[keys["id"]][split][metric] + \ + table[keys["ja"]][split][metric] + \ + table[keys["ko"]][split][metric] + \ + table[keys["ru"]][split][metric] + \ + table[keys["sw"]][split][metric] + \ + table[keys["te"]][split][metric] + \ + 
table[keys["th"]][split][metric] + \ + table[keys["zh"]][split][metric] + \ + table[keys["de"]][split][metric] + \ + table[keys["yo"]][split][metric] + avg = sum / used_langs + + s = s.substitute(table_cnt=table_id, + row_cnt=row_cnt, + model=html_display[model], + ar=f'{table[keys["ar"]][split][metric]:.3f}', + bn=f'{table[keys["bn"]][split][metric]:.3f}', + en=f'{table[keys["en"]][split][metric]:.3f}', + es=f'{table[keys["es"]][split][metric]:.3f}', + fa=f'{table[keys["fa"]][split][metric]:.3f}', + fi=f'{table[keys["fi"]][split][metric]:.3f}', + fr=f'{table[keys["fr"]][split][metric]:.3f}', + hi=f'{table[keys["hi"]][split][metric]:.3f}', + id=f'{table[keys["id"]][split][metric]:.3f}', + ja=f'{table[keys["ja"]][split][metric]:.3f}', + ko=f'{table[keys["ko"]][split][metric]:.3f}', + ru=f'{table[keys["ru"]][split][metric]:.3f}', + sw=f'{table[keys["sw"]][split][metric]:.3f}', + te=f'{table[keys["te"]][split][metric]:.3f}', + th=f'{table[keys["th"]][split][metric]:.3f}', + zh=f'{table[keys["zh"]][split][metric]:.3f}', + de=f'{table[keys["de"]][split][metric]:.3f}', + yo=f'{table[keys["yo"]][split][metric]:.3f}', + avg=f'{avg:.3f}', + cmd1=f'{commands[keys["ar"]]}', + cmd2=f'{commands[keys["bn"]]}', + cmd3=f'{commands[keys["en"]]}', + cmd4=f'{commands[keys["es"]]}', + cmd5=f'{commands[keys["fa"]]}', + cmd6=f'{commands[keys["fi"]]}', + cmd7=f'{commands[keys["fr"]]}', + cmd8=f'{commands[keys["hi"]]}', + cmd9=f'{commands[keys["id"]]}', + cmd10=f'{commands[keys["ja"]]}', + cmd11=f'{commands[keys["ko"]]}', + cmd12=f'{commands[keys["ru"]]}', + cmd13=f'{commands[keys["sw"]]}', + cmd14=f'{commands[keys["te"]]}', + cmd15=f'{commands[keys["th"]]}', + cmd16=f'{commands[keys["zh"]]}', + cmd17=f'{commands[keys["de"]]}', + cmd18=f'{commands[keys["yo"]]}', + eval_cmd1=f'{eval_commands[keys["ar"]][metric]}', + eval_cmd2=f'{eval_commands[keys["bn"]][metric]}', + eval_cmd3=f'{eval_commands[keys["en"]][metric]}', + eval_cmd4=f'{eval_commands[keys["es"]][metric]}', + eval_cmd5=f'{eval_commands[keys["fa"]][metric]}', + eval_cmd6=f'{eval_commands[keys["fi"]][metric]}', + eval_cmd7=f'{eval_commands[keys["fr"]][metric]}', + eval_cmd8=f'{eval_commands[keys["hi"]][metric]}', + eval_cmd9=f'{eval_commands[keys["id"]][metric]}', + eval_cmd10=f'{eval_commands[keys["ja"]][metric]}', + eval_cmd11=f'{eval_commands[keys["ko"]][metric]}', + eval_cmd12=f'{eval_commands[keys["ru"]][metric]}', + eval_cmd13=f'{eval_commands[keys["sw"]][metric]}', + eval_cmd14=f'{eval_commands[keys["te"]][metric]}', + eval_cmd15=f'{eval_commands[keys["th"]][metric]}', + eval_cmd16=f'{eval_commands[keys["zh"]][metric]}', + eval_cmd17=f'{eval_commands[keys["de"]][metric]}', + eval_cmd18=f'{eval_commands[keys["yo"]][metric]}' + ) + + s = s.replace("0.000", "--") + html_rows.append(s) + row_cnt += 1 + + return html_rows + + +def print_results(table, metric, split): + print(f'Metric = {metric}, Split = {split}') + print(' ' * 35, end='') + for lang in languages: + print(f'{lang[0]:3} ', end='') + print('') + for model in models: + print(f'{model:33}', end='') + for lang in languages: + key = f'{model}.{lang[0]}' + print(f'{table[key][split][metric]:7.3f}', end='') + print('') + print('') + + +def extract_topic_fn_from_cmd(cmd): + cmd = cmd.split() + topic_idx = cmd.index('--topics') + return cmd[topic_idx + 1] + + +def generate_report(args): + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + commands = defaultdict(lambda: '') + eval_commands = defaultdict(lambda: defaultdict(lambda: '')) + + html_template = 
read_file(pkg_resources.resource_filename(__name__, 'miracl_html.template'))
+    table_template = read_file(pkg_resources.resource_filename(__name__, 'miracl_html_table.template'))
+    row_template = read_file(pkg_resources.resource_filename(__name__, 'miracl_html_table_row.template'))
+
+    with open(pkg_resources.resource_filename(__name__, 'miracl.yaml')) as f:
+        yaml_data = yaml.safe_load(f)
+        for condition in yaml_data['conditions']:
+            name = condition['name']
+            eval_key = condition['eval_key']
+            cmd_template = condition['command']
+            cmd_lst = cmd_template.split()
+            lang = name.split('.')[-1]
+            is_hybrid_run = 'hybrid' in name
+
+            for splits in condition['splits']:
+                split = splits['split']
+                if is_hybrid_run:
+                    hits = int(cmd_lst[cmd_lst.index('--k') + 1])
+                else:
+                    hits = int(cmd_lst[cmd_lst.index('--hits') + 1])
+
+                runfile = os.path.join(args.directory, f'run.miracl.{name}.{split}.txt')
+                if is_hybrid_run:
+                    bm25_output = os.path.join(args.directory,
+                                               f'run.miracl.bm25.{lang}.{split}.top{hits}.txt')
+                    mdpr_output = os.path.join(args.directory,
+                                               f'run.miracl.mdpr-tied-pft-msmarco.{lang}.{split}.top{hits}.txt')
+                    expected_args = dict(output=runfile, bm25_output=bm25_output, mdpr_output=mdpr_output)
+                else:
+                    expected_args = dict(split=split, output=runfile)
+
+                if not all([f"${k}" in cmd_template or f"${{{k}}}" in cmd_template for k in expected_args]):
+                    raise ValueError(f"Not all arguments {list(expected_args)} detected in the command template: {cmd_template}.")
+                cmd = Template(cmd_template).substitute(**expected_args)
+                commands[name] = format_run_command(cmd)
+
+                for expected in splits['scores']:
+                    for metric in expected:
+                        if str(expected[metric])[-1] == "5":
+                            # without adding an epsilon, there is a chance that the f-string rounds a trailing 5 down rather than up
+                            # e.g., 0.8885 -> 0.888 rather than 0.889
+                            # add an epsilon to the expected score to avoid this rounding error
+                            expected[metric] += 1e-5
+                        table[name][split][metric] = expected[metric]
+
+                        eval_cmd = f'python -m pyserini.eval.trec_eval ' + \
+                                   f'{trec_eval_metric_definitions[metric]} {eval_key}-{split} {runfile}'
+                        eval_commands[name][metric] = format_eval_command(eval_cmd)
+
+    tables_html = []
+
+    split = 'dev'
+
+    # Build the table for nDCG@10, dev queries
+    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 1, split, 'nDCG@10')
+    all_rows = '\n'.join(html_rows)
+    tables_html.append(Template(table_template).substitute(desc=f'nDCG@10, {split} queries', rows=all_rows))
+
+    # Build the table for Recall@100, dev queries
+    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 2, split, 'R@100')
+    all_rows = '\n'.join(html_rows)
+    tables_html.append(Template(table_template).substitute(desc=f'Recall@100, {split} queries', rows=all_rows))
+
+    with open(args.output, 'w') as out:
+        out.write(Template(html_template).substitute(title='MIRACL', tables=' '.join(tables_html)))
+
+
+def run_conditions(args):
+    if args.condition == 'mdpr-tied-pft-msmarco-ft-miracl' and args.language in ['de', 'yo']:
+        print('MIRACL de and yo datasets do not have train splits to finetune with')
+        return
+
+    start = time.time()
+
+    table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
+
+    with open(pkg_resources.resource_filename(__name__, 'miracl.yaml')) as f:
+        yaml_data = yaml.safe_load(f)
+        for condition in yaml_data['conditions']:
+            name = condition['name']
+            encoder = name.split('.')[0]
+            lang = name.split('.')[-1]
+            if args.all:
+                pass
+            elif args.condition != encoder:
+                continue
+            elif args.language and args.language != lang:
+                continue
+            eval_key = condition['eval_key']
+            cmd_template = condition['command']
+            cmd_lst = cmd_template.split()
+
+            print(f'condition {name}:')
+            is_hybrid_run = 'hybrid' in name
+
+            for splits in condition['splits']:
+                split = splits['split']
+                if is_hybrid_run:
+                    hits = int(cmd_lst[cmd_lst.index('--k') + 1])
+                else:
+                    hits = int(cmd_lst[cmd_lst.index('--hits') + 1])
+
+                print(f' - split: {split}')
+
+                runfile = os.path.join(args.directory, f'run.miracl.{name}.{split}.top{hits}.txt')
+                if is_hybrid_run:
+                    bm25_output = os.path.join(args.directory,
+                                               f'run.miracl.bm25.{lang}.{split}.top{hits}.txt')
+                    mdpr_output = os.path.join(args.directory,
+                                               f'run.miracl.mdpr-tied-pft-msmarco.{lang}.{split}.top{hits}.txt')
+                    if not os.path.exists(bm25_output):
+                        print(f'Missing BM25 file: {bm25_output}')
+                        continue
+                    if not os.path.exists(mdpr_output):
+                        print(f'Missing mDPR file: {mdpr_output}')
+                        continue
+                    cmd = Template(cmd_template).substitute(split=split, output=runfile, bm25_output=bm25_output,
+                                                            mdpr_output=mdpr_output)
+                else:
+                    cmd = Template(cmd_template).substitute(split=split, output=runfile)
+
+                # In the yaml file, the topics are written as something like '--topics miracl-v1.0-ar-${split}'
+                # This works for the dev split because the topics are directly included in Anserini/Pyserini.
+                # For the train split, we have to map the symbol to a file in tools/topics-and-qrels/
+                # Here, we assume that the developer has cloned the miracl repo and placed the topics there.
+                if split == 'train':
+                    cmd = cmd.replace(f'--topics miracl-v1.0-{lang}-{split}',
+                                      f'--topics tools/topics-and-qrels/topics.miracl-v1.0-{lang}-{split}.tsv')
+
+                if args.display_commands:
+                    print(f'\n```bash\n{format_run_command(cmd)}\n```\n')
+
+                if not os.path.exists(runfile):
+                    if not args.dry_run:
+                        rtn = subprocess.run(cmd.split(), capture_output=True)
+                        stderr = rtn.stderr.decode()
+                        if '--topics' in cmd:
+                            topic_fn = extract_topic_fn_from_cmd(cmd)
+                            if f'ValueError: Topic {topic_fn} Not Found' in stderr:
+                                print(f'Skipping {topic_fn}: file not found.')
+                                continue
+
+                for expected in splits['scores']:
+                    for metric in expected:
+                        if not args.skip_eval:
+                            # We have to translate the training qrels into a file located in tools/topics-and-qrels/
+                            # because they are not included with Anserini/Pyserini by default.
+                            # Here, we assume that the developer has cloned the miracl repo and placed the qrels there.
+ if split == 'train': + qrels = f'tools/topics-and-qrels/qrels.{eval_key}-train.tsv' + else: + qrels = f'{eval_key}-{split}' + score = float(run_eval_and_return_metric(metric, qrels, + trec_eval_metric_definitions[metric], runfile)) + if math.isclose(score, float(expected[metric])): + result_str = ok_str + # Flaky tests + elif (name == 'mdpr-tied-pft-msmarco.hi' and split == 'train' + and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \ + (name == 'mdpr-tied-pft-msmarco-ft-all.ru' + and split == 'dev' and metric == 'nDCG@10' + and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \ + (name == 'bm25-mdpr-tied-pft-msmarco-hybrid.te' + and split == 'train' and metric == 'nDCG@10' + and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \ + (name == 'bm25-mdpr-tied-pft-msmarco-hybrid.zh' + and split == 'dev' and metric == 'nDCG@10' + and math.isclose(score, float(expected[metric]), abs_tol=2e-4)): + result_str = okish_str + else: + result_str = fail_str + f' expected {expected[metric]:.4f}' + print(f' {metric:7}: {score:.4f} {result_str}') + table[name][split][metric] = score + else: + table[name][split][metric] = expected[metric] + + print('') + + for metric in ['nDCG@10', 'R@100']: + for split in ['dev', 'train']: + print_results(table, metric, split) + + end = time.time() + print(f'Total elapsed time: {end - start:.0f}s') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate regression matrix for MIRACL.') + parser.add_argument('--condition', type=str, + help='Condition to run', required=False) + # To list all conditions + parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.') + # For generating reports + parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.') + parser.add_argument('--output', type=str, help='File to store report.', required=False) + # For actually running the experimental conditions + parser.add_argument('--all', action='store_true', default=False, help='Run using all languages.') + parser.add_argument('--language', type=str, help='Language to run.', required=False) + parser.add_argument('--directory', type=str, help='Base directory.', default='', required=False) + parser.add_argument('--dry-run', action='store_true', default=False, help='Print out commands but do not execute.') + parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.') + parser.add_argument('--display-commands', action='store_true', default=False, help='Display command.') + args = parser.parse_args() + + if args.list_conditions: + list_conditions() + sys.exit() + + if args.generate_report: + if not args.output: + print(f'Must specify report filename with --output.') + sys.exit() + + generate_report(args) + sys.exit() + + if args.all and (args.condition or args.language): + print('Specifying --all will run all conditions and languages') + sys.exit() + + run_conditions(args) diff --git a/pyserini/2cr/miracl.yaml b/pyserini/2cr/miracl.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9659442317dada43eb6ec3a20fd2a862b840e09e --- /dev/null +++ b/pyserini/2cr/miracl.yaml @@ -0,0 +1,1180 @@ +conditions: + # BM25 + - name: bm25.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.search.lucene --language ar --topics miracl-v1.0-ar-${split} --index miracl-v1.0-ar --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + 
- nDCG@10: 0.4434 + R@100: 0.8562 + - split: dev + scores: + - nDCG@10: 0.4809 + R@100: 0.8885 + - name: bm25.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.search.lucene --language bn --topics miracl-v1.0-bn-${split} --index miracl-v1.0-bn --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5122 + R@100: 0.8934 + - split: dev + scores: + - nDCG@10: 0.5079 + R@100: 0.9088 + - name: bm25.en + eval_key: miracl-v1.0-en + command: python -m pyserini.search.lucene --language en --topics miracl-v1.0-en-${split} --index miracl-v1.0-en --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3415 + R@100: 0.7928 + - split: dev + scores: + - nDCG@10: 0.3506 + R@100: 0.8190 + - name: bm25.es + eval_key: miracl-v1.0-es + command: python -m pyserini.search.lucene --language es --topics miracl-v1.0-es-${split} --index miracl-v1.0-es --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3030 + R@100: 0.7020 + - split: dev + scores: + - nDCG@10: 0.3193 + R@100: 0.7018 + - name: bm25.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.search.lucene --language fa --topics miracl-v1.0-fa-${split} --index miracl-v1.0-fa --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3270 + R@100: 0.7139 + - split: dev + scores: + - nDCG@10: 0.3334 + R@100: 0.7306 + - name: bm25.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.search.lucene --language fi --topics miracl-v1.0-fi-${split} --index miracl-v1.0-fi --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5106 + R@100: 0.8471 + - split: dev + scores: + - nDCG@10: 0.5513 + R@100: 0.8910 + - name: bm25.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.search.lucene --language fr --topics miracl-v1.0-fr-${split} --index miracl-v1.0-fr --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2152 + R@100: 0.6601 + - split: dev + scores: + - nDCG@10: 0.1832 + R@100: 0.6528 + - name: bm25.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.search.lucene --language hi --topics miracl-v1.0-hi-${split} --index miracl-v1.0-hi --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4745 + R@100: 0.9016 + - split: dev + scores: + - nDCG@10: 0.4578 + R@100: 0.8679 + - name: bm25.id + eval_key: miracl-v1.0-id + command: python -m pyserini.search.lucene --language id --topics miracl-v1.0-id-${split} --index miracl-v1.0-id --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4844 + R@100: 0.9234 + - split: dev + scores: + - nDCG@10: 0.4486 + R@100: 0.9041 + - name: bm25.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.search.lucene --language ja --topics miracl-v1.0-ja-${split} --index miracl-v1.0-ja --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3796 + R@100: 0.8225 + - split: dev + scores: + - nDCG@10: 0.3689 + R@100: 0.8048 + - name: bm25.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.search.lucene --language ko --topics miracl-v1.0-ko-${split} --index miracl-v1.0-ko --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 
0.4279 + R@100: 0.7572 + - split: dev + scores: + - nDCG@10: 0.4190 + R@100: 0.7831 + - name: bm25.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.search.lucene --language ru --topics miracl-v1.0-ru-${split} --index miracl-v1.0-ru --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3153 + R@100: 0.6464 + - split: dev + scores: + - nDCG@10: 0.3342 + R@100: 0.6614 + - name: bm25.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.search.lucene --language sw --topics miracl-v1.0-sw-${split} --index miracl-v1.0-sw --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3356 + R@100: 0.6499 + - split: dev + scores: + - nDCG@10: 0.3826 + R@100: 0.7008 + - name: bm25.te + eval_key: miracl-v1.0-te + command: python -m pyserini.search.lucene --language te --topics miracl-v1.0-te-${split} --index miracl-v1.0-te --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4814 + R@100: 0.8077 + - split: dev + scores: + - nDCG@10: 0.4942 + R@100: 0.8307 + - name: bm25.th + eval_key: miracl-v1.0-th + command: python -m pyserini.search.lucene --language th --topics miracl-v1.0-th-${split} --index miracl-v1.0-th --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4629 + R@100: 0.8768 + - split: dev + scores: + - nDCG@10: 0.4838 + R@100: 0.8874 + - name: bm25.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.search.lucene --language zh --topics miracl-v1.0-zh-${split} --index miracl-v1.0-zh --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2018 + R@100: 0.5541 + - split: dev + scores: + - nDCG@10: 0.1801 + R@100: 0.5599 + - name: bm25.de + eval_key: miracl-v1.0-de + command: python -m pyserini.search.lucene --language de --topics miracl-v1.0-de-${split} --index miracl-v1.0-de --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.2262 + R@100: 0.5724 + - name: bm25.yo + eval_key: miracl-v1.0-yo + command: python -m pyserini.search.lucene --pretokenized --topics miracl-v1.0-yo-${split} --index miracl-v1.0-yo --output $output --batch 128 --threads 16 --bm25 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4059 + R@100: 0.7325 + + # mdpr-tied-pft-msmarco + - name: mdpr-tied-pft-msmarco.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-ar-${split} --index miracl-v1.0-ar-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4653 + R@100: 0.8293 + - split: dev + scores: + - nDCG@10: 0.4993 + R@100: 0.8407 + - name: mdpr-tied-pft-msmarco.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-bn-${split} --index miracl-v1.0-bn-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4362 + R@100: 0.8045 + - split: dev + scores: + - nDCG@10: 0.4427 + R@100: 0.8193 + - name: mdpr-tied-pft-msmarco.en + eval_key: miracl-v1.0-en + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-en-${split} --index 
miracl-v1.0-en-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3986 + R@100: 0.7779 + - split: dev + scores: + - nDCG@10: 0.3938 + R@100: 0.7675 + - name: mdpr-tied-pft-msmarco.es + eval_key: miracl-v1.0-es + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-es-${split} --index miracl-v1.0-es-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4637 + R@100: 0.8654 + - split: dev + scores: + - nDCG@10: 0.4777 + R@100: 0.8643 + - name: mdpr-tied-pft-msmarco.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-fa-${split} --index miracl-v1.0-fa-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4882 + R@100: 0.9092 + - split: dev + scores: + - nDCG@10: 0.4800 + R@100: 0.8980 + - name: mdpr-tied-pft-msmarco.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-fi-${split} --index miracl-v1.0-fi-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4426 + R@100: 0.7611 + - split: dev + scores: + - nDCG@10: 0.4721 + R@100: 0.7877 + - name: mdpr-tied-pft-msmarco.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-fr-${split} --index miracl-v1.0-fr-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4372 + R@100: 0.9268 + - split: dev + scores: + - nDCG@10: 0.4352 + R@100: 0.9154 + - name: mdpr-tied-pft-msmarco.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-hi-${split} --index miracl-v1.0-hi-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3685 + R@100: 0.7780 + - split: dev + scores: + - nDCG@10: 0.3830 + R@100: 0.7755 + - name: mdpr-tied-pft-msmarco.id + eval_key: miracl-v1.0-id + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-id-${split} --index miracl-v1.0-id-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2549 + R@100: 0.5610 + - split: dev + scores: + - nDCG@10: 0.2719 + R@100: 0.5734 + - name: mdpr-tied-pft-msmarco.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-ja-${split} --index miracl-v1.0-ja-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4342 + R@100: 0.8211 + - split: dev + scores: + - nDCG@10: 0.4390 + R@100: 0.8254 + - name: mdpr-tied-pft-msmarco.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-ko-${split} --index miracl-v1.0-ko-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 
--hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4147 + R@100: 0.7699 + - split: dev + scores: + - nDCG@10: 0.4189 + R@100: 0.7369 + - name: mdpr-tied-pft-msmarco.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-ru-${split} --index miracl-v1.0-ru-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3812 + R@100: 0.7854 + - split: dev + scores: + - nDCG@10: 0.4073 + R@100: 0.7972 + - name: mdpr-tied-pft-msmarco.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-sw-${split} --index miracl-v1.0-sw-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2973 + R@100: 0.5761 + - split: dev + scores: + - nDCG@10: 0.2990 + R@100: 0.6158 + - name: mdpr-tied-pft-msmarco.te + eval_key: miracl-v1.0-te + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-te-${split} --index miracl-v1.0-te-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3723 + R@100: 0.7698 + - split: dev + scores: + - nDCG@10: 0.3557 + R@100: 0.7619 + - name: mdpr-tied-pft-msmarco.th + eval_key: miracl-v1.0-th + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-th-${split} --index miracl-v1.0-th-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3451 + R@100: 0.6728 + - split: dev + scores: + - nDCG@10: 0.3578 + R@100: 0.6783 + - name: mdpr-tied-pft-msmarco.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-zh-${split} --index miracl-v1.0-zh-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5040 + R@100: 0.9355 + - split: dev + scores: + - nDCG@10: 0.5116 + R@100: 0.9436 + - name: mdpr-tied-pft-msmarco.de + eval_key: miracl-v1.0-de + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-de-${split} --index miracl-v1.0-de-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4895 + R@100: 0.8983 + - name: mdpr-tied-pft-msmarco.yo + eval_key: miracl-v1.0-yo + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics miracl-v1.0-yo-${split} --index miracl-v1.0-yo-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4439 + R@100: 0.8403 + + # mdpr-tied-pft-msmarco-ft-all + - name: mdpr-tied-pft-msmarco-ft-all.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-ar-${split} --index miracl-v1.0-ar-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6954 + R@100: 0.8542 + - split: dev + scores: + - nDCG@10: 0.5782 + R@100: 0.7953 + - 
name: mdpr-tied-pft-msmarco-ft-all.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-bn-${split} --index miracl-v1.0-bn-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6823 + R@100: 0.8646 + - split: dev + scores: + - nDCG@10: 0.5804 + R@100: 0.8480 + - name: mdpr-tied-pft-msmarco-ft-all.en + eval_key: miracl-v1.0-en + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-en-${split} --index miracl-v1.0-en-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3491 + R@100: 0.5678 + - split: dev + scores: + - nDCG@10: 0.2813 + R@100: 0.5083 + - name: mdpr-tied-pft-msmarco-ft-all.es + eval_key: miracl-v1.0-es + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-es-${split} --index miracl-v1.0-es-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2488 + R@100: 0.4799 + - split: dev + scores: + - nDCG@10: 0.2509 + R@100: 0.4706 + - name: mdpr-tied-pft-msmarco-ft-all.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-fa-${split} --index miracl-v1.0-fa-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3809 + R@100: 0.6899 + - split: dev + scores: + - nDCG@10: 0.3836 + R@100: 0.6863 + - name: mdpr-tied-pft-msmarco-ft-all.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-fi-${split} --index miracl-v1.0-fi-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.7738 + R@100: 0.9081 + - split: dev + scores: + - nDCG@10: 0.5694 + R@100: 0.7984 + - name: mdpr-tied-pft-msmarco-ft-all.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-fr-${split} --index miracl-v1.0-fr-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2989 + R@100: 0.6197 + - split: dev + scores: + - nDCG@10: 0.3010 + R@100: 0.6005 + - name: mdpr-tied-pft-msmarco-ft-all.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-hi-${split} --index miracl-v1.0-hi-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3336 + R@100: 0.6388 + - split: dev + scores: + - nDCG@10: 0.3286 + R@100: 0.6371 + - name: mdpr-tied-pft-msmarco-ft-all.id + eval_key: miracl-v1.0-id + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-id-${split} --index miracl-v1.0-id-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 
0.3321 + R@100: 0.5492 + - split: dev + scores: + - nDCG@10: 0.3462 + R@100: 0.5841 + - name: mdpr-tied-pft-msmarco-ft-all.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-ja-${split} --index miracl-v1.0-ja-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6378 + R@100: 0.7950 + - split: dev + scores: + - nDCG@10: 0.4999 + R@100: 0.7451 + - name: mdpr-tied-pft-msmarco-ft-all.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-ko-${split} --index miracl-v1.0-ko-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5795 + R@100: 0.7850 + - split: dev + scores: + - nDCG@10: 0.4864 + R@100: 0.7183 + - name: mdpr-tied-pft-msmarco-ft-all.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-ru-${split} --index miracl-v1.0-ru-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6011 + R@100: 0.8188 + - split: dev + scores: + - nDCG@10: 0.3933 + R@100: 0.6707 + - name: mdpr-tied-pft-msmarco-ft-all.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-sw-${split} --index miracl-v1.0-sw-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.8882 + R@100: 0.9710 + - split: dev + scores: + - nDCG@10: 0.6575 + R@100: 0.8883 + - name: mdpr-tied-pft-msmarco-ft-all.te + eval_key: miracl-v1.0-te + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-te-${split} --index miracl-v1.0-te-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.8757 + R@100: 0.9725 + - split: dev + scores: + - nDCG@10: 0.7783 + R@100: 0.9513 + - name: mdpr-tied-pft-msmarco-ft-all.th + eval_key: miracl-v1.0-th + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-th-${split} --index miracl-v1.0-th-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.7761 + R@100: 0.9241 + - split: dev + scores: + - nDCG@10: 0.5975 + R@100: 0.8360 + - name: mdpr-tied-pft-msmarco-ft-all.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-zh-${split} --index miracl-v1.0-zh-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3446 + R@100: 0.6608 + - split: dev + scores: + - nDCG@10: 0.3575 + R@100: 0.6725 + - name: mdpr-tied-pft-msmarco-ft-all.de + eval_key: miracl-v1.0-de + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-de-${split} --index miracl-v1.0-de-mdpr-tied-pft-msmarco-ft-all --output $output 
--batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.3219 + R@100: 0.5990 + - name: mdpr-tied-pft-msmarco-ft-all.yo + eval_key: miracl-v1.0-yo + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics miracl-v1.0-yo-${split} --index miracl-v1.0-yo-mdpr-tied-pft-msmarco-ft-all --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5983 + R@100: 0.8908 + + - name: bm25-mdpr-tied-pft-msmarco-hybrid.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6259 + R@100: 0.9173 + - split: dev + scores: + - nDCG@10: 0.6729 + R@100: 0.9405 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6587 + R@100: 0.9297 + - split: dev + scores: + - nDCG@10: 0.6540 + R@100: 0.9321 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.en + eval_key: miracl-v1.0-en + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5347 + R@100: 0.8772 + - split: dev + scores: + - nDCG@10: 0.5488 + R@100: 0.8815 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.es + eval_key: miracl-v1.0-es + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6234 + R@100: 0.9425 + - split: dev + scores: + - nDCG@10: 0.6413 + R@100: 0.9479 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5890 + R@100: 0.9433 + - split: dev + scores: + - nDCG@10: 0.5935 + R@100: 0.9374 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + scores: + - nDCG@10: 0.6164 + R@100: 0.8506 + - split: dev + scores: + - nDCG@10: 0.6716 + R@100: 0.8949 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5299 + R@100: 0.9709 + - split: dev + scores: + - nDCG@10: 0.5233 + R@100: 0.9647 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6217 + R@100: 0.9059 + - split: dev + scores: + - nDCG@10: 0.6157 + R@100: 0.9115 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.id + eval_key: miracl-v1.0-id + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method 
interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4442 + R@100: 0.7595 + - split: dev + scores: + - nDCG@10: 0.4433 + R@100: 0.7683 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5795 + R@100: 0.9082 + - split: dev + scores: + - nDCG@10: 0.5757 + R@100: 0.9036 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5758 + R@100: 0.8744 + - split: dev + scores: + - nDCG@10: 0.6086 + R@100: 0.8997 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4921 + R@100: 0.8494 + - split: dev + scores: + - nDCG@10: 0.5323 + R@100: 0.8738 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4100 + R@100: 0.6987 + - split: dev + scores: + - nDCG@10: 0.4457 + R@100: 0.7254 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.te + eval_key: miracl-v1.0-te + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.6000 + R@100: 0.8717 + - split: dev + scores: + - nDCG@10: 0.6021 + R@100: 0.8569 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.th + eval_key: miracl-v1.0-th + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5669 + R@100: 0.8195 + - split: dev + scores: + - nDCG@10: 0.5990 + R@100: 0.8228 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5209 + R@100: 0.9576 + - split: dev + scores: + - nDCG@10: 0.5254 + R@100: 0.9587 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.de + eval_key: miracl-v1.0-de + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5643 + R@100: 0.9482 + - name: bm25-mdpr-tied-pft-msmarco-hybrid.yo + eval_key: miracl-v1.0-yo + command: python -m pyserini.fusion --runs ${bm25_output} ${mdpr_output} --output ${output} --method interpolation --alpha 0.5 --depth 1000 --k 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6114 + R@100: 0.9496 + + # mdpr-tied-pft-msmarco-ft-miracl-ft-miracl + - name: mdpr-tied-pft-msmarco-ft-miracl.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-ar --topics miracl-v1.0-ar-${split} 
--index miracl-v1.0-ar-mdpr-tied-pft-msmarco-ft-miracl-ar --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.7252 + R@100: 0.9489 + - name: mdpr-tied-pft-msmarco-ft-miracl.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-bn --topics miracl-v1.0-bn-${split} --index miracl-v1.0-bn-mdpr-tied-pft-msmarco-ft-miracl-bn --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6842 + R@100: 0.9547 + - name: mdpr-tied-pft-msmarco-ft-miracl.en + eval_key: miracl-v1.0-en + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-en --topics miracl-v1.0-en-${split} --index miracl-v1.0-en-mdpr-tied-pft-msmarco-ft-miracl-en --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4878 + R@100: 0.8341 + - name: mdpr-tied-pft-msmarco-ft-miracl.es + eval_key: miracl-v1.0-es + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-es --topics miracl-v1.0-es-${split} --index miracl-v1.0-es-mdpr-tied-pft-msmarco-ft-miracl-es --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5648 + R@100: 0.9109 + - name: mdpr-tied-pft-msmarco-ft-miracl.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-fa --topics miracl-v1.0-fa-${split} --index miracl-v1.0-fa-mdpr-tied-pft-msmarco-ft-miracl-fa --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5934 + R@100: 0.9133 + - name: mdpr-tied-pft-msmarco-ft-miracl.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-fi --topics miracl-v1.0-fi-${split} --index miracl-v1.0-fi-mdpr-tied-pft-msmarco-ft-miracl-fi --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.7139 + R@100: 0.9479 + - name: mdpr-tied-pft-msmarco-ft-miracl.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-fr --topics miracl-v1.0-fr-${split} --index miracl-v1.0-fr-mdpr-tied-pft-msmarco-ft-miracl-fr --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5893 + R@100: 0.9537 + - name: mdpr-tied-pft-msmarco-ft-miracl.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-hi --topics miracl-v1.0-hi-${split} --index miracl-v1.0-hi-mdpr-tied-pft-msmarco-ft-miracl-hi --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5164 + R@100: 0.8862 + - name: mdpr-tied-pft-msmarco-ft-miracl.id + eval_key: miracl-v1.0-id + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-id --topics miracl-v1.0-id-${split} --index miracl-v1.0-id-mdpr-tied-pft-msmarco-ft-miracl-id --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4959 + R@100: 0.8642 + - name: mdpr-tied-pft-msmarco-ft-miracl.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.search.faiss 
--encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-ja --topics miracl-v1.0-ja-${split} --index miracl-v1.0-ja-mdpr-tied-pft-msmarco-ft-miracl-ja --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6416 + R@100: 0.9225 + - name: mdpr-tied-pft-msmarco-ft-miracl.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-ko --topics miracl-v1.0-ko-${split} --index miracl-v1.0-ko-mdpr-tied-pft-msmarco-ft-miracl-ko --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5901 + R@100: 0.8857 + - name: mdpr-tied-pft-msmarco-ft-miracl.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-ru --topics miracl-v1.0-ru-${split} --index miracl-v1.0-ru-mdpr-tied-pft-msmarco-ft-miracl-ru --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.5974 + R@100: 0.9099 + - name: mdpr-tied-pft-msmarco-ft-miracl.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-sw --topics miracl-v1.0-sw-${split} --index miracl-v1.0-sw-mdpr-tied-pft-msmarco-ft-miracl-sw --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6853 + R@100: 0.9367 + - name: mdpr-tied-pft-msmarco-ft-miracl.te + eval_key: miracl-v1.0-te + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-te --topics miracl-v1.0-te-${split} --index miracl-v1.0-te-mdpr-tied-pft-msmarco-ft-miracl-te --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.8037 + R@100: 0.9616 + - name: mdpr-tied-pft-msmarco-ft-miracl.th + eval_key: miracl-v1.0-th + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-th --topics miracl-v1.0-th-${split} --index miracl-v1.0-th-mdpr-tied-pft-msmarco-ft-miracl-th --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6951 + R@100: 0.9311 + - name: mdpr-tied-pft-msmarco-ft-miracl.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-miracl-zh --topics miracl-v1.0-zh-${split} --index miracl-v1.0-zh-mdpr-tied-pft-msmarco-ft-miracl-zh --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.6500 + R@100: 0.9631 + + # mcontriever + - name: mcontriever-tied-pft-msmarco.ar + eval_key: miracl-v1.0-ar + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-ar-${split} --index miracl-v1.0-ar-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5027 + R@100: 0.9166 + - split: dev + scores: + - nDCG@10: 0.5248 + R@100: 0.9253 + - name: mcontriever-tied-pft-msmarco.bn + eval_key: miracl-v1.0-bn + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-bn-${split} --index miracl-v1.0-bn-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - 
nDCG@10: 0.5138 + R@100: 0.9313 + - split: dev + scores: + - nDCG@10: 0.5011 + R@100: 0.9205 + - name: mcontriever-tied-pft-msmarco.en + eval_key: miracl-v1.0-en + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-en-${split} --index miracl-v1.0-en-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3579 + R@100: 0.7990 + - split: dev + scores: + - nDCG@10: 0.3637 + R@100: 0.7967 + - name: mcontriever-tied-pft-msmarco.es + eval_key: miracl-v1.0-es + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-es-${split} --index miracl-v1.0-es-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4081 + R@100: 0.8339 + - split: dev + scores: + - nDCG@10: 0.4184 + R@100: 0.8411 + - name: mcontriever-tied-pft-msmarco.fa + eval_key: miracl-v1.0-fa + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-fa-${split} --index miracl-v1.0-fa-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2263 + R@100: 0.6374 + - split: dev + scores: + - nDCG@10: 0.2152 + R@100: 0.6540 + - name: mcontriever-tied-pft-msmarco.fi + eval_key: miracl-v1.0-fi + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-fi-${split} --index miracl-v1.0-fi-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5680 + R@100: 0.9369 + - split: dev + scores: + - nDCG@10: 0.6019 + R@100: 0.9527 + - name: mcontriever-tied-pft-msmarco.fr + eval_key: miracl-v1.0-fr + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-fr-${split} --index miracl-v1.0-fr-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3332 + R@100: 0.8341 + - split: dev + scores: + - nDCG@10: 0.3140 + R@100: 0.8243 + - name: mcontriever-tied-pft-msmarco.hi + eval_key: miracl-v1.0-hi + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-hi-${split} --index miracl-v1.0-hi-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.2886 + R@100: 0.6664 + - split: dev + scores: + - nDCG@10: 0.2864 + R@100: 0.6461 + - name: mcontriever-tied-pft-msmarco.id + eval_key: miracl-v1.0-id + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-id-${split} --index miracl-v1.0-id-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3748 + R@100: 0.7955 + - split: dev + scores: + - nDCG@10: 0.3915 + R@100: 0.8015 + - name: mcontriever-tied-pft-msmarco.ja + eval_key: miracl-v1.0-ja + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-ja-${split} --index miracl-v1.0-ja-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train 
+ scores: + - nDCG@10: 0.4402 + R@100: 0.8813 + - split: dev + scores: + - nDCG@10: 0.4240 + R@100: 0.8783 + - name: mcontriever-tied-pft-msmarco.ko + eval_key: miracl-v1.0-ko + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-ko-${split} --index miracl-v1.0-ko-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4799 + R@100: 0.8672 + - split: dev + scores: + - nDCG@10: 0.4829 + R@100: 0.8753 + - name: mcontriever-tied-pft-msmarco.ru + eval_key: miracl-v1.0-ru + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-ru-${split} --index miracl-v1.0-ru-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.3811 + R@100: 0.8369 + - split: dev + scores: + - nDCG@10: 0.3913 + R@100: 0.8500 + - name: mcontriever-tied-pft-msmarco.sw + eval_key: miracl-v1.0-sw + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-sw-${split} --index miracl-v1.0-sw-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5568 + R@100: 0.9130 + - split: dev + scores: + - nDCG@10: 0.5600 + R@100: 0.9108 + - name: mcontriever-tied-pft-msmarco.te + eval_key: miracl-v1.0-te + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-te-${split} --index miracl-v1.0-te-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5260 + R@100: 0.9457 + - split: dev + scores: + - nDCG@10: 0.5283 + R@100: 0.9612 + - name: mcontriever-tied-pft-msmarco.th + eval_key: miracl-v1.0-th + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-th-${split} --index miracl-v1.0-th-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.5299 + R@100: 0.9361 + - split: dev + scores: + - nDCG@10: 0.5173 + R@100: 0.9361 + - name: mcontriever-tied-pft-msmarco.zh + eval_key: miracl-v1.0-zh + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-zh-${split} --index miracl-v1.0-zh-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: train + scores: + - nDCG@10: 0.4283 + R@100: 0.8745 + - split: dev + scores: + - nDCG@10: 0.4097 + R@100: 0.9026 + - name: mcontriever-tied-pft-msmarco.de + eval_key: miracl-v1.0-de + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-de-${split} --index miracl-v1.0-de-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4079 + R@100: 0.8407 + - name: mcontriever-tied-pft-msmarco.yo + eval_key: miracl-v1.0-yo + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-yo-${split} --index miracl-v1.0-yo-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000 + splits: + - split: dev + scores: + - nDCG@10: 0.4150 + R@100: 0.7703 diff 
--git a/pyserini/2cr/miracl_html.template b/pyserini/2cr/miracl_html.template new file mode 100644 index 0000000000000000000000000000000000000000..2c16881851e9c6e87d475acac1309a227f8c139d --- /dev/null +++ b/pyserini/2cr/miracl_html.template @@ -0,0 +1,256 @@ + + + + + + + Pyserini Reproductions + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ + $tables + + + +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. +To list all the experimental conditions:

+ +
+python -m pyserini.2cr.miracl --list-conditions +
+ +
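As a rough illustration, --list-conditions boils down to enumerating the condition entries in the packaged YAML. A minimal sketch in Python, assuming the MIRACL conditions shown in this diff ship as miracl.yaml inside the pyserini.2cr package (mirroring mrtydi.yaml later in this diff) and that pyserini is installed:

import pkg_resources
import yaml

# Locate the packaged YAML; the filename miracl.yaml is assumed here.
with open(pkg_resources.resource_filename('pyserini.2cr', 'miracl.yaml')) as f:
    data = yaml.safe_load(f)

# Condition names follow the '<model>.<language>' pattern,
# e.g., 'mcontriever-tied-pft-msmarco.ja'.
for condition in data['conditions']:
    print(condition['name'])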

Run all languages for a specific condition and show commands:

+ +
+python -m pyserini.2cr.miracl --condition bm25 --display-commands +
+ +

Run a particular language for a specific condition and show commands:

+ +
+python -m pyserini.2cr.miracl --condition bm25 --language ko --display-commands +
+ +

Run all languages for all conditions and show commands:

+ +
+python -m pyserini.2cr.miracl --all --display-commands +
+ +
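For a sense of what --display-commands prints, each condition's command field in the YAML is a string template with ${split} and $output placeholders. A minimal sketch of the expansion, using a hypothetical output filename; the substitution mirrors the string.Template usage in mrtydi.py later in this diff:

from string import Template

# Command template copied from the mcontriever-tied-pft-msmarco.ja condition in this diff.
cmd_template = ('python -m pyserini.search.faiss --encoder-class contriever '
                '--encoder facebook/mcontriever-msmarco --topics miracl-v1.0-ja-${split} '
                '--index miracl-v1.0-ja-mcontriever-pft-msmarco --output $output '
                '--batch 128 --threads 16 --hits 1000')

# 'run.miracl.mcontriever-tied-pft-msmarco.ja.dev.txt' is a hypothetical run-file name.
cmd = Template(cmd_template).substitute(
    split='dev', output='run.miracl.mcontriever-tied-pft-msmarco.ja.dev.txt')
print(cmd)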

With the above commands, run files will be placed in the current directory. Use the option --directory runs to place the runs in a sub-directory.

+ +

For a specific condition, just show the commands and do not run:

+ +
+python -m pyserini.2cr.miracl --condition bm25 --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table), without executing them.

+ +

For a specific condition and language, just show the commands and do not run:

+ +
+python -m pyserini.2cr.miracl --condition bm25 --language ko --display-commands --dry-run +
+ +

For all conditions, just show the commands, do not run them, and skip evaluation:

+ +
+python -m pyserini.2cr.miracl --all --display-commands --dry-run --skip-eval +
+ +
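The three flags interact in a simple way. A hedged sketch of the gating, with illustrative names, following the run_conditions logic in mrtydi.py later in this diff (the MIRACL driver presumably behaves the same): the command is printed if requested, executed only when the run file is missing and --dry-run is not set, and scored only when --skip-eval is not set.

import os

def process_split(cmd, runfile, display_commands, dry_run, skip_eval, evaluate):
    # evaluate is a callable that scores runfile (e.g., by invoking trec_eval).
    if display_commands:
        print(cmd)
    if not os.path.exists(runfile) and not dry_run:
        os.system(cmd)          # generate the run
    if skip_eval:
        return None             # keep the expected scores recorded in the YAML instead
    return evaluate(runfile)    # score the run that was produced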

Finally, to generate this page:

+ +
+python -m pyserini.2cr.miracl --generate-report --output docs/2cr/miracl.html +
+ +

The output file miracl.html should be identical to this page.

+ +
+ +
+ + + + + + + + + + diff --git a/pyserini/2cr/miracl_html_table.template b/pyserini/2cr/miracl_html_table.template new file mode 100644 index 0000000000000000000000000000000000000000..c008b40149c0fe1fd71d0d20e941374917824a64 --- /dev/null +++ b/pyserini/2cr/miracl_html_table.template @@ -0,0 +1,35 @@ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
$desc ar bn en es fa fi fr hi id ja ko ru sw te th zh de yo avg
+
diff --git a/pyserini/2cr/miracl_html_table_row.template b/pyserini/2cr/miracl_html_table_row.template new file mode 100644 index 0000000000000000000000000000000000000000..24a78c846692b22c275bbfd9f5124dd7b2ae4c56 --- /dev/null +++ b/pyserini/2cr/miracl_html_table_row.template @@ -0,0 +1,336 @@ + + + +$model +$ar +$bn +$en +$es +$fa +$fi +$fr +$hi +$id +$ja +$ko +$ru +$sw +$te +$th +$zh +$de +$yo + +$avg + + + +
+ + + + + + +
+
+Command to generate run: + +
+
$cmd1
+
+Evaluation commands: + +
+
${eval_cmd1}
+
+ +
+
+Command to generate run: + +
+
$cmd2
+
+Evaluation commands: + +
+
${eval_cmd2}
+
+ +
+
+Command to generate run: + +
+
$cmd3
+
+Evaluation commands: + +
+
${eval_cmd3}
+
+ +
+
+Command to generate run: + +
+
$cmd4
+
+Evaluation commands: + +
+
${eval_cmd4}
+
+ +
+
+Command to generate run: + +
+
$cmd5
+
+Evaluation commands: + +
+
${eval_cmd5}
+
+ +
+
+Command to generate run: + +
+
$cmd6
+
+Evaluation commands: + +
+
${eval_cmd6}
+
+ +
+
+Command to generate run: + +
+
$cmd7
+
+Evaluation commands: + +
+
${eval_cmd7}
+
+ +
+
+Command to generate run: + +
+
$cmd8
+
+Evaluation commands: + +
+
${eval_cmd8}
+
+ +
+
+Command to generate run: + +
+
$cmd9
+
+Evaluation commands: + +
+
${eval_cmd9}
+
+ +
+
+Command to generate run: + +
+
$cmd10
+
+Evaluation commands: + +
+
${eval_cmd10}
+
+ +
+
+Command to generate run: + +
+
$cmd11
+
+Evaluation commands: + +
+
${eval_cmd11}
+
+ +
+
+Command to generate run: + +
+
$cmd12
+
+Evaluation commands: + +
+
${eval_cmd12}
+
+ +
+
+Command to generate run: + +
+
$cmd13
+
+Evaluation commands: + +
+
${eval_cmd13}
+
+ +
+
+ +Command to generate run: + +
+
$cmd14
+
+Evaluation commands: + +
+
${eval_cmd14}
+
+ +
+
+ +Command to generate run: + +
+
$cmd15
+
+Evaluation commands: + +
+
${eval_cmd15}
+
+ +
+ +
+Command to generate run: + +
+
$cmd16
+
+Evaluation commands: + +
+
${eval_cmd16}
+
+ +
+ +
+Command to generate run: + +
+
$cmd17
+
+Evaluation commands: + +
+
${eval_cmd17}
+
+ +
+ +
+Command to generate run: + +
+
$cmd18
+
+Evaluation commands: + +
+
${eval_cmd18}
+
+ +
+ +
+ + +
+ diff --git a/pyserini/2cr/mrtydi.py b/pyserini/2cr/mrtydi.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef2c78ef589b394b4918a3401582ea7e864ee01 --- /dev/null +++ b/pyserini/2cr/mrtydi.py @@ -0,0 +1,330 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import defaultdict +from string import Template + +import argparse +import math +import os +import pkg_resources +import sys +import time +import yaml + +from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str + +languages = [ + ['ar', 'arabic'], + ['bn', 'bengali'], + ['en', 'english'], + ['fi', 'finnish'], + ['id', 'indonesian'], + ['ja', 'japanese'], + ['ko', 'korean'], + ['ru', 'russian'], + ['sw', 'swahili'], + ['te', 'telugu'], + ['th', 'thai'] +] + +models = ['bm25', 'mdpr-split-pft-nq', 'mdpr-tied-pft-nq', 'mdpr-tied-pft-msmarco', 'mdpr-tied-pft-msmarco-ft-all'] + +html_display = { + 'bm25': 'BM25', + 'mdpr-split-pft-nq': 'mDPR (split encoders), pre-FT w/ NQ', + 'mdpr-tied-pft-nq': 'mDPR (tied encoders), pre-FT w/ NQ', + 'mdpr-tied-pft-msmarco': 'mDPR (tied encoders), pre-FT w/ MS MARCO', + 'mdpr-tied-pft-msmarco-ft-all': 'mDPR (tied encoders), pre-FT w/ MS MARCO, FT w/ all' +} + +trec_eval_metric_definitions = { + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@100': '-c -m recall.100', +} + + +def format_run_command(raw): + return raw.replace('--lang', '\\\n --lang')\ + .replace('--encoder', '\\\n --encoder')\ + .replace('--topics', '\\\n --topics')\ + .replace('--index', '\\\n --index')\ + .replace('--output ', '\\\n --output ')\ + .replace('--batch ', '\\\n --batch ') \ + .replace('--threads 12', '--threads 12 \\\n ') + + +def format_eval_command(raw): + return raw.replace('-c ', '\\\n -c ')\ + .replace(raw.split()[-1], f'\\\n {raw.split()[-1]}') + + +def read_file(f): + fin = open(f, 'r') + text = fin.read() + fin.close() + + return text + + +def list_conditions(): + print('Conditions:\n-----------') + for condition in models: + print(condition) + print('\nLanguages\n---------') + for language in languages: + print(language[0]) + + +def print_results(table, metric, split): + print(f'Metric = {metric}, Split = {split}') + print(' ' * 32, end='') + for lang in languages: + print(f'{lang[0]:3} ', end='') + print('') + for model in models: + print(f'{model:30}', end='') + for lang in languages: + key = f'{model}.{lang[0]}' + print(f'{table[key][split][metric]:7.3f}', end='') + print('') + print('') + + +def generate_table_rows(table, row_template, commands, eval_commands, table_id, split, metric): + row_cnt = 1 + html_rows = [] + + for model in models: + s = Template(row_template) + + keys = {} + for lang in languages: + keys[lang[0]] = f'{model}.{lang[0]}' + + sum = table[keys["ar"]][split][metric] + \ + table[keys["bn"]][split][metric] + \ + table[keys["en"]][split][metric] + \ + table[keys["fi"]][split][metric] + \ + table[keys["id"]][split][metric] + \ + table[keys["ja"]][split][metric] + \ 
+ table[keys["ko"]][split][metric] + \ + table[keys["ru"]][split][metric] + \ + table[keys["sw"]][split][metric] + \ + table[keys["te"]][split][metric] + \ + table[keys["th"]][split][metric] + avg = sum / 11 + + s = s.substitute(table_cnt=table_id, + row_cnt=row_cnt, + model=html_display[model], + ar=f'{table[keys["ar"]][split][metric]:.3f}', + bn=f'{table[keys["bn"]][split][metric]:.3f}', + en=f'{table[keys["en"]][split][metric]:.3f}', + fi=f'{table[keys["fi"]][split][metric]:.3f}', + id=f'{table[keys["id"]][split][metric]:.3f}', + ja=f'{table[keys["ja"]][split][metric]:.3f}', + ko=f'{table[keys["ko"]][split][metric]:.3f}', + ru=f'{table[keys["ru"]][split][metric]:.3f}', + sw=f'{table[keys["sw"]][split][metric]:.3f}', + te=f'{table[keys["te"]][split][metric]:.3f}', + th=f'{table[keys["th"]][split][metric]:.3f}', + avg=f'{avg:.3f}', + cmd1=f'{commands[keys["ar"]]}', + cmd2=f'{commands[keys["bn"]]}', + cmd3=f'{commands[keys["en"]]}', + cmd4=f'{commands[keys["fi"]]}', + cmd5=f'{commands[keys["id"]]}', + cmd6=f'{commands[keys["ja"]]}', + cmd7=f'{commands[keys["ko"]]}', + cmd8=f'{commands[keys["ru"]]}', + cmd9=f'{commands[keys["sw"]]}', + cmd10=f'{commands[keys["te"]]}', + cmd11=f'{commands[keys["th"]]}', + eval_cmd1=f'{eval_commands[keys["ar"]][metric]}', + eval_cmd2=f'{eval_commands[keys["bn"]][metric]}', + eval_cmd3=f'{eval_commands[keys["en"]][metric]}', + eval_cmd4=f'{eval_commands[keys["fi"]][metric]}', + eval_cmd5=f'{eval_commands[keys["id"]][metric]}', + eval_cmd6=f'{eval_commands[keys["ja"]][metric]}', + eval_cmd7=f'{eval_commands[keys["ko"]][metric]}', + eval_cmd8=f'{eval_commands[keys["ru"]][metric]}', + eval_cmd9=f'{eval_commands[keys["sw"]][metric]}', + eval_cmd10=f'{eval_commands[keys["te"]][metric]}', + eval_cmd11=f'{eval_commands[keys["th"]][metric]}' + ) + + html_rows.append(s) + row_cnt += 1 + + return html_rows + + +def generate_report(args): + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + commands = defaultdict(lambda: '') + eval_commands = defaultdict(lambda: defaultdict(lambda: '')) + + html_template = read_file(pkg_resources.resource_filename(__name__, 'mrtydi_html.template')) + table_template = read_file(pkg_resources.resource_filename(__name__, 'mrtydi_html_table.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'mrtydi_html_table_row.template')) + + with open(pkg_resources.resource_filename(__name__, 'mrtydi.yaml')) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + name = condition['name'] + eval_key = condition['eval_key'] + cmd_template = condition['command'] + + for splits in condition['splits']: + split = splits['split'] + + runfile = os.path.join(args.directory, f'run.mrtydi.{name}.{split}.txt') + cmd = Template(cmd_template).substitute(split=split, output=runfile) + commands[name] = format_run_command(cmd) + + for expected in splits['scores']: + for metric in expected: + table[name][split][metric] = expected[metric] + + eval_cmd = f'python -m pyserini.eval.trec_eval ' + \ + f'{trec_eval_metric_definitions[metric]} {eval_key}-{split} {runfile}' + eval_commands[name][metric] = format_eval_command(eval_cmd) + + tables_html = [] + + # Build the table for MRR@100, test queries + html_rows = generate_table_rows(table, row_template, commands, eval_commands, 1, 'test', 'MRR@100') + all_rows = '\n'.join(html_rows) + tables_html.append(Template(table_template).substitute(desc='MRR@100, test queries', rows=all_rows)) + + # Build the table for R@100, test queries + html_rows = 
generate_table_rows(table, row_template, commands, eval_commands, 2, 'test', 'R@100') + all_rows = '\n'.join(html_rows) + tables_html.append(Template(table_template).substitute(desc='Recall@100, test queries', rows=all_rows)) + + with open(args.output, 'w') as out: + out.write(Template(html_template).substitute(title='Mr.TyDi', tables=' '.join(tables_html))) + + +def run_conditions(args): + start = time.time() + + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + + with open(pkg_resources.resource_filename(__name__, 'mrtydi.yaml')) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + name = condition['name'] + encoder = name.split('.')[0] + lang = name.split('.')[-1] + if args.all: + pass + elif args.condition != encoder: + continue + elif args.language and args.language != lang: + continue + eval_key = condition['eval_key'] + cmd_template = condition['command'] + + print(f'condition {name}:') + + for splits in condition['splits']: + split = splits['split'] + + print(f' - split: {split}') + + runfile = os.path.join(args.directory, f'run.mrtydi.{name}.{split}.txt') + cmd = Template(cmd_template).substitute(split=split, output=runfile) + + if args.display_commands: + print(f'\n```bash\n{format_run_command(cmd)}\n```\n') + + if not os.path.exists(runfile): + if not args.dry_run: + os.system(cmd) + + for expected in splits['scores']: + for metric in expected: + if not args.skip_eval: + score = float(run_eval_and_return_metric(metric, f'{eval_key}-{split}', + trec_eval_metric_definitions[metric], runfile)) + if math.isclose(score, float(expected[metric])): + result_str = ok_str + # Flaky test: small difference on orca + elif name == 'mdpr-tied-pft-nq.te' and split == 'dev' \ + and math.isclose(score, float(expected[metric]), abs_tol=2e-4): + result_str = okish_str + # Flaky test: small difference on orca + elif name == 'mdpr-tied-pft-msmarco-ft-all.ko' and split == 'train' \ + and math.isclose(score, float(expected[metric]), abs_tol=4e-4): + result_str = okish_str + # Flaky test: small difference on Mac Studio (M1) + elif name == 'mdpr-tied-pft-msmarco.th' and split == 'train' \ + and math.isclose(score, float(expected[metric]), abs_tol=3e-4): + result_str = okish_str + else: + result_str = fail_str + f' expected {expected[metric]:.4f}' + print(f' {metric:7}: {score:.4f} {result_str}') + table[name][split][metric] = score + else: + table[name][split][metric] = expected[metric] + + print('') + + for metric in ['MRR@100', 'R@100']: + for split in ['test', 'dev', 'train']: + print_results(table, metric, split) + + end = time.time() + print(f'Total elapsed time: {end - start:.0f}s') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate regression matrix for Mr.TyDi.') + parser.add_argument('--condition', type=str, + help='Condition to run', required=False) + # To list all conditions + parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.') + # For generating reports + parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.') + parser.add_argument('--output', type=str, help='File to store report.', required=False) + # For actually running the experimental conditions + parser.add_argument('--all', action='store_true', default=False, help='Run using all languages.') + parser.add_argument('--language', type=str, help='Language to run.', required=False) + parser.add_argument('--directory', type=str, help='Base 
directory.', default='', required=False) + parser.add_argument('--dry-run', action='store_true', default=False, help='Print out commands but do not execute.') + parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.') + parser.add_argument('--display-commands', action='store_true', default=False, help='Display command.') + args = parser.parse_args() + + if args.list_conditions: + list_conditions() + sys.exit() + + if args.generate_report: + if not args.output: + print(f'Must specify report filename with --output.') + sys.exit() + + generate_report(args) + sys.exit() + + if args.all and (args.condition or args.language): + print('Specifying --all will run all conditions and languages') + sys.exit() + + run_conditions(args) diff --git a/pyserini/2cr/mrtydi.yaml b/pyserini/2cr/mrtydi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..333e9509e46c8265a0b72747ffa76742e49e3fdf --- /dev/null +++ b/pyserini/2cr/mrtydi.yaml @@ -0,0 +1,890 @@ +conditions: + # mDPR, tied encoders, pFT w/ MS MARCO, FT all + - name: mdpr-tied-pft-msmarco-ft-all.ar + eval_key: mrtydi-v1.1-arabic + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-arabic-${split} --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9505 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.6913 + R@100: 0.9165 + - split: test + scores: + - MRR@100: 0.6949 + R@100: 0.9004 + - name: mdpr-tied-pft-msmarco-ft-all.bn + eval_key: mrtydi-v1.1-bengali + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-bengali-${split} --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9620 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.5897 + R@100: 0.8977 + - split: test + scores: + - MRR@100: 0.6228 + R@100: 0.9550 + - name: mdpr-tied-pft-msmarco-ft-all.en + eval_key: mrtydi-v1.1-english + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-english-${split} --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.8278 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.5357 + R@100: 0.8884 + - split: test + scores: + - MRR@100: 0.4916 + R@100: 0.8414 + - name: mdpr-tied-pft-msmarco-ft-all.fi + eval_key: mrtydi-v1.1-finnish + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-finnish-${split} --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9577 + R@100: 0.9997 + - split: dev + scores: + - MRR@100: 0.6626 + R@100: 0.9171 + - split: test + scores: + - MRR@100: 0.5595 + R@100: 0.8563 + - name: mdpr-tied-pft-msmarco-ft-all.id + eval_key: mrtydi-v1.1-indonesian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-indonesian-${split} --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all --output 
$output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9469 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.6294 + R@100: 0.9150 + - split: test + scores: + - MRR@100: 0.5783 + R@100: 0.8609 + - name: mdpr-tied-pft-msmarco-ft-all.ja + eval_key: mrtydi-v1.1-japanese + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-japanese-${split} --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.8802 + R@100: 0.9997 + - split: dev + scores: + - MRR@100: 0.5505 + R@100: 0.8696 + - split: test + scores: + - MRR@100: 0.5007 + R@100: 0.8130 + - name: mdpr-tied-pft-msmarco-ft-all.ko + eval_key: mrtydi-v1.1-korean + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-korean-${split} --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9195 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.5645 + R@100: 0.8663 + - split: test + scores: + - MRR@100: 0.4861 + R@100: 0.7854 + - name: mdpr-tied-pft-msmarco-ft-all.ru + eval_key: mrtydi-v1.1-russian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-russian-${split} --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.8473 + R@100: 0.9994 + - split: dev + scores: + - MRR@100: 0.5104 + R@100: 0.8720 + - split: test + scores: + - MRR@100: 0.5161 + R@100: 0.8432 + - name: mdpr-tied-pft-msmarco-ft-all.sw + eval_key: mrtydi-v1.1-swahili + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-swahili-${split} --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9515 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.6404 + R@100: 0.9018 + - split: test + scores: + - MRR@100: 0.6438 + R@100: 0.8756 + - name: mdpr-tied-pft-msmarco-ft-all.te + eval_key: mrtydi-v1.1-telugu + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-telugu-${split} --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9679 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.7962 + R@100: 0.9593 + - split: test + scores: + - MRR@100: 0.8908 + R@100: 0.9659 + - name: mdpr-tied-pft-msmarco-ft-all.th + eval_key: mrtydi-v1.1-thai + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco-ft-all --topics mrtydi-v1.1-thai-${split} --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.9504 + R@100: 1.0000 + - split: dev + scores: + - MRR@100: 0.6670 + R@100: 0.9114 + - split: test + scores: + - MRR@100: 0.6175 + R@100: 0.8826 + + # mDPR, tied encoders, pFT w/ MS MARCO + - name: mdpr-tied-pft-msmarco.ar + eval_key: mrtydi-v1.1-arabic + command: python -m 
pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-arabic-${split} --index mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3957 + R@100: 0.7818 + - split: dev + scores: + - MRR@100: 0.3978 + R@100: 0.7778 + - split: test + scores: + - MRR@100: 0.4414 + R@100: 0.7971 + - name: mdpr-tied-pft-msmarco.bn + eval_key: mrtydi-v1.1-bengali + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-bengali-${split} --index mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2920 + R@100: 0.7323 + - split: dev + scores: + - MRR@100: 0.2993 + R@100: 0.7318 + - split: test + scores: + - MRR@100: 0.3969 + R@100: 0.7838 + - name: mdpr-tied-pft-msmarco.en + eval_key: mrtydi-v1.1-english + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-english-${split} --index mrtydi-v1.1-english-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3374 + R@100: 0.8111 + - split: dev + scores: + - MRR@100: 0.3451 + R@100: 0.7995 + - split: test + scores: + - MRR@100: 0.3270 + R@100: 0.7536 + - name: mdpr-tied-pft-msmarco.fi + eval_key: mrtydi-v1.1-finnish + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-finnish-${split} --index mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3668 + R@100: 0.7337 + - split: dev + scores: + - MRR@100: 0.3636 + R@100: 0.7371 + - split: test + scores: + - MRR@100: 0.2750 + R@100: 0.6471 + - name: mdpr-tied-pft-msmarco.id + eval_key: mrtydi-v1.1-indonesian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-indonesian-${split} --index mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2794 + R@100: 0.7044 + - split: dev + scores: + - MRR@100: 0.2853 + R@100: 0.7198 + - split: test + scores: + - MRR@100: 0.3520 + R@100: 0.7356 + - name: mdpr-tied-pft-msmarco.ja + eval_key: mrtydi-v1.1-japanese + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-japanese-${split} --index mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3089 + R@100: 0.7603 + - split: dev + scores: + - MRR@100: 0.3108 + R@100: 0.7597 + - split: test + scores: + - MRR@100: 0.3107 + R@100: 0.7317 + - name: mdpr-tied-pft-msmarco.ko + eval_key: mrtydi-v1.1-korean + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-korean-${split} --index mrtydi-v1.1-korean-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3003 + R@100: 0.6907 + - split: dev + scores: + - MRR@100: 0.3017 + R@100: 0.7046 + - split: test + scores: + - MRR@100: 0.2820 + R@100: 0.6172 + - name: 
mdpr-tied-pft-msmarco.ru + eval_key: mrtydi-v1.1-russian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-russian-${split} --index mrtydi-v1.1-russian-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2856 + R@100: 0.7305 + - split: dev + scores: + - MRR@100: 0.2943 + R@100: 0.7404 + - split: test + scores: + - MRR@100: 0.3561 + R@100: 0.7432 + - name: mdpr-tied-pft-msmarco.sw + eval_key: mrtydi-v1.1-swahili + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-swahili-${split} --index mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2491 + R@100: 0.5195 + - split: dev + scores: + - MRR@100: 0.2447 + R@100: 0.5266 + - split: test + scores: + - MRR@100: 0.3418 + R@100: 0.6343 + - name: mdpr-tied-pft-msmarco.te + eval_key: mrtydi-v1.1-telugu + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-telugu-${split} --index mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3059 + R@100: 0.7510 + - split: dev + scores: + - MRR@100: 0.2995 + R@100: 0.7355 + - split: test + scores: + - MRR@100: 0.3102 + R@100: 0.7817 + - name: mdpr-tied-pft-msmarco.th + eval_key: mrtydi-v1.1-thai + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics mrtydi-v1.1-thai-${split} --index mrtydi-v1.1-thai-mdpr-tied-pft-msmarco --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2334 + R@100: 0.5851 + - split: dev + scores: + - MRR@100: 0.2407 + R@100: 0.5795 + - split: test + scores: + - MRR@100: 0.2693 + R@100: 0.5945 + + # mDPR, tied encoders, pFT w/ NQ + - name: mdpr-tied-pft-nq.ar + eval_key: mrtydi-v1.1-arabic + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-arabic-${split} --index mrtydi-v1.1-arabic-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2087 + R@100: 0.5854 + - split: dev + scores: + - MRR@100: 0.2132 + R@100: 0.5868 + - split: test + scores: + - MRR@100: 0.2214 + R@100: 0.6001 + - name: mdpr-tied-pft-nq.bn + eval_key: mrtydi-v1.1-bengali + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-bengali-${split} --index mrtydi-v1.1-bengali-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2371 + R@100: 0.6281 + - split: dev + scores: + - MRR@100: 0.2414 + R@100: 0.6409 + - split: test + scores: + - MRR@100: 0.2535 + R@100: 0.7072 + - name: mdpr-tied-pft-nq.en + eval_key: mrtydi-v1.1-english + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-english-${split} --index mrtydi-v1.1-english-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2441 + R@100: 0.7217 + - split: dev + scores: + - MRR@100: 0.2359 + R@100: 0.7187 + - split: test + scores: + - MRR@100: 
0.2433 + R@100: 0.6893 + - name: mdpr-tied-pft-nq.fi + eval_key: mrtydi-v1.1-finnish + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-finnish-${split} --index mrtydi-v1.1-finnish-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2996 + R@100: 0.6787 + - split: dev + scores: + - MRR@100: 0.3252 + R@100: 0.7037 + - split: test + scores: + - MRR@100: 0.2444 + R@100: 0.6401 + - name: mdpr-tied-pft-nq.id + eval_key: mrtydi-v1.1-indonesian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-indonesian-${split} --index mrtydi-v1.1-indonesian-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2706 + R@100: 0.7322 + - split: dev + scores: + - MRR@100: 0.2719 + R@100: 0.7394 + - split: test + scores: + - MRR@100: 0.2815 + R@100: 0.6914 + - name: mdpr-tied-pft-nq.ja + eval_key: mrtydi-v1.1-japanese + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-japanese-${split} --index mrtydi-v1.1-japanese-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2165 + R@100: 0.6043 + - split: dev + scores: + - MRR@100: 0.2299 + R@100: 0.6239 + - split: test + scores: + - MRR@100: 0.2058 + R@100: 0.5734 + - name: mdpr-tied-pft-nq.ko + eval_key: mrtydi-v1.1-korean + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-korean-${split} --index mrtydi-v1.1-korean-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2527 + R@100: 0.6556 + - split: dev + scores: + - MRR@100: 0.2680 + R@100: 0.6271 + - split: test + scores: + - MRR@100: 0.2234 + R@100: 0.5499 + - name: mdpr-tied-pft-nq.ru + eval_key: mrtydi-v1.1-russian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-russian-${split} --index mrtydi-v1.1-russian-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2160 + R@100: 0.6262 + - split: dev + scores: + - MRR@100: 0.2263 + R@100: 0.6444 + - split: test + scores: + - MRR@100: 0.2501 + R@100: 0.6181 + - name: mdpr-tied-pft-nq.sw + eval_key: mrtydi-v1.1-swahili + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-swahili-${split} --index mrtydi-v1.1-swahili-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2383 + R@100: 0.5707 + - split: dev + scores: + - MRR@100: 0.2543 + R@100: 0.6138 + - split: test + scores: + - MRR@100: 0.2621 + R@100: 0.5965 + - name: mdpr-tied-pft-nq.te + eval_key: mrtydi-v1.1-telugu + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-telugu-${split} --index mrtydi-v1.1-telugu-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1483 + R@100: 0.4162 + - split: dev + scores: + - MRR@100: 0.1494 + R@100: 0.3967 + - split: test + scores: + - MRR@100: 0.0970 + R@100: 0.2454 + - name: 
mdpr-tied-pft-nq.th + eval_key: mrtydi-v1.1-thai + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder-class auto --encoder castorini/mdpr-tied-pft-nq --topics mrtydi-v1.1-thai-${split} --index mrtydi-v1.1-thai-mdpr-tied-pft-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1426 + R@100: 0.4717 + - split: dev + scores: + - MRR@100: 0.1618 + R@100: 0.4637 + - split: test + scores: + - MRR@100: 0.1575 + R@100: 0.4550 + + # mDPR, split encoders, pFT w/ NQ + - name: mdpr-split-pft-nq.ar + eval_key: mrtydi-v1.1-arabic + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-arabic-${split} --index mrtydi-v1.1-arabic-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2510 + R@100: 0.6384 + - split: dev + scores: + - MRR@100: 0.2449 + R@100: 0.6334 + - split: test + scores: + - MRR@100: 0.2907 + R@100: 0.6502 + - name: mdpr-split-pft-nq.bn + eval_key: mrtydi-v1.1-bengali + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-bengali-${split} --index mrtydi-v1.1-bengali-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2293 + R@100: 0.6454 + - split: dev + scores: + - MRR@100: 0.2367 + R@100: 0.6511 + - split: test + scores: + - MRR@100: 0.2911 + R@100: 0.7793 + - name: mdpr-split-pft-nq.en + eval_key: mrtydi-v1.1-english + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-english-${split} --index mrtydi-v1.1-english-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2862 + R@100: 0.7372 + - split: dev + scores: + - MRR@100: 0.2821 + R@100: 0.7437 + - split: test + scores: + - MRR@100: 0.2907 + R@100: 0.6779 + - name: mdpr-split-pft-nq.fi + eval_key: mrtydi-v1.1-finnish + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-finnish-${split} --index mrtydi-v1.1-finnish-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2473 + R@100: 0.6289 + - split: dev + scores: + - MRR@100: 0.2466 + R@100: 0.6283 + - split: test + scores: + - MRR@100: 0.2050 + R@100: 0.5680 + - name: mdpr-split-pft-nq.id + eval_key: mrtydi-v1.1-indonesian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-indonesian-${split} --index mrtydi-v1.1-indonesian-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2351 + R@100: 0.6952 + - split: dev + scores: + - MRR@100: 0.2475 + R@100: 0.7181 + - split: test + scores: + - MRR@100: 0.2705 + R@100: 0.6848 + - name: mdpr-split-pft-nq.ja + eval_key: mrtydi-v1.1-japanese + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-japanese-${split} --index mrtydi-v1.1-japanese-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1967 + R@100: 0.5983 + - split: dev + scores: + - MRR@100: 0.2055 + R@100: 0.6142 + - split: test + scores: + - MRR@100: 0.2119 + R@100: 0.5840 + - name: mdpr-split-pft-nq.ko + eval_key: mrtydi-v1.1-korean + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics 
mrtydi-v1.1-korean-${split} --index mrtydi-v1.1-korean-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2383 + R@100: 0.6180 + - split: dev + scores: + - MRR@100: 0.2343 + R@100: 0.6238 + - split: test + scores: + - MRR@100: 0.2345 + R@100: 0.5325 + - name: mdpr-split-pft-nq.ru + eval_key: mrtydi-v1.1-russian + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-russian-${split} --index mrtydi-v1.1-russian-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2317 + R@100: 0.6534 + - split: dev + scores: + - MRR@100: 0.2490 + R@100: 0.6553 + - split: test + scores: + - MRR@100: 0.2820 + R@100: 0.6474 + - name: mdpr-split-pft-nq.sw + eval_key: mrtydi-v1.1-swahili + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-swahili-${split} --index mrtydi-v1.1-swahili-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1457 + R@100: 0.4481 + - split: dev + scores: + - MRR@100: 0.1547 + R@100: 0.4724 + - split: test + scores: + - MRR@100: 0.1883 + R@100: 0.5281 + - name: mdpr-split-pft-nq.te + eval_key: mrtydi-v1.1-telugu + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-telugu-${split} --index mrtydi-v1.1-telugu-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1489 + R@100: 0.4905 + - split: dev + scores: + - MRR@100: 0.1503 + R@100: 0.4934 + - split: test + scores: + - MRR@100: 0.1099 + R@100: 0.3661 + - name: mdpr-split-pft-nq.th + eval_key: mrtydi-v1.1-thai + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --encoder castorini/mdpr-question-nq --topics mrtydi-v1.1-thai-${split} --index mrtydi-v1.1-thai-mdpr-nq --output $output --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1603 + R@100: 0.4983 + - split: dev + scores: + - MRR@100: 0.1584 + R@100: 0.5083 + - split: test + scores: + - MRR@100: 0.1709 + R@100: 0.5146 + + # BM25 + - name: bm25.ar + eval_key: mrtydi-v1.1-arabic + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language ar --topics mrtydi-v1.1-arabic-${split} --index mrtydi-v1.1-arabic --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3356 + R@100: 0.7944 + - split: dev + scores: + - MRR@100: 0.3462 + R@100: 0.7872 + - split: test + scores: + - MRR@100: 0.3682 + R@100: 0.7928 + - name: bm25.bn + eval_key: mrtydi-v1.1-bengali + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language bn --topics mrtydi-v1.1-bengali-${split} --index mrtydi-v1.1-bengali --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3566 + - R@100: 0.8336 + - split: dev + scores: + - MRR@100: 0.3385 + - R@100: 0.8432 + - split: test + scores: + - MRR@100: 0.4182 + - R@100: 0.8694 + - name: bm25.en + eval_key: mrtydi-v1.1-english + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language en --topics mrtydi-v1.1-english-${split} --index mrtydi-v1.1-english --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.1592 + - R@100: 0.5785 + - split: dev + scores: + - MRR@100: 0.1685 + - R@100: 0.6196 + - split: test + scores: + - MRR@100: 0.1404 + - R@100: 0.5365 + - name: bm25.fi + eval_key: mrtydi-v1.1-finnish + command: python -m 
pyserini.search.lucene --threads 16 --batch-size 128 --language fi --topics mrtydi-v1.1-finnish-${split} --index mrtydi-v1.1-finnish --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.4101 + - R@100: 0.8198 + - split: dev + scores: + - MRR@100: 0.4136 + - R@100: 0.8285 + - split: test + scores: + - MRR@100: 0.2836 + - R@100: 0.7196 + - name: bm25.id + eval_key: mrtydi-v1.1-indonesian + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language id --topics mrtydi-v1.1-indonesian-${split} --index mrtydi-v1.1-indonesian --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2972 + - R@100: 0.7948 + - split: dev + scores: + - MRR@100: 0.2937 + - R@100: 0.7827 + - split: test + scores: + - MRR@100: 0.3762 + - R@100: 0.8426 + - name: bm25.ja + eval_key: mrtydi-v1.1-japanese + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language ja --topics mrtydi-v1.1-japanese-${split} --index mrtydi-v1.1-japanese --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2262 + - R@100: 0.7290 + - split: dev + scores: + - MRR@100: 0.2250 + - R@100: 0.7252 + - split: test + scores: + - MRR@100: 0.2125 + - R@100: 0.6431 + - name: bm25.ko + eval_key: mrtydi-v1.1-korean + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language ko --topics mrtydi-v1.1-korean-${split} --index mrtydi-v1.1-korean --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2596 + - R@100: 0.6178 + - split: dev + scores: + - MRR@100: 0.2888 + - R@100: 0.6733 + - split: test + scores: + - MRR@100: 0.2848 + - R@100: 0.6188 + - name: bm25.ru + eval_key: mrtydi-v1.1-russian + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language ru --topics mrtydi-v1.1-russian-${split} --index mrtydi-v1.1-russian --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2229 + - R@100: 0.5779 + - split: dev + scores: + - MRR@100: 0.2202 + - R@100: 0.5760 + - split: test + scores: + - MRR@100: 0.3163 + - R@100: 0.6541 + - name: bm25.sw + eval_key: mrtydi-v1.1-swahili + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language sw --topics mrtydi-v1.1-swahili-${split} --index mrtydi-v1.1-swahili --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.2610 + - R@100: 0.5903 + - split: dev + scores: + - MRR@100: 0.2693 + - R@100: 0.5789 + - split: test + scores: + - MRR@100: 0.3893 + - R@100: 0.7642 + - name: bm25.te + eval_key: mrtydi-v1.1-telugu + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language te --topics mrtydi-v1.1-telugu-${split} --index mrtydi-v1.1-telugu --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.4204 + - R@100: 0.8229 + - split: dev + scores: + - MRR@100: 0.4269 + - R@100: 0.8362 + - split: test + scores: + - MRR@100: 0.5283 + - R@100: 0.8971 + - name: bm25.th + eval_key: mrtydi-v1.1-thai + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --language th --topics mrtydi-v1.1-thai-${split} --index mrtydi-v1.1-thai --output $output --bm25 --hits 100 + splits: + - split: train + scores: + - MRR@100: 0.3543 + - R@100: 0.8349 + - split: dev + scores: + - MRR@100: 0.3586 + - R@100: 0.8536 + - split: test + scores: + - MRR@100: 0.4012 + - R@100: 0.8529 diff --git a/pyserini/2cr/mrtydi_html.template b/pyserini/2cr/mrtydi_html.template new file mode 
100644 index 0000000000000000000000000000000000000000..21ec6fcd0d79b631e56f418f78ba0bf9dd0feb3e --- /dev/null +++ b/pyserini/2cr/mrtydi_html.template @@ -0,0 +1,256 @@ + + + + + + + Pyserini Reproductions + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ + $tables + + + +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. + To list all the experimental conditions:

+ +
+ python -m pyserini.2cr.mrtydi --list-conditions +
+ +

Run all languages for a specific condition and show commands:

+ +
+ python -m pyserini.2cr.mrtydi --condition bm25 --display-commands +
+ +

Run a particular language for a specific condition and show commands:

+ +
+ python -m pyserini.2cr.mrtydi --condition bm25 --language ko --display-commands +
+ +

Run all languages for all conditions and show commands:

+ +
+ python -m pyserini.2cr.mrtydi --all --display-commands +
+ +

With the above commands, run files will be placed in the current directory. Use the option --directory runs to place the runs in a sub-directory.

+ +
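Concretely, following run_conditions in mrtydi.py from this diff, the value of --directory is simply joined onto a fixed run-file naming scheme:

import os

directory = 'runs'               # as passed via --directory runs; the default is ''
name, split = 'bm25.ko', 'test'  # condition name and split from mrtydi.yaml

runfile = os.path.join(directory, f'run.mrtydi.{name}.{split}.txt')
print(runfile)                   # runs/run.mrtydi.bm25.ko.test.txt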

For a specific condition, just show the commands and do not run:

+ +
+ python -m pyserini.2cr.mrtydi --condition bm25 --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table), without executing them.

+ +

For a specific condition and language, just show the commands and do not run:

+ +
+ python -m pyserini.2cr.mrtydi --condition bm25 --language ko --display-commands --dry-run +
+ +

For all conditions, just show the commands, do not run them, and skip evaluation:

+ +
+ python -m pyserini.2cr.mrtydi --all --display-commands --dry-run --skip-eval +
+ +
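When evaluation is not skipped, each run is scored with trec_eval and checked against the expected value recorded in mrtydi.yaml. A sketch of that step using the helper imported in mrtydi.py; the run-file path is the hypothetical example from above, and importlib is used because 2cr is not a valid identifier in an import statement:

import importlib
import math

_base = importlib.import_module('pyserini.2cr._base')

runfile = 'runs/run.mrtydi.bm25.ko.test.txt'  # hypothetical path, see above
defs = '-c -M 100 -m recip_rank'              # MRR@100 definition in mrtydi.py
score = float(_base.run_eval_and_return_metric(
    'MRR@100', 'mrtydi-v1.1-korean-test', defs, runfile))

expected = 0.2848                             # bm25.ko, test split, MRR@100 in mrtydi.yaml
print('[OK]' if math.isclose(score, expected) else f'[FAIL] expected {expected:.4f}')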

Finally, to generate this page:

+ +
+ python -m pyserini.2cr.mrtydi --generate-report --output docs/2cr/mrtydi.html +
+ +

The output file mrtydi.html should be identical to this page.

+ +
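Under the hood, report generation is ordinary string templating. A compressed sketch of what --generate-report stitches together, following generate_report in mrtydi.py from this diff, with the per-model rows elided:

import pkg_resources
from string import Template

def read_file(path):
    with open(path) as f:
        return f.read()

page_tmpl = read_file(pkg_resources.resource_filename('pyserini.2cr', 'mrtydi_html.template'))
table_tmpl = read_file(pkg_resources.resource_filename('pyserini.2cr', 'mrtydi_html_table.template'))

# Each table fills $desc and $rows; rows normally come from mrtydi_html_table_row.template.
table_html = Template(table_tmpl).substitute(desc='MRR@100, test queries',
                                             rows='<!-- one row per model, elided -->')

# The page fills $title and $tables.
with open('docs/2cr/mrtydi.html', 'w') as out:
    out.write(Template(page_tmpl).substitute(title='Mr.TyDi', tables=table_html))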
+ +
+ + + + + + + + + + diff --git a/pyserini/2cr/mrtydi_html_table.template b/pyserini/2cr/mrtydi_html_table.template new file mode 100644 index 0000000000000000000000000000000000000000..f61a3d2102360cfa860c75ecd91f0dd13d4fe049 --- /dev/null +++ b/pyserini/2cr/mrtydi_html_table.template @@ -0,0 +1,28 @@ +
+ + + + + + + + + + + + + + + + + + + + + + +$rows + + +
$desc ar bn en fi id ja ko ru sw te th avg
+
diff --git a/pyserini/2cr/mrtydi_html_table_row.template b/pyserini/2cr/mrtydi_html_table_row.template new file mode 100644 index 0000000000000000000000000000000000000000..913cbc5b7cfc6a38cd92f14321e12ca6b48b2aa1 --- /dev/null +++ b/pyserini/2cr/mrtydi_html_table_row.template @@ -0,0 +1,212 @@ + + + +$model +$ar +$bn +$en +$fi +$id +$ja +$ko +$ru +$sw +$te +$th + +$avg + + + + + +
+ + + + + + +
+
+Command to generate run: + +
+
$cmd1
+
+Evaluation commands: + +
+
${eval_cmd1}
+
+ +
+
+Command to generate run: + +
+
$cmd2
+
+Evaluation commands: + +
+
${eval_cmd2}
+
+ +
+
+Command to generate run: + +
+
$cmd3
+
+Evaluation commands: + +
+
${eval_cmd3}
+
+ +
+
+Command to generate run: + +
+
$cmd4
+
+Evaluation commands: + +
+
${eval_cmd4}
+
+ +
+
+Command to generate run: + +
+
$cmd5
+
+Evaluation commands: + +
+
${eval_cmd5}
+
+ +
+
+Command to generate run: + +
+
$cmd6
+
+Evaluation commands: + +
+
${eval_cmd6}
+
+ +
+
+Command to generate run: + +
+
$cmd7
+
+Evaluation commands: + +
+
${eval_cmd7}
+
+ +
+
+Command to generate run: + +
+
$cmd8
+
+Evaluation commands: + +
+
${eval_cmd8}
+
+ +
+
+Command to generate run: + +
+
$cmd9
+
+Evaluation commands: + +
+
${eval_cmd9}
+
+ +
+
+Command to generate run: + +
+
$cmd10
+
+Evaluation commands: + +
+
${eval_cmd10}
+
+ +
+
+Command to generate run: + +
+
$cmd11
+
+Evaluation commands: + +
+
${eval_cmd11}
+
+ +
+
+ + +
+ \ No newline at end of file diff --git a/pyserini/2cr/msmarco-v1-doc.yaml b/pyserini/2cr/msmarco-v1-doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca015d38c50fb0c0bec6f5f42aa2884ef3a59081 --- /dev/null +++ b/pyserini/2cr/msmarco-v1-doc.yaml @@ -0,0 +1,539 @@ +conditions: + - name: bm25-doc-tuned + display: BM25 doc (k1=4.46, b=0.82) + display-html: BM25 doc (k1=4.46, b=0.82) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2767 + R@1K: 0.9357 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2336 + nDCG@10: 0.5233 + R@1K: 0.6757 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3581 + nDCG@10: 0.5061 + R@1K: 0.7776 + - name: bm25-doc-default + display: BM25 doc (k1=0.9, b=0.4) + display-html: BM25 doc (k1=0.9, b=0.4) + display-row: "[1] — (1a)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2299 + R@1K: 0.8856 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2434 + nDCG@10: 0.5176 + R@1K: 0.6966 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3793 + nDCG@10: 0.5286 + R@1K: 0.8085 + - name: bm25-doc-segmented-tuned + display: BM25 doc segmented (k1=2.16, b=0.61) + display-html: BM25 doc segmented (k1=2.16, b=0.61) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2756 + R@1K: 0.9311 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2398 + nDCG@10: 0.5389 + R@1K: 0.6565 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3458 + nDCG@10: 0.5213 + R@1K: 0.7725 + - name: bm25-doc-segmented-default + display: BM25 doc segmented (k1=0.9, b=0.4) + display-html: BM25 doc segmented (k1=0.9, b=0.4) + display-row: "[1] — (1b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2684 + R@1K: 0.9178 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2449 + nDCG@10: 0.5302 + R@1K: 0.6871 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3586 + nDCG@10: 0.5281 + R@1K: 0.7755 + - name: bm25-rm3-doc-tuned + display: BM25+RM3 doc (k1=4.46, b=0.82) + display-html: BM25+RM3 doc (k1=4.46, b=0.82) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2227 + R@1K: 0.9303 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2638 + nDCG@10: 0.5526 + R@1K: 0.7188 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3610 + nDCG@10: 0.5195 + R@1K: 0.8180 + - name: bm25-rm3-doc-default + display: BM25+RM3 doc (k1=0.9, b=0.4) + display-html: BM25+RM3 doc (k1=0.9, b=0.4) + display-row: "[1] — (1c)" + command: 
python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.1618 + R@1K: 0.8783 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2773 + nDCG@10: 0.5174 + R@1K: 0.7507 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4015 + nDCG@10: 0.5254 + R@1K: 0.8259 + - name: bm25-rm3-doc-segmented-tuned + display: BM25+RM3 doc segmented (k1=2.16, b=0.61) + display-html: BM25+RM3 doc segmented (k1=2.16, b=0.61) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2448 + R@1K: 0.9359 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2655 + nDCG@10: 0.5392 + R@1K: 0.7037 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3471 + nDCG@10: 0.5030 + R@1K: 0.8056 + - name: bm25-rm3-doc-segmented-default + display: BM25+RM3 doc segmented (k1=0.9, b=0.4) + display-html: BM25+RM3 doc segmented (k1=0.9, b=0.4) + display-row: "[1] — (1d)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2413 + R@1K: 0.9351 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2892 + nDCG@10: 0.5684 + R@1K: 0.7368 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3792 + nDCG@10: 0.5202 + R@1K: 0.8023 + - name: bm25-rocchio-doc-tuned + display: BM25+Rocchio doc (k1=4.46, b=0.82) + display-html: BM25+Rocchio doc (k1=4.46, b=0.82) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 --rocchio + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2242 + R@1K: 0.9314 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2657 + nDCG@10: 0.5584 + R@1K: 0.7299 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3628 + nDCG@10: 0.5199 + R@1K: 0.8217 + - name: bm25-rocchio-doc-default + display: BM25+Rocchio doc (k1=0.9, b=0.4) + display-html: BM25+Rocchio doc (k1=0.9, b=0.4) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.1624 + R@1K: 0.8789 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2811 + nDCG@10: 0.5256 + R@1K: 0.7546 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4089 + nDCG@10: 0.5192 + R@1K: 0.8273 + - name: bm25-rocchio-doc-segmented-tuned + display: BM25+Rocchio doc segmented (k1=2.16, b=0.61) + display-html: BM25+Rocchio doc segmented (k1=2.16, b=0.61) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2475 + R@1K: 0.9395 + - topic_key: 
dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2672 + nDCG@10: 0.5421 + R@1K: 0.7115 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3521 + nDCG@10: 0.4997 + R@1K: 0.8042 + - name: bm25-rocchio-doc-segmented-default + display: BM25+Rocchio doc segmented (k1=0.9, b=0.4) + display-html: BM25+Rocchio doc segmented (k1=0.9, b=0.4) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2447 + R@1K: 0.9351 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2889 + nDCG@10: 0.5570 + R@1K: 0.7423 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3830 + nDCG@10: 0.5226 + R@1K: 0.8102 + - name: bm25-d2q-t5-doc-tuned + display: BM25 w/ doc2query-T5 doc (k1=4.68, b=0.87) + display-html: BM25 w/ doc2query-T5 doc (k1=4.68, b=0.87) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5 --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3269 + R@1K: 0.9553 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2620 + nDCG@10: 0.5972 + R@1K: 0.6867 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4099 + nDCG@10: 0.5852 + R@1K: 0.8105 + - name: bm25-d2q-t5-doc-default + display: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-row: "[1] — (2a)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2880 + R@1K: 0.9259 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2700 + nDCG@10: 0.5968 + R@1K: 0.7190 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4230 + nDCG@10: 0.5885 + R@1K: 0.8403 + - name: bm25-d2q-t5-doc-segmented-tuned + display: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59) + display-html: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3209 + R@1K: 0.9530 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2658 + nDCG@10: 0.6273 + R@1K: 0.6707 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4047 + nDCG@10: 0.5943 + R@1K: 0.7968 + - name: bm25-d2q-t5-doc-segmented-default + display: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-row: "[1] — (2b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3179 + R@1K: 0.9490 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2798 + nDCG@10: 0.6119 + R@1K: 0.7165 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4150 + 
nDCG@10: 0.5957 + R@1K: 0.8046 + - name: bm25-rm3-d2q-t5-doc-tuned + display: BM25+RM3 w/ doc2query-T5 doc (k1=4.68, b=0.87) + display-html: BM25+RM3 w/ doc2query-T5 doc (k1=4.68, b=0.87) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2623 + R@1K: 0.9522 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2813 + nDCG@10: 0.6091 + R@1K: 0.7184 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4100 + nDCG@10: 0.5745 + R@1K: 0.8238 + - name: bm25-rm3-d2q-t5-doc-default + display: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-row: "[1] — (2c)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.1834 + R@1K: 0.9126 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.3045 + nDCG@10: 0.5904 + R@1K: 0.7737 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4230 + nDCG@10: 0.5427 + R@1K: 0.8631 + - name: bm25-rm3-d2q-t5-doc-segmented-tuned + display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59) + display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2973 + R@1K: 0.9563 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2892 + nDCG@10: 0.6247 + R@1K: 0.7069 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4016 + nDCG@10: 0.5711 + R@1K: 0.8156 + - name: bm25-rm3-d2q-t5-doc-segmented-default + display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-row: "[1] — (2d)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.2803 + R@1K: 0.9551 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.3030 + nDCG@10: 0.6290 + R@1K: 0.7483 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.4271 + nDCG@10: 0.5851 + R@1K: 0.8266 + - name: unicoil-noexp-pytorch + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3410 + R@1K: 0.9420 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2661 + nDCG@10: 0.6347 + R@1K: 0.6385 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3698 + 
nDCG@10: 0.5906 + R@1K: 0.7621 + - name: unicoil-noexp + display: "uniCOIL (noexp): pre-encoded" + display-html: "uniCOIL (noexp): pre-encoded queries" + display-row: "[1] — (3a)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil-noexp --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev-unicoil-noexp + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3409 + R@1K: 0.9420 + - topic_key: dl19-doc-unicoil-noexp + eval_key: dl19-doc + scores: + - MAP: 0.2665 + nDCG@10: 0.6349 + R@1K: 0.6391 + - topic_key: dl20-unicoil-noexp + eval_key: dl20-doc + scores: + - MAP: 0.3698 + nDCG@10: 0.5893 + R@1K: 0.7623 + - name: unicoil-pytorch + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3532 + R@1K: 0.9546 + - topic_key: dl19-doc + eval_key: dl19-doc + scores: + - MAP: 0.2789 + nDCG@10: 0.6396 + R@1K: 0.6654 + - topic_key: dl20 + eval_key: dl20-doc + scores: + - MAP: 0.3881 + nDCG@10: 0.6030 + R@1K: 0.7866 + - name: unicoil + display: "uniCOIL (w/ doc2query-T5): pre-encoded" + display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries" + display-row: "[1] — (3b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-doc-dev-unicoil + eval_key: msmarco-doc-dev + scores: + - MRR@10: 0.3531 + R@1K: 0.9546 + - topic_key: dl19-doc-unicoil + eval_key: dl19-doc + scores: + - MAP: 0.2789 + nDCG@10: 0.6396 + R@1K: 0.6652 + - topic_key: dl20-unicoil + eval_key: dl20-doc + scores: + - MAP: 0.3882 + nDCG@10: 0.6033 + R@1K: 0.7869 diff --git a/pyserini/2cr/msmarco-v1-passage.yaml b/pyserini/2cr/msmarco-v1-passage.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ef3932cb394c7af153b6cafb75f870e97887ca64 --- /dev/null +++ b/pyserini/2cr/msmarco-v1-passage.yaml @@ -0,0 +1,764 @@ +conditions: + - name: bm25-rocchio-d2q-t5-tuned + display: BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86) + display-html: BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.2395 + R@1K: 0.9535 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4339 + nDCG@10: 0.6559 + R@1K: 0.8465 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4376 + nDCG@10: 0.6224 + R@1K: 0.8641 + - name: bm25-rocchio-d2q-t5-default + display: BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4) + display-html: BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4 + topics: + - topic_key: 
msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.2158 + R@1K: 0.9467 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4469 + nDCG@10: 0.6538 + R@1K: 0.8855 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4246 + nDCG@10: 0.6102 + R@1K: 0.8675 + - name: bm25-rocchio-default + display: BM25+Rocchio (k1=0.9, b=0.4) + display-html: BM25+Rocchio (k1=0.9, b=0.4) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rocchio + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1595 + R@1K: 0.8620 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3474 + nDCG@10: 0.5275 + R@1K: 0.8007 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.3115 + nDCG@10: 0.4910 + R@1K: 0.8156 + - name: bm25-rocchio-tuned + display: BM25+Rocchio (k1=0.82, b=0.68) + display-html: BM25+Rocchio (k1=0.82, b=0.68) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage --topics $topics --output $output --bm25 --rocchio + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1684 + R@1K: 0.8726 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3396 + nDCG@10: 0.5275 + R@1K: 0.7948 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.3120 + nDCG@10: 0.4908 + R@1K: 0.8327 + - name: distilbert-kd-tasb-pytorch + display: "DistilBERT KD TASB: query inference with PyTorch" + display-html: "DistilBERT KD TASB: query inference with PyTorch" + display-row: "[5]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3444 + R@1K: 0.9771 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4590 + nDCG@10: 0.7210 + R@1K: 0.8406 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4698 + nDCG@10: 0.6854 + R@1K: 0.8727 + - name: distilbert-kd-tasb + display: "DistilBERT KD TASB: pre-encoded" + display-html: "DistilBERT KD TASB: pre-encoded queries" + display-row: "[5]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoded-queries distilbert_tas_b-$topics --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3444 + R@1K: 0.9771 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4590 + nDCG@10: 0.7210 + R@1K: 0.8406 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4698 + nDCG@10: 0.6854 + R@1K: 0.8727 + - name: distilbert-kd-pytorch + display: "DistilBERT KD: query inference with PyTorch" + display-html: "DistilBERT KD: query inference with PyTorch" + display-row: "[4]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-margin-mse-t2 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + 
scores: + - MRR@10: 0.3251 + R@1K: 0.9553 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4053 + nDCG@10: 0.6994 + R@1K: 0.7653 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4159 + nDCG@10: 0.6447 + R@1K: 0.7953 + - name: distilbert-kd + display: "DistilBERT KD: pre-encoded" + display-html: "DistilBERT KD: pre-encoded queries" + display-row: "[4]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-margin-mse-t2 --topics $topics --encoded-queries distilbert_kd-$topics --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3251 + R@1K: 0.9553 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4053 + nDCG@10: 0.6994 + R@1K: 0.7653 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4159 + nDCG@10: 0.6447 + R@1K: 0.7953 + - name: ance-pytorch + display: "ANCE: query inference with PyTorch" + display-html: "ANCE: query inference with PyTorch" + display-row: "[3]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.ance --topics $topics --encoder castorini/ance-msmarco-passage --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3302 + R@1K: 0.9587 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3710 + nDCG@10: 0.6452 + R@1K: 0.7554 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4076 + nDCG@10: 0.6458 + R@1K: 0.7764 + - name: ance + display: "ANCE: pre-encoded" + display-html: "ANCE: pre-encoded queries" + display-row: "[3]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.ance --topics $topics --encoded-queries ance-$topics --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3302 + R@1K: 0.9584 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3710 + nDCG@10: 0.6452 + R@1K: 0.7554 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4076 + nDCG@10: 0.6458 + R@1K: 0.7764 + - name: bm25-tuned + display: BM25 (k1=0.82, b=0.68) + display-html: BM25 (k1=0.82, b=0.68) + command: python -m pyserini.search.lucene --topics $topics --index msmarco-v1-passage --output $output --bm25 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1875 + R@1K: 0.8573 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.2903 + nDCG@10: 0.4973 + R@1K: 0.7450 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.2876 + nDCG@10: 0.4876 + R@1K: 0.8031 + - name: bm25-rm3-tuned + display: BM25+RM3 (k1=0.82, b=0.68) + display-html: BM25+RM3 (k1=0.82, b=0.68) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1646 + R@1K: 0.8704 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3339 + nDCG@10: 0.5147 + R@1K: 0.7950 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.3017 + nDCG@10: 0.4924 + R@1K: 0.8292 + - name: bm25-default + display: BM25 (k1=0.9, b=0.4) + display-html: BM25 (k1=0.9, b=0.4) + display-row: "[1] — (1a)" + command: 
python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1840 + R@1K: 0.8526 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3013 + nDCG@10: 0.5058 + R@1K: 0.7501 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.2856 + nDCG@10: 0.4796 + R@1K: 0.7863 + - name: bm25-rm3-default + display: BM25+RM3 (k1=0.9, b=0.4) + display-html: BM25+RM3 (k1=0.9, b=0.4) + display-row: "[1] — (1b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rm3 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.1566 + R@1K: 0.8606 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.3416 + nDCG@10: 0.5216 + R@1K: 0.8136 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.3006 + nDCG@10: 0.4896 + R@1K: 0.8236 + - name: bm25-d2q-t5-tuned + display: BM25 w/ doc2query-T5 (k1=2.18, b=0.86) + display-html: BM25 w/ doc2query-T5 (k1=2.18, b=0.86) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5 --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.2816 + R@1K: 0.9506 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4046 + nDCG@10: 0.6336 + R@1K: 0.8134 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4171 + nDCG@10: 0.6265 + R@1K: 0.8393 + - name: bm25-d2q-t5-default + display: BM25 w/ doc2query-T5 (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 (k1=0.9, b=0.4) + display-row: "[1] — (2a)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.2723 + R@1K: 0.9470 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4034 + nDCG@10: 0.6417 + R@1K: 0.8310 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4074 + nDCG@10: 0.6187 + R@1K: 0.8452 + - name: bm25-rm3-d2q-t5-tuned + display: BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86) + display-html: BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.2382 + R@1K: 0.9528 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4377 + nDCG@10: 0.6537 + R@1K: 0.8443 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4348 + nDCG@10: 0.6235 + R@1K: 0.8605 + - name: bm25-rm3-d2q-t5-default + display: BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4) + display-row: "[1] — (2b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: 
msmarco-passage-dev-subset + scores: + - MRR@10: 0.2139 + R@1K: 0.9460 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4483 + nDCG@10: 0.6586 + R@1K: 0.8863 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4286 + nDCG@10: 0.6131 + R@1K: 0.8700 + - name: unicoil-pytorch + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3509 + R@1K: 0.9581 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4617 + nDCG@10: 0.7027 + R@1K: 0.8291 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4429 + nDCG@10: 0.6745 + R@1K: 0.8433 + - name: unicoil-onnx + display: "uniCOIL (w/ doc2query-T5): query inference with ONNX" + display-html: "uniCOIL (w/ doc2query-T5): query inference with ONNX" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --onnx-encoder UniCoil --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3509 + R@1K: 0.9581 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4617 + nDCG@10: 0.7027 + R@1K: 0.8291 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4429 + nDCG@10: 0.6745 + R@1K: 0.8433 + - name: unicoil + display: "uniCOIL (w/ doc2query-T5): pre-encoded" + display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries" + display-row: "[1] — (3b)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset-unicoil + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3516 + R@1K: 0.9582 + - topic_key: dl19-passage-unicoil + eval_key: dl19-passage + scores: + - MAP: 0.4612 + nDCG@10: 0.7024 + R@1K: 0.8292 + - topic_key: dl20-unicoil + eval_key: dl20-passage + scores: + - MAP: 0.4430 + nDCG@10: 0.6745 + R@1K: 0.8430 + - name: unicoil-noexp-pytorch + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3153 + R@1K: 0.9239 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4033 + nDCG@10: 0.6434 + R@1K: 0.7752 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4022 + nDCG@10: 0.6524 + R@1K: 0.7861 + - name: unicoil-noexp-onnx + display: "uniCOIL (noexp): query inference with ONNX" + display-html: "uniCOIL (noexp): query inference with ONNX" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --onnx-encoder UniCoil --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: 
msmarco-passage-dev-subset + scores: + - MRR@10: 0.3119 + R@1K: 0.9239 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4061 + nDCG@10: 0.6531 + R@1K: 0.7809 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.3909 + nDCG@10: 0.6388 + R@1K: 0.7915 + - name: unicoil-noexp + display: "uniCOIL (noexp): pre-encoded" + display-html: "uniCOIL (noexp): pre-encoded queries" + display-row: "[1] — (3a)" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset-unicoil-noexp + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3153 + R@1K: 0.9239 + - topic_key: dl19-passage-unicoil-noexp + eval_key: dl19-passage + scores: + - MAP: 0.4033 + nDCG@10: 0.6433 + R@1K: 0.7752 + - topic_key: dl20-unicoil-noexp + eval_key: dl20-passage + scores: + - MAP: 0.4021 + nDCG@10: 0.6523 + R@1K: 0.7861 + - name: splade-pp-ed-onnx + display: "SPLADE++ EnsembleDistil: query inference with ONNX" + display-html: "SPLADE++ EnsembleDistil: query inference with ONNX" + display-row: "[2]" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-splade-pp-ed --topics $topics --onnx-encoder SpladePlusPlusEnsembleDistil --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3830 + R@1K: 0.9831 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.5054 + nDCG@10: 0.7320 + R@1K: 0.8724 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.5002 + nDCG@10: 0.7198 + R@1K: 0.8995 + - name: splade-pp-sd-onnx + display: "SPLADE++ SelfDistil: query inference with ONNX" + display-html: "SPLADE++ SelfDistil: query inference with ONNX" + display-row: "[2]" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-splade-pp-sd --topics $topics --onnx-encoder SpladePlusPlusSelfDistil --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3778 + R@1K: 0.9846 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4997 + nDCG@10: 0.7356 + R@1K: 0.8758 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.5140 + nDCG@10: 0.7285 + R@1K: 0.9023 + - name: tct_colbert-v2-hnp-pytorch + display: "TCT_ColBERT-V2-HN+: query inference with PyTorch" + display-html: "TCT_ColBERT-V2-HN+: query inference with PyTorch" + display-row: "[6]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3584 + R@1K: 0.9695 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4469 + nDCG@10: 0.7204 + R@1K: 0.8261 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4754 + nDCG@10: 0.6882 + R@1K: 0.8429 + - name: tct_colbert-v2-hnp + display: "TCT_ColBERT-V2-HN+: pre-encoded" + display-html: "TCT_ColBERT-V2-HN+: pre-encoded queries" + display-row: "[6]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoded-queries tct_colbert-v2-hnp-$topics 
--output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3584 + R@1K: 0.9695 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4469 + nDCG@10: 0.7204 + R@1K: 0.8261 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4754 + nDCG@10: 0.6882 + R@1K: 0.8429 + - name: slimr + display: "SLIM: query inference with PyTorch" + display-html: "SLIM: query inference with PyTorch" + display-row: "[7]" + command: python -m pyserini.search.lucene --threads 16 --batch 128 --index msmarco-v1-passage-slimr --topics $topics --encoder castorini/slimr-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr --output $output --output-format msmarco --hits 1000 --impact --min-idf 3 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3581 + R@1K: 0.9620 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4509 + nDCG@10: 0.7010 + R@1K: 0.8241 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4419 + nDCG@10: 0.6403 + R@1K: 0.8543 + - name: slimr-pp + display: "SLIM++: query inference with PyTorch" + display-html: "SLIM++: query inference with PyTorch" + display-row: "[7]" + command: python -m pyserini.search.lucene --threads 16 --batch 128 --index msmarco-v1-passage-slimr-pp --topics $topics --encoder castorini/slimr-pp-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr-pp --output $output --output-format msmarco --hits 1000 --impact --min-idf 3 + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.4032 + R@1K: 0.9680 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4687 + nDCG@10: 0.7140 + R@1K: 0.8415 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4906 + nDCG@10: 0.7021 + R@1K: 0.8551 + - name: aggretriever-distilbert-pytorch + display: "Aggretriever-DistilBERT: query inference with PyTorch" + display-html: "Aggretriever-DistilBERT: query inference with PyTorch" + display-row: "[8]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.aggretriever-distilbert --topics $topics --encoder castorini/aggretriever-distilbert --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3412 + R@1K: 0.9604 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4301 + nDCG@10: 0.6816 + R@1K: 0.8023 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4329 + nDCG@10: 0.6726 + R@1K: 0.8351 + - name: aggretriever-cocondenser-pytorch + display: "Aggretriever-coCondenser: query inference with PyTorch" + display-html: "Aggretriever-coCondenser: query inference with PyTorch" + display-row: "[8]" + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.aggretriever-cocondenser --topics $topics --encoder castorini/aggretriever-cocondenser --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3619 + R@1K: 0.9735 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4350 + nDCG@10: 0.6837 + R@1K: 0.8078 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4710 + nDCG@10: 0.6972 + R@1K: 0.8555 + - name: openai-ada2 + display: "OpenAI ada2: pre-encoded queries" + 
display-html: "OpenAI ada2: pre-encoded queries" + command: python -m pyserini.search.faiss --threads 16 --batch-size 128 --index msmarco-v1-passage.openai-ada2 --topics $topics --encoded-queries openai-ada2-$topics --output $output + topics: + - topic_key: msmarco-passage-dev-subset + eval_key: msmarco-passage-dev-subset + scores: + - MRR@10: 0.3435 + R@1K: 0.9858 + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.4788 + nDCG@10: 0.7035 + R@1K: 0.8629 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4771 + nDCG@10: 0.6759 + R@1K: 0.8705 + - name: openai-ada2-hyde + display: "HyDE-OpenAI ada2: pre-encoded queries" + display-html: "HyDE-OpenAI ada2: pre-encoded queries" + command: python -m pyserini.search.faiss --threads 16 --batch-size 128 --index msmarco-v1-passage.openai-ada2 --topics $topics --encoded-queries openai-ada2-$topics-hyde --output $output + topics: + - topic_key: dl19-passage + eval_key: dl19-passage + scores: + - MAP: 0.5125 + nDCG@10: 0.7163 + R@1K: 0.9002 + - topic_key: dl20 + eval_key: dl20-passage + scores: + - MAP: 0.4938 + nDCG@10: 0.6666 + R@1K: 0.8919 \ No newline at end of file diff --git a/pyserini/2cr/msmarco-v2-doc.yaml b/pyserini/2cr/msmarco-v2-doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6287f8a3be2102db1e55eaea5df5242fd5a15ff7 --- /dev/null +++ b/pyserini/2cr/msmarco-v2-doc.yaml @@ -0,0 +1,287 @@ +conditions: + - name: bm25-doc-default + display: BM25 doc (k1=0.9, b=0.4) + display-html: BM25 doc (k1=0.9, b=0.4) + display-row: (1a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.1572 + R@1K: 0.8054 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1659 + R@1K: 0.8029 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2126 + nDCG@10: 0.5116 + MRR@100: 0.8367 + R@100: 0.3195 + R@1K: 0.6739 + - name: bm25-doc-segmented-default + display: BM25 doc segmented (k1=0.9, b=0.4) + display-html: BM25 doc segmented (k1=0.9, b=0.4) + display-row: (1b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.1896 + R@1K: 0.8542 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1930 + R@1K: 0.8549 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2436 + nDCG@10: 0.5776 + MRR@100: 0.8937 + R@100: 0.3478 + R@1K: 0.6930 + - name: bm25-rm3-doc-default + display: BM25+RM3 doc (k1=0.9, b=0.4) + display-html: BM25+RM3 doc (k1=0.9, b=0.4) + display-row: (1c) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.0974 + R@1K: 0.7699 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1033 + R@1K: 0.7736 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2452 + nDCG@10: 0.5304 + MRR@100: 0.7914 + R@100: 0.3376 + R@1K: 0.7341 + - name: bm25-rm3-doc-segmented-default + display: BM25+RM3 doc segmented (k1=0.9, b=0.4) + display-html: 
BM25+RM3 doc segmented (k1=0.9, b=0.4) + display-row: (1d) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.1660 + R@1K: 0.8608 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1702 + R@1K: 0.8639 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2936 + nDCG@10: 0.6189 + MRR@100: 0.9076 + R@100: 0.3890 + R@1K: 0.7678 + - name: bm25-d2q-t5-doc-default + display: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-row: (2a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-d2q-t5 --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2011 + R@1K: 0.8614 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2012 + R@1K: 0.8568 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2387 + nDCG@10: 0.5792 + MRR@100: 0.8866 + R@100: 0.3443 + R@1K: 0.7066 + - name: bm25-d2q-t5-doc-segmented-default + display: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-row: (2b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2226 + R@1K: 0.8982 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2234 + R@1K: 0.8952 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2683 + nDCG@10: 0.6289 + MRR@100: 0.9454 + R@100: 0.3656 + R@1K: 0.7202 + - name: bm25-rm3-d2q-t5-doc-default + display: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 doc (k1=0.9, b=0.4) + display-row: (2c) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.1141 + R@1K: 0.8191 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1170 + R@1K: 0.8247 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2611 + nDCG@10: 0.5375 + MRR@100: 0.8255 + R@100: 0.3580 + R@1K: 0.7574 + - name: bm25-rm3-d2q-t5-doc-segmented-default + display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4) + display-row: (2d) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.1975 + R@1K: 0.9002 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.1978 + R@1K: 0.8972 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.3191 + nDCG@10: 0.6559 + MRR@100: 0.8989 
+ R@100: 0.4131 + R@1K: 0.7948 + - name: unicoil-noexp + display: "uniCOIL (noexp): pre-encoded" + display-html: "uniCOIL (noexp): pre-encoded queries" + display-row: (3a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-noexp-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev-unicoil-noexp + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2231 + R@1K: 0.8987 + - topic_key: msmarco-v2-doc-dev2-unicoil-noexp + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2314 + R@1K: 0.8995 + - topic_key: dl21-unicoil-noexp + eval_key: dl21-doc + scores: + - MAP@100: 0.2587 + nDCG@10: 0.6495 + MRR@100: 0.9282 + R@100: 0.3563 + R@1K: 0.6787 + - name: unicoil-noexp-otf + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2232 + R@1K: 0.8987 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2314 + R@1K: 0.8993 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2589 + nDCG@10: 0.6501 + MRR@100: 0.9282 + R@100: 0.3574 + R@1K: 0.6782 + - name: unicoil + display: "uniCOIL (w/ doc2query-T5): pre-encoded" + display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries" + display-row: (3b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev-unicoil + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2419 + R@1K: 0.9122 + - topic_key: msmarco-v2-doc-dev2-unicoil + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2445 + R@1K: 0.9172 + - topic_key: dl21-unicoil + eval_key: dl21-doc + scores: + - MAP@100: 0.2718 + nDCG@10: 0.6783 + MRR@100: 0.9684 + R@100: 0.3700 + R@1K: 0.7069 + - name: unicoil-otf + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage + topics: + - topic_key: msmarco-v2-doc-dev + eval_key: msmarco-v2-doc-dev + scores: + - MRR@100: 0.2419 + R@1K: 0.9120 + - topic_key: msmarco-v2-doc-dev2 + eval_key: msmarco-v2-doc-dev2 + scores: + - MRR@100: 0.2447 + R@1K: 0.9174 + - topic_key: dl21 + eval_key: dl21-doc + scores: + - MAP@100: 0.2720 + nDCG@10: 0.6782 + MRR@100: 0.9684 + R@100: 0.3702 + R@1K: 0.7071 diff --git a/pyserini/2cr/msmarco-v2-passage.yaml b/pyserini/2cr/msmarco-v2-passage.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06383d3657b9d926d96e914b9597c02ea5c396d6 --- /dev/null +++ b/pyserini/2cr/msmarco-v2-passage.yaml @@ -0,0 +1,287 @@ +conditions: + - name: bm25-default + display: BM25 original passage (k1=0.9, b=0.4) + display-html: BM25 original passage 
(k1=0.9, b=0.4) + display-row: (1a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0719 + R@1K: 0.5733 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0802 + R@1K: 0.5839 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1357 + nDCG@10: 0.4458 + MRR@100: 0.5060 + R@100: 0.3261 + R@1K: 0.6149 + - name: bm25-augmented-default + display: BM25 augmented passage (k1=0.9, b=0.4) + display-html: BM25 augmented passage (k1=0.9, b=0.4) + display-row: (1b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-augmented --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0872 + R@1K: 0.6925 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0917 + R@1K: 0.6933 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.0977 + nDCG@10: 0.3977 + MRR@100: 0.5303 + R@100: 0.2709 + R@1K: 0.5835 + - name: bm25-rm3-default + display: BM25+RM3 original passage (k1=0.9, b=0.4) + display-html: BM25+RM3 original passage (k1=0.9, b=0.4) + display-row: (1c) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0630 + R@1K: 0.5947 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0659 + R@1K: 0.6062 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1666 + nDCG@10: 0.4455 + MRR@100: 0.5202 + R@100: 0.3499 + R@1K: 0.6616 + - name: bm25-rm3-augmented-default + display: BM25+RM3 augmented passage (k1=0.9, b=0.4) + display-html: BM25+RM3 augmented passage (k1=0.9, b=0.4) + display-row: (1d) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-augmented --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0667 + R@1K: 0.6857 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0700 + R@1K: 0.6826 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1050 + nDCG@10: 0.3869 + MRR@100: 0.4915 + R@100: 0.2807 + R@1K: 0.6298 + - name: bm25-d2q-t5-default + display: BM25 w/ doc2query-T5 original passage (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 original passage (k1=0.9, b=0.4) + display-row: (2a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-d2q-t5 --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1072 + R@1K: 0.7083 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1123 + R@1K: 0.7151 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1874 + nDCG@10: 0.4816 + MRR@100: 0.6848 + R@100: 0.4076 + R@1K: 0.7078 + - name: bm25-d2q-t5-augmented-default + display: BM25 w/ doc2query-T5 augmented passage (k1=0.9, b=0.4) + display-html: BM25 w/ doc2query-T5 
augmented passage (k1=0.9, b=0.4) + display-row: (2b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-augmented-d2q-t5 --topics $topics --output $output --bm25 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1172 + R@1K: 0.7647 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1170 + R@1K: 0.7659 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1649 + nDCG@10: 0.4702 + MRR@100: 0.6391 + R@100: 0.3883 + R@1K: 0.6962 + - name: bm25-rm3-d2q-t5-default + display: BM25+RM3 w/ doc2query-T5 original passage (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 original passage (k1=0.9, b=0.4) + display-row: (2c) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0947 + R@1K: 0.7181 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0984 + R@1K: 0.7222 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.2285 + nDCG@10: 0.5098 + MRR@100: 0.6548 + R@100: 0.4499 + R@1K: 0.7537 + - name: bm25-rm3-d2q-t5-augmented-default + display: BM25+RM3 w/ doc2query-T5 augmented passage (k1=0.9, b=0.4) + display-html: BM25+RM3 w/ doc2query-T5 augmented passage (k1=0.9, b=0.4) + display-row: (2d) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-augmented-d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.0883 + R@1K: 0.7607 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.0904 + R@1K: 0.7649 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.1930 + nDCG@10: 0.4812 + MRR@100: 0.5958 + R@100: 0.4321 + R@1K: 0.7672 + - name: unicoil + display: "uniCOIL (w/ doc2query-T5): pre-encoded" + display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries" + display-row: (3b) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-0shot --topics $topics --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-v2-passage-dev-unicoil + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1499 + R@1K: 0.7616 + - topic_key: msmarco-v2-passage-dev2-unicoil + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1577 + R@1K: 0.7671 + - topic_key: dl21-unicoil + eval_key: dl21-passage + scores: + - MAP@100: 0.2538 + nDCG@10: 0.6159 + MRR@100: 0.7311 + R@100: 0.4731 + R@1K: 0.7551 + - name: unicoil-otf + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1501 + R@1K: 0.7613 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1576 + R@1K: 0.7676 + - topic_key: dl21 + eval_key: dl21-passage + scores: + 
- MAP@100: 0.2539 + nDCG@10: 0.6160 + MRR@100: 0.7311 + R@100: 0.4723 + R@1K: 0.7560 + - name: unicoil-noexp + display: "uniCOIL (noexp): pre-encoded" + display-html: "uniCOIL (noexp): pre-encoded queries" + display-row: (3a) + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-noexp-0shot --topics $topics --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-v2-passage-dev-unicoil-noexp + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1342 + R@1K: 0.7010 + - topic_key: msmarco-v2-passage-dev2-unicoil-noexp + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1385 + R@1K: 0.7114 + - topic_key: dl21-unicoil-noexp + eval_key: dl21-passage + scores: + - MAP@100: 0.2193 + nDCG@10: 0.5756 + MRR@100: 0.6991 + R@100: 0.4246 + R@1K: 0.6897 + - name: unicoil-noexp-otf + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" + command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact + topics: + - topic_key: msmarco-v2-passage-dev + eval_key: msmarco-v2-passage-dev + scores: + - MRR@100: 0.1343 + R@1K: 0.7010 + - topic_key: msmarco-v2-passage-dev2 + eval_key: msmarco-v2-passage-dev2 + scores: + - MRR@100: 0.1385 + R@1K: 0.7114 + - topic_key: dl21 + eval_key: dl21-passage + scores: + - MAP@100: 0.2194 + nDCG@10: 0.5759 + MRR@100: 0.6991 + R@100: 0.4247 + R@1K: 0.6893 diff --git a/pyserini/2cr/msmarco.py b/pyserini/2cr/msmarco.py new file mode 100644 index 0000000000000000000000000000000000000000..acc0e5e31a3197a9425735af1f344919ca5abd12 --- /dev/null +++ b/pyserini/2cr/msmarco.py @@ -0,0 +1,600 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import math +import os +import re +import sys +import time +from collections import defaultdict +from string import Template + +import pkg_resources +import yaml + +from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str + +# The models: the rows of the results table will be ordered this way. 
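The `models` dict that follows fixes the row order of the generated report; the conditions themselves live in the YAML files above, which all share one schema: a top-level `conditions` list whose entries carry a `name`, display strings, a `command` template with `$topics` and `$output` placeholders, and a `topics` list pairing a `topic_key`/`eval_key` with expected `scores`. As a minimal sketch of how such an entry is consumed (the file path, condition name, and output filename below are illustrative, not part of this patch):

```python
from string import Template

import yaml

# Load one of the condition files added by this patch (path is illustrative).
with open('pyserini/2cr/msmarco-v1-passage.yaml') as f:
    conditions = yaml.safe_load(f)['conditions']

# Pick a condition by name and materialize its command template.
condition = next(c for c in conditions if c['name'] == 'bm25-default')
topic_key = condition['topics'][0]['topic_key']          # 'msmarco-passage-dev-subset'
runfile = 'run.msmarco-v1-passage.bm25-default.dev.txt'  # illustrative output name

cmd = Template(condition['command']).substitute(topics=topic_key, output=runfile)
print(cmd)
# python -m pyserini.search.lucene --threads 16 --batch-size 128
#   --index msmarco-v1-passage --topics msmarco-passage-dev-subset
#   --output run.msmarco-v1-passage.bm25-default.dev.txt --bm25 --k1 0.9 --b 0.4
```

The same `Template(cmd_template).substitute(topics=..., output=...)` call appears in `generate_report` and `run_conditions` below.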
+models = { + # MS MARCO v1 passage + 'msmarco-v1-passage': + ['bm25-default', + 'bm25-rm3-default', + 'bm25-rocchio-default', + '', + 'bm25-tuned', + 'bm25-rm3-tuned', + 'bm25-rocchio-tuned', + '', + 'bm25-d2q-t5-default', + 'bm25-rm3-d2q-t5-default', + 'bm25-rocchio-d2q-t5-default', + '', + 'bm25-d2q-t5-tuned', + 'bm25-rm3-d2q-t5-tuned', + 'bm25-rocchio-d2q-t5-tuned', + '', + 'unicoil', + 'unicoil-pytorch', + 'unicoil-onnx', + 'unicoil-noexp', + 'unicoil-noexp-pytorch', + 'unicoil-noexp-onnx', + '', + 'splade-pp-ed-onnx', + 'splade-pp-sd-onnx', + '', + 'ance', + 'ance-pytorch', + '', + 'distilbert-kd', + 'distilbert-kd-pytorch', + 'distilbert-kd-tasb', + 'distilbert-kd-tasb-pytorch', + '', + 'tct_colbert-v2-hnp', + 'tct_colbert-v2-hnp-pytorch', + '', + 'slimr', + 'slimr-pp', + '', + 'aggretriever-distilbert-pytorch', + 'aggretriever-cocondenser-pytorch', + '', + 'openai-ada2', + 'openai-ada2-hyde'], + + # MS MARCO v1 doc + 'msmarco-v1-doc': + ['bm25-doc-default', + 'bm25-doc-segmented-default', + 'bm25-rm3-doc-default', + 'bm25-rm3-doc-segmented-default', + 'bm25-rocchio-doc-default', + 'bm25-rocchio-doc-segmented-default', + '', + 'bm25-doc-tuned', + 'bm25-doc-segmented-tuned', + 'bm25-rm3-doc-tuned', + 'bm25-rm3-doc-segmented-tuned', + 'bm25-rocchio-doc-tuned', + 'bm25-rocchio-doc-segmented-tuned', + '', + 'bm25-d2q-t5-doc-default', + 'bm25-d2q-t5-doc-segmented-default', + 'bm25-rm3-d2q-t5-doc-default', + 'bm25-rm3-d2q-t5-doc-segmented-default', + '', + 'bm25-d2q-t5-doc-tuned', + 'bm25-d2q-t5-doc-segmented-tuned', + 'bm25-rm3-d2q-t5-doc-tuned', + 'bm25-rm3-d2q-t5-doc-segmented-tuned', + '', + 'unicoil-noexp', + 'unicoil-noexp-pytorch', + '', + 'unicoil', + 'unicoil-pytorch'], + + # MS MARCO v2 passage + 'msmarco-v2-passage': + ['bm25-default', + 'bm25-augmented-default', + 'bm25-rm3-default', + 'bm25-rm3-augmented-default', + '', + 'bm25-d2q-t5-default', + 'bm25-d2q-t5-augmented-default', + 'bm25-rm3-d2q-t5-default', + 'bm25-rm3-d2q-t5-augmented-default', + '', + 'unicoil-noexp', + 'unicoil', + '', + 'unicoil-noexp-otf', + 'unicoil-otf'], + + # MS MARCO v2 doc + 'msmarco-v2-doc': + ['bm25-doc-default', + 'bm25-doc-segmented-default', + 'bm25-rm3-doc-default', + 'bm25-rm3-doc-segmented-default', + '', + 'bm25-d2q-t5-doc-default', + 'bm25-d2q-t5-doc-segmented-default', + 'bm25-rm3-d2q-t5-doc-default', + 'bm25-rm3-d2q-t5-doc-segmented-default', + '', + 'unicoil-noexp', + 'unicoil', + '', + 'unicoil-noexp-otf', + 'unicoil-otf' + ] +} + +trec_eval_metric_definitions = { + 'msmarco-v1-passage': { + 'msmarco-passage-dev-subset': { + 'MRR@10': '-c -M 10 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'dl19-passage': { + 'MAP': '-c -l 2 -m map', + 'nDCG@10': '-c -m ndcg_cut.10', + 'R@1K': '-c -l 2 -m recall.1000' + }, + 'dl20-passage': { + 'MAP': '-c -l 2 -m map', + 'nDCG@10': '-c -m ndcg_cut.10', + 'R@1K': '-c -l 2 -m recall.1000' + } + }, + 'msmarco-v1-doc': { + 'msmarco-doc-dev': { + 'MRR@10': '-c -M 100 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'dl19-doc': { + 'MAP': '-c -M 100 -m map', + 'nDCG@10': '-c -m ndcg_cut.10', + 'R@1K': '-c -m recall.1000' + }, + 'dl20-doc': { + 'MAP': '-c -M 100 -m map', + 'nDCG@10': '-c -m ndcg_cut.10', + 'R@1K': '-c -m recall.1000' + } + }, + 'msmarco-v2-passage': { + 'msmarco-v2-passage-dev': { + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'msmarco-v2-passage-dev2': { + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'dl21-passage': { + 'MAP@100': '-c -l 2 -M 100 -m map', + 'nDCG@10': 
'-c -m ndcg_cut.10', + 'MRR@100': '-c -l 2 -M 100 -m recip_rank', + 'R@100': '-c -l 2 -m recall.100', + 'R@1K': '-c -l 2 -m recall.1000' + } + }, + 'msmarco-v2-doc': { + 'msmarco-v2-doc-dev': { + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'msmarco-v2-doc-dev2': { + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@1K': '-c -m recall.1000' + }, + 'dl21-doc': { + 'MAP@100': '-c -M 100 -m map', + 'nDCG@10': '-c -m ndcg_cut.10', + 'MRR@100': '-c -M 100 -m recip_rank', + 'R@100': '-c -m recall.100', + 'R@1K': '-c -m recall.1000' + } + } +} + + +def find_msmarco_table_topic_set_key_v1(topic_key): + # E.g., we want to map variants like 'dl19-passage-unicoil' and 'dl19-passage' both into 'dl19' + key = '' + if topic_key.startswith('dl19'): + key = 'dl19' + elif topic_key.startswith('dl20'): + key = 'dl20' + elif topic_key.startswith('msmarco'): + key = 'dev' + + return key + + +def find_msmarco_table_topic_set_key_v2(topic_key): + key = '' + if topic_key.endswith('dev') or topic_key.endswith('dev-unicoil') or topic_key.endswith('dev-unicoil-noexp'): + key = 'dev' + elif topic_key.endswith('dev2') or topic_key.endswith('dev2-unicoil') or topic_key.endswith('dev2-unicoil-noexp'): + key = 'dev2' + elif topic_key.startswith('dl21'): + key = 'dl21' + + return key + + +def format_command(raw): + # After "--output foo.txt" are additional options like "--hits 1000 --impact". + # We want these on a separate line for better readability, but note that sometimes that might + # be the end of the command, in which case we don't want to add an extra line break. + return raw.replace('--topics', '\\\n --topics') \ + .replace('--threads', '\\\n --threads')\ + .replace('--index', '\\\n --index')\ + .replace('--output ', '\\\n --output ')\ + .replace('--encoder', '\\\n --encoder')\ + .replace('--onnx-encoder', '\\\n --onnx-encoder')\ + .replace('--encoded-corpus', '\\\n --encoded-corpus')\ + .replace('.txt ', '.txt \\\n ') + + +def read_file(f): + fin = open(f, 'r') + text = fin.read() + fin.close() + + return text + + +def list_conditions(args): + for condition in models[args.collection]: + if condition == '': + continue + print(condition) + + +def generate_report(args): + yaml_file = pkg_resources.resource_filename(__name__, f'{args.collection}.yaml') + + if args.collection == 'msmarco-v1-passage': + html_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_v1_passage.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_row_v1.template')) + elif args.collection == 'msmarco-v1-doc': + html_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_v1_doc.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_row_v1.template')) + elif args.collection == 'msmarco-v2-passage': + html_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_v2_passage.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_row_v2.template')) + elif args.collection == 'msmarco-v2-doc': + html_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_v2_doc.template')) + row_template = read_file(pkg_resources.resource_filename(__name__, 'msmarco_html_row_v2.template')) + else: + raise ValueError(f'Unknown corpus: {args.collection}') + + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + commands = defaultdict(lambda: defaultdict(lambda: '')) + eval_commands = defaultdict(lambda: 
defaultdict(lambda: '')) + + table_keys = {} + row_ids = {} + + with open(yaml_file) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + name = condition['name'] + display = condition['display-html'] + row_id = condition['display-row'] if 'display-row' in condition else '' + cmd_template = condition['command'] + + row_ids[name] =row_id + table_keys[name] = display + + for topic_set in condition['topics']: + topic_key = topic_set['topic_key'] + eval_key = topic_set['eval_key'] + + if args.collection == 'msmarco-v1-passage' or args.collection == 'msmarco-v1-doc': + short_topic_key = find_msmarco_table_topic_set_key_v1(topic_key) + else: + short_topic_key = find_msmarco_table_topic_set_key_v2(topic_key) + + runfile = f'run.{args.collection}.{name}.{short_topic_key}.txt' + cmd = Template(cmd_template).substitute(topics=topic_key, output=runfile) + commands[name][short_topic_key] = cmd + + for expected in topic_set['scores']: + for metric in expected: + eval_cmd = f'python -m pyserini.eval.trec_eval ' + \ + f'{trec_eval_metric_definitions[args.collection][eval_key][metric]} {eval_key} {runfile}' + eval_commands[name][short_topic_key] += eval_cmd + '\n' + table[name][short_topic_key][metric] = expected[metric] + + if args.collection == 'msmarco-v1-passage' or args.collection == 'msmarco-v1-doc': + row_cnt = 1 + + html_rows = [] + for name in models[args.collection]: + if not name: + # Add blank row for spacing + html_rows.append('') + continue + s = Template(row_template) + s = s.substitute(row_cnt=row_cnt, + condition_name=table_keys[name], + row=row_ids[name], + s1=f'{table[name]["dl19"]["MAP"]:.4f}' if table[name]['dl19']['MAP'] != 0 else '-', + s2=f'{table[name]["dl19"]["nDCG@10"]:.4f}' if table[name]['dl19']['nDCG@10'] != 0 else '-', + s3=f'{table[name]["dl19"]["R@1K"]:.4f}' if table[name]['dl19']['R@1K'] != 0 else '-', + s4=f'{table[name]["dl20"]["MAP"]:.4f}' if table[name]['dl20']['MAP'] != 0 else '-', + s5=f'{table[name]["dl20"]["nDCG@10"]:.4f}' if table[name]['dl20']['nDCG@10'] != 0 else '-', + s6=f'{table[name]["dl20"]["R@1K"]:.4f}' if table[name]['dl20']['R@1K'] != 0 else '-', + s7=f'{table[name]["dev"]["MRR@10"]:.4f}' if table[name]['dev']['MRR@10'] != 0 else '-', + s8=f'{table[name]["dev"]["R@1K"]:.4f}' if table[name]['dev']['R@1K'] != 0 else '-', + cmd1=format_command(commands[name]['dl19']), + cmd2=format_command(commands[name]['dl20']), + cmd3=format_command(commands[name]['dev']), + eval_cmd1=eval_commands[name]['dl19'], + eval_cmd2=eval_commands[name]['dl20'], + eval_cmd3=eval_commands[name]['dev'] + ) + + # If we don't have scores, we want to remove the commands also. Use simple regexp substitution. + if table[name]['dl19']['MAP'] == 0: + s = re.sub(re.compile('Command to generate run on TREC 2019 queries:.*?
', + re.MULTILINE | re.DOTALL), + 'Not available.
', s) + if table[name]['dl20']['MAP'] == 0: + s = re.sub(re.compile('Command to generate run on TREC 2020 queries:.*?
', + re.MULTILINE | re.DOTALL), + 'Not available.
', s) + if table[name]['dev']['MRR@10'] == 0: + s = re.sub(re.compile('Command to generate run on dev queries:.*?', + re.MULTILINE | re.DOTALL), + 'Not available.', s) + + html_rows.append(s) + row_cnt += 1 + + all_rows = '\n'.join(html_rows) + if args.collection == 'msmarco-v1-passage': + full_name = 'MS MARCO V1 Passage' + else: + full_name = 'MS MARCO V1 Document' + + with open(args.output, 'w') as out: + out.write(Template(html_template).substitute(title=full_name, rows=all_rows)) + else: + row_cnt = 1 + + html_rows = [] + for name in models[args.collection]: + if not name: + # Add blank row for spacing + html_rows.append('') + continue + s = Template(row_template) + s = s.substitute(row_cnt=row_cnt, + condition_name=table_keys[name], + row=row_ids[name], + s1=f'{table[name]["dl21"]["MAP@100"]:.4f}', + s2=f'{table[name]["dl21"]["nDCG@10"]:.4f}', + s3=f'{table[name]["dl21"]["MRR@100"]:.4f}', + s4=f'{table[name]["dl21"]["R@100"]:.4f}', + s5=f'{table[name]["dl21"]["R@1K"]:.4f}', + s6=f'{table[name]["dev"]["MRR@100"]:.4f}', + s7=f'{table[name]["dev"]["R@1K"]:.4f}', + s8=f'{table[name]["dev2"]["MRR@100"]:.4f}', + s9=f'{table[name]["dev2"]["R@1K"]:.4f}', + cmd1=format_command(commands[name]['dl21']), + cmd2=format_command(commands[name]['dev']), + cmd3=format_command(commands[name]['dev2']), + eval_cmd1=eval_commands[name]['dl21'], + eval_cmd2=eval_commands[name]['dev'], + eval_cmd3=eval_commands[name]['dev2'] + ) + html_rows.append(s) + row_cnt += 1 + + all_rows = '\n'.join(html_rows) + if args.collection == 'msmarco-v2-passage': + full_name = 'MS MARCO V2 Passage' + else: + full_name = 'MS MARCO V2 Document' + + with open(args.output, 'w') as out: + out.write(Template(html_template).substitute(title=full_name, rows=all_rows)) + + +def run_conditions(args): + start = time.time() + + table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0))) + table_keys = {} + + yaml_file = pkg_resources.resource_filename(__name__, f'{args.collection}.yaml') + + with open(yaml_file) as f: + yaml_data = yaml.safe_load(f) + for condition in yaml_data['conditions']: + # Either we're running all conditions, or running only the condition specified in --condition + if not args.all: + if not condition['name'] == args.condition: + continue + + name = condition['name'] + display = condition['display'] + cmd_template = condition['command'] + + print(f'# Running condition "{name}": {display}\n') + for topic_set in condition['topics']: + topic_key = topic_set['topic_key'] + eval_key = topic_set['eval_key'] + + short_topic_key = '' + if args.collection == 'msmarco-v1-passage' or args.collection == 'msmarco-v1-doc': + short_topic_key = find_msmarco_table_topic_set_key_v1(topic_key) + else: + short_topic_key = find_msmarco_table_topic_set_key_v2(topic_key) + + print(f' - topic_key: {topic_key}') + + runfile = os.path.join(args.directory, f'run.{args.collection}.{name}.{short_topic_key}.txt') + cmd = Template(cmd_template).substitute(topics=topic_key, output=runfile) + + if args.display_commands: + print(f'\n```bash\n{format_command(cmd)}\n```\n') + + if not os.path.exists(runfile): + if not args.dry_run: + os.system(cmd) + + for expected in topic_set['scores']: + for metric in expected: + table_keys[name] = display + if not args.skip_eval: + # If the runfile doesn't exist, we can't evaluate. + # This would be the case if --dry-run were set. 
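+ # Otherwise, run trec_eval and classify the result: [OK] when the measured score matches
+ # the expected score recorded in the YAML (math.isclose), [OKish] for the small set of
+ # known-flaky conditions (e.g., ance-pytorch MRR@10 on the dev subset, allowed to deviate
+ # by up to 0.0001), and [FAIL] otherwise, with the expected value printed for comparison.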
+ if not os.path.exists(runfile): + continue + + score = float( + run_eval_and_return_metric( + metric, + eval_key, + trec_eval_metric_definitions[args.collection][eval_key][metric], + runfile)) + if math.isclose(score, float(expected[metric])): + result_str = ok_str + # Flaky tests + elif args.collection == 'msmarco-v1-passage' \ + and topic_key == 'msmarco-passage-dev-subset' and name == 'ance-pytorch' \ + and metric == 'MRR@10' and abs(score-float(expected[metric])) <= 0.0001: + result_str = okish_str + else: + result_str = fail_str + f' expected {expected[metric]:.4f}' + print(f' {metric:7}: {score:.4f} {result_str}') + table[name][short_topic_key][metric] = score + else: + table[name][short_topic_key][metric] = expected[metric] + + if not args.skip_eval: + print('') + + if args.collection == 'msmarco-v1-passage' or args.collection == 'msmarco-v1-doc': + print(' ' * 69 + 'TREC 2019' + ' ' * 16 + 'TREC 2020' + ' ' * 12 + 'MS MARCO dev') + print(' ' * 62 + 'MAP nDCG@10 R@1K MAP nDCG@10 R@1K MRR@10 R@1K') + print(' ' * 62 + '-' * 22 + ' ' + '-' * 22 + ' ' + '-' * 14) + + if args.condition: + # If we've used --condition to specify a specific condition, print out only that row. + name = args.condition + print(f'{table_keys[name]:60}' + + f'{table[name]["dl19"]["MAP"]:8.4f}{table[name]["dl19"]["nDCG@10"]:8.4f}{table[name]["dl19"]["R@1K"]:8.4f} ' + + f'{table[name]["dl20"]["MAP"]:8.4f}{table[name]["dl20"]["nDCG@10"]:8.4f}{table[name]["dl20"]["R@1K"]:8.4f} ' + + f'{table[name]["dev"]["MRR@10"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f}') + else: + # Otherwise, print out all rows + for name in models[args.collection]: + if not name: + print('') + continue + print(f'{table_keys[name]:60}' + + f'{table[name]["dl19"]["MAP"]:8.4f}{table[name]["dl19"]["nDCG@10"]:8.4f}{table[name]["dl19"]["R@1K"]:8.4f} ' + + f'{table[name]["dl20"]["MAP"]:8.4f}{table[name]["dl20"]["nDCG@10"]:8.4f}{table[name]["dl20"]["R@1K"]:8.4f} ' + + f'{table[name]["dev"]["MRR@10"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f}') + else: + print(' ' * 77 + 'TREC 2021' + ' ' * 18 + 'MS MARCO dev' + ' ' * 6 + 'MS MARCO dev2') + print(' ' * 62 + 'MAP@100 nDCG@10 MRR@100 R@100 R@1K MRR@100 R@1K MRR@100 R@1K') + print(' ' * 62 + '-' * 38 + ' ' + '-' * 14 + ' ' + '-' * 14) + + if args.condition: + # If we've used --condition to specify a specific condition, print out only that row. 
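+ # Whether we print a single row or all rows below, each metric is right-aligned in a fixed
+ # 8-character column so the values line up under the headers printed above.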
+ name = args.condition + print(f'{table_keys[name]:60}' + + f'{table[name]["dl21"]["MAP@100"]:8.4f}{table[name]["dl21"]["nDCG@10"]:8.4f}' + + f'{table[name]["dl21"]["MRR@100"]:8.4f}{table[name]["dl21"]["R@100"]:8.4f}{table[name]["dl21"]["R@1K"]:8.4f} ' + + f'{table[name]["dev"]["MRR@100"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f} ' + + f'{table[name]["dev2"]["MRR@100"]:8.4f}{table[name]["dev2"]["R@1K"]:8.4f}') + else: + # Otherwise, print out all rows + for name in models[args.collection]: + if not name: + print('') + continue + print(f'{table_keys[name]:60}' + + f'{table[name]["dl21"]["MAP@100"]:8.4f}{table[name]["dl21"]["nDCG@10"]:8.4f}' + + f'{table[name]["dl21"]["MRR@100"]:8.4f}{table[name]["dl21"]["R@100"]:8.4f}{table[name]["dl21"]["R@1K"]:8.4f} ' + + f'{table[name]["dev"]["MRR@100"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f} ' + + f'{table[name]["dev2"]["MRR@100"]:8.4f}{table[name]["dev2"]["R@1K"]:8.4f}') + + end = time.time() + + print('\n') + print(f'Total elapsed time: {end - start:.0f}s') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate regression matrix for MS MARCO corpora.') + parser.add_argument('--collection', type=str, + help='Collection = {v1-passage, v1-doc, v2-passage, v2-doc}.', required=True) + # To list all conditions + parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.') + # For generating reports + parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.') + parser.add_argument('--output', type=str, help='File to store report.', required=False) + # For actually running the experimental conditions + parser.add_argument('--all', action='store_true', default=False, help='Run all conditions.') + parser.add_argument('--condition', type=str, help='Condition to run.', required=False) + parser.add_argument('--directory', type=str, help='Base directory.', default='', required=False) + parser.add_argument('--dry-run', action='store_true', default=False, help='Print out commands but do not execute.') + parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.') + parser.add_argument('--display-commands', action='store_true', default=False, help='Display command.') + args = parser.parse_args() + + if args.collection == 'v1-passage': + args.collection = 'msmarco-v1-passage' + elif args.collection == 'v1-doc': + args.collection = 'msmarco-v1-doc' + elif args.collection == 'v2-passage': + args.collection = 'msmarco-v2-passage' + elif args.collection == 'v2-doc': + args.collection = 'msmarco-v2-doc' + else: + raise ValueError(f'Unknown corpus: {args.collection}') + + if args.list_conditions: + list_conditions(args) + sys.exit() + + if args.generate_report: + if not args.output: + print(f'Must specify report filename with --output.') + sys.exit() + + generate_report(args) + sys.exit() + + if not args.all and not args.condition: + print(f'Must specify a specific condition using --condition or use --all to run all conditions.') + sys.exit() + + run_conditions(args) diff --git a/pyserini/2cr/msmarco_html_row_v1.template b/pyserini/2cr/msmarco_html_row_v1.template new file mode 100644 index 0000000000000000000000000000000000000000..c032b14e4643b2d9b2a7a109d55d0c99611e129d --- /dev/null +++ b/pyserini/2cr/msmarco_html_row_v1.template @@ -0,0 +1,81 @@ + + + +$row +${condition_name} +$s1 +$s2 +$s3 + +$s4 +$s5 +$s6 + +$s7 +$s8 + + + + +
+ + + + + + +
+
+Command to generate run on TREC 2019 queries: + +
+
$cmd1
+
+Evaluation commands: + +
+
${eval_cmd1}
+
+ +
+
+ Command to generate run on TREC 2020 queries: + +
+
$cmd2
+
+Evaluation commands: + +
+
${eval_cmd2}
+
+ +
+
+ Command to generate run on dev queries: + +
+
$cmd3
+
+Evaluation commands: + +
+
${eval_cmd3}
+
+ +
+
+ + +
+ \ No newline at end of file diff --git a/pyserini/2cr/msmarco_html_row_v2.template b/pyserini/2cr/msmarco_html_row_v2.template new file mode 100644 index 0000000000000000000000000000000000000000..a1442c96f7d105002ead16bf7808a58b3d6ed5ee --- /dev/null +++ b/pyserini/2cr/msmarco_html_row_v2.template @@ -0,0 +1,82 @@ + + + +$row +${condition_name} +$s1 +$s2 +$s3 +$s4 +$s5 + +$s6 +$s7 + +$s8 +$s9 + + + + +
+ + + + + + +
+
+Command to generate run on TREC 2021 queries: + +
+
$cmd1
+
+Evaluation commands: + +
+
${eval_cmd1}
+
+ +
+
+ Command to generate run on dev queries: + +
+
$cmd2
+
+Evaluation commands: + +
+
${eval_cmd2}
+
+ +
+
+ Command to generate run on dev2 queries: + +
+
$cmd3
+
+Evaluation commands: + +
+
${eval_cmd3}
+
+ +
+
+ + +
+ \ No newline at end of file diff --git a/pyserini/2cr/msmarco_html_v1_doc.template b/pyserini/2cr/msmarco_html_v1_doc.template new file mode 100644 index 0000000000000000000000000000000000000000..50a41306828c81177a05ded8eed864a33775f155 --- /dev/null +++ b/pyserini/2cr/msmarco_html_v1_doc.template @@ -0,0 +1,296 @@ + + + + + + + Pyserini Reproductions: MS MARCO V1 Document + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ +

The two-click* reproduction matrix below provides commands for reproducing experimental results reported in a number of papers, denoted by the references in square brackets. +Instructions for programmatic execution are shown at the bottom of this page (scroll down).

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
TREC 2019TREC 2020dev

AP@100
nDCG@10R@1K
AP@100
nDCG@10R@1KRR@100R@1K
+
+ + + +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. +To list all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --list-conditions +
+ +

These conditions correspond to the table rows above.

+ +

For all conditions, just show the commands in a "dry run":

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --all --display-commands --dry-run +
+ +

To actually run all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --all --display-commands +
+ +

With the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

To show the commands for a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --condition bm25-doc-default --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table).

+ +

To actually run a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --condition bm25-doc-default --display-commands +
+ +

Again, with the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

Finally, to generate this page:

+ +
+python -m pyserini.2cr.msmarco --collection v1-doc --generate-report --output msmarco-v1-doc.html +
+ +

The output file msmarco-v1-doc.html should be identical to this page.

+ +
+ +
+ + + + + + + + + + + + diff --git a/pyserini/2cr/msmarco_html_v1_passage.template b/pyserini/2cr/msmarco_html_v1_passage.template new file mode 100644 index 0000000000000000000000000000000000000000..b3cd36bc52e2b556693bb2f2243c3049e98d3a33 --- /dev/null +++ b/pyserini/2cr/msmarco_html_v1_passage.template @@ -0,0 +1,325 @@ + + + + + + + Pyserini Reproductions: MS MARCO V1 Passage + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ +

The two-click* reproduction matrix below provides commands for reproducing experimental results reported in a number of papers, denoted by the references in square brackets. +Instructions for programmatic execution are shown at the bottom of this page (scroll down).

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
TREC 2019TREC 2020dev

AP
nDCG@10R@1K
AP
nDCG@10R@1KRR@10R@1K
+
+ + + +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. +To list all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --list-conditions +
+ +

These conditions correspond to the table rows above.

+ +

For all conditions, just show the commands in a "dry run":

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --all --display-commands --dry-run +
+ +

To actually run all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --all --display-commands +
+ +

With the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

To show the commands for a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --condition bm25-default --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table).

+ +

To actually run a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --condition bm25-default --display-commands +
+ +

Again, with the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

Finally, to generate this page:

+ +
+python -m pyserini.2cr.msmarco --collection v1-passage --generate-report --output msmarco-v1-passage.html +
+ +

The output file msmarco-v1-passage.html should be identical to this page.

+ +
+ +
+ + + + + + + + + + + + diff --git a/pyserini/2cr/msmarco_html_v2_doc.template b/pyserini/2cr/msmarco_html_v2_doc.template new file mode 100644 index 0000000000000000000000000000000000000000..a3379e14af7046a33e4e9f81b70cf9c3dca9de32 --- /dev/null +++ b/pyserini/2cr/msmarco_html_v2_doc.template @@ -0,0 +1,292 @@ + + + + + + + Pyserini Reproductions: MS MARCO V2 Document + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ +

The two-click* reproduction matrix below provides commands for reproducing experimental results reported in the following paper. +Numbered rows correspond to tables in the paper; additional conditions are provided for comparison purposes.

+ +

Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2. +Proceedings of the 45th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022), July 2022.

+ +

Instructions for programmatic execution are shown at the bottom of this page (scroll down).

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
TREC 2021devdev2

AP
nDCG@10RR@100R@100R@1KRR@100R@1KRR@100R@1K
+
+ +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. +To list all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --list-conditions +
+ +

These conditions correspond to the table rows above.

+ +

For all conditions, just show the commands in a "dry run":

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --all --display-commands --dry-run +
+ +

To actually run all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --all --display-commands +
+ +

With the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

To show the commands for a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --condition bm25-doc-default --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table).

+ +

To actually run a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --condition bm25-doc-default --display-commands +
+ +

Again, with the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

Finally, to generate this page:

+ +
+python -m pyserini.2cr.msmarco --collection v2-doc --generate-report --output msmarco-v2-doc.html +
+ +

The output file msmarco-v2-doc.html should be identical to this page.

+ +
+ +
+ + + + + + + + + + + + diff --git a/pyserini/2cr/msmarco_html_v2_passage.template b/pyserini/2cr/msmarco_html_v2_passage.template new file mode 100644 index 0000000000000000000000000000000000000000..79d0d012ba1790413305e4587de4663da741a012 --- /dev/null +++ b/pyserini/2cr/msmarco_html_v2_passage.template @@ -0,0 +1,292 @@ + + + + + + + Pyserini Reproductions: MS MARCO V2 Passage + + + + + + + + + + + + +
+
+
+
+

$title

+
+
+
+
+ + +
+ +

The two-click* reproduction matrix below provides commands for reproducing experimental results reported in the following paper. +Numbered rows correspond to tables in the paper; additional conditions are provided for comparison purposes.

+ +

Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2. +Proceedings of the 45th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2022), July 2022.

+ +

Instructions for programmatic execution are shown at the bottom of this page (scroll down).

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +$rows + + +
TREC 2021devdev2

AP
nDCG@10RR@100R@100R@1KRR@100R@1KRR@100R@1K
+
+ +
+ +

Programmatic Execution

+ +

All experimental runs shown in the above table can be programmatically executed based on the instructions below. +To list all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --list-conditions +
+ +

These conditions correspond to the table rows above.

+ +

For all conditions, just show the commands in a "dry run":

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --all --display-commands --dry-run +
+ +

To actually run all the experimental conditions:

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --all --display-commands +
+ +

With the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

To show the commands for a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --condition bm25-default --display-commands --dry-run +
+ +

This will generate exactly the commands for a specific condition above (corresponding to a row in the table).

+ +

To actually run a specific condition:

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --condition bm25-default --display-commands +
+ +

Again, with the above command, run files will be placed in the current directory. +Use the option --directory runs/ to place the runs in a sub-directory.

+ +

Finally, to generate this page:

+ +
+python -m pyserini.2cr.msmarco --collection v2-passage --generate-report --output msmarco-v2-passage.html +
+ +

The output file msmarco-v2-passage.html should be identical to this page.

+ +
+ +
+ + + + + + + + + + + + diff --git a/pyserini/__init__.py b/pyserini/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/pyserini/__init__.py @@ -0,0 +1 @@ + diff --git a/pyserini/__pycache__/__init__.cpython-310.pyc b/pyserini/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4106b0596fcf5ca13ed77ff9719e88f4a2ea3681 Binary files /dev/null and b/pyserini/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/__pycache__/encoded_corpus_info.cpython-310.pyc b/pyserini/__pycache__/encoded_corpus_info.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c00f81eac20bff80743eb61392866798292d821 Binary files /dev/null and b/pyserini/__pycache__/encoded_corpus_info.cpython-310.pyc differ diff --git a/pyserini/__pycache__/encoded_query_info.cpython-310.pyc b/pyserini/__pycache__/encoded_query_info.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..055e4a201eda222f064e11768c0a2c72469beef5 Binary files /dev/null and b/pyserini/__pycache__/encoded_query_info.cpython-310.pyc differ diff --git a/pyserini/__pycache__/evaluate_script_info.cpython-310.pyc b/pyserini/__pycache__/evaluate_script_info.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d01bc2745f1a182bedd2be20587ded37518e9bd9 Binary files /dev/null and b/pyserini/__pycache__/evaluate_script_info.cpython-310.pyc differ diff --git a/pyserini/__pycache__/prebuilt_index_info.cpython-310.pyc b/pyserini/__pycache__/prebuilt_index_info.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..70c9d8d071d996dccdb1769f0d48c6bfb8265c66 Binary files /dev/null and b/pyserini/__pycache__/prebuilt_index_info.cpython-310.pyc differ diff --git a/pyserini/__pycache__/pyclass.cpython-310.pyc b/pyserini/__pycache__/pyclass.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..033ca3d542385209f04449741f017aa3969fd36d Binary files /dev/null and b/pyserini/__pycache__/pyclass.cpython-310.pyc differ diff --git a/pyserini/__pycache__/setup.cpython-310.pyc b/pyserini/__pycache__/setup.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..66eb51a4e075d39e7922d6e73fba6f7fe00a6621 Binary files /dev/null and b/pyserini/__pycache__/setup.cpython-310.pyc differ diff --git a/pyserini/__pycache__/util.cpython-310.pyc b/pyserini/__pycache__/util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..98c8aaebd3e23b48e7eb9e681b3245b6e18403d9 Binary files /dev/null and b/pyserini/__pycache__/util.cpython-310.pyc differ diff --git a/pyserini/analysis/__init__.py b/pyserini/analysis/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d3eed751bfa2cfda94a83221474328732d7c7d0f --- /dev/null +++ b/pyserini/analysis/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._base import get_lucene_analyzer, Analyzer, JAnalyzer, JAnalyzerUtils, JDefaultEnglishAnalyzer, JWhiteSpaceAnalyzer + +__all__ = ['get_lucene_analyzer', 'Analyzer', 'JAnalyzer', 'JAnalyzerUtils', 'JDefaultEnglishAnalyzer', 'JWhiteSpaceAnalyzer'] diff --git a/pyserini/analysis/__pycache__/__init__.cpython-310.pyc b/pyserini/analysis/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42f55a464d39b79106e2351f322d120fc8bac499 Binary files /dev/null and b/pyserini/analysis/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/analysis/__pycache__/_base.cpython-310.pyc b/pyserini/analysis/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df59a778b125d30289ba4241adadedbc2c3884bf Binary files /dev/null and b/pyserini/analysis/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/analysis/_base.py b/pyserini/analysis/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..7ca17c5ec6cafb9171a01cbdcac6303b5fc37526 --- /dev/null +++ b/pyserini/analysis/_base.py @@ -0,0 +1,166 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import List + +from ..pyclass import autoclass + +# Wrappers around Lucene classes +JAnalyzer = autoclass('org.apache.lucene.analysis.Analyzer') +JArabicAnalyzer = autoclass('org.apache.lucene.analysis.ar.ArabicAnalyzer') +JBengaliAnalyzer = autoclass('org.apache.lucene.analysis.bn.BengaliAnalyzer') +JCJKAnalyzer = autoclass('org.apache.lucene.analysis.cjk.CJKAnalyzer') +JDanishAnalyzer = autoclass('org.apache.lucene.analysis.da.DanishAnalyzer') +JDefaultEnglishAnalyzer = autoclass('io.anserini.analysis.DefaultEnglishAnalyzer') +JDutchAnalyzer = autoclass('org.apache.lucene.analysis.nl.DutchAnalyzer') +JFinnishAnalyzer = autoclass('org.apache.lucene.analysis.fi.FinnishAnalyzer') +JFrenchAnalyzer = autoclass('org.apache.lucene.analysis.fr.FrenchAnalyzer') +JGermanAnalyzer = autoclass('org.apache.lucene.analysis.de.GermanAnalyzer') +JHindiAnalyzer = autoclass('org.apache.lucene.analysis.hi.HindiAnalyzer') +JHungarianAnalyzer = autoclass('org.apache.lucene.analysis.hu.HungarianAnalyzer') +JIndonesianAnalyzer = autoclass('org.apache.lucene.analysis.id.IndonesianAnalyzer') +JItalianAnalyzer = autoclass('org.apache.lucene.analysis.it.ItalianAnalyzer') +JJapaneseAnalyzer = autoclass('org.apache.lucene.analysis.ja.JapaneseAnalyzer') +JNorwegianAnalyzer = autoclass('org.apache.lucene.analysis.no.NorwegianAnalyzer') +JPortugueseAnalyzer = autoclass('org.apache.lucene.analysis.pt.PortugueseAnalyzer') +JRussianAnalyzer = autoclass('org.apache.lucene.analysis.ru.RussianAnalyzer') +JSpanishAnalyzer = autoclass('org.apache.lucene.analysis.es.SpanishAnalyzer') +JSwedishAnalyzer = autoclass('org.apache.lucene.analysis.sv.SwedishAnalyzer') +JTeluguAnalyzer = autoclass('org.apache.lucene.analysis.te.TeluguAnalyzer') +JThaiAnalyzer = autoclass('org.apache.lucene.analysis.th.ThaiAnalyzer') +JTurkishAnalyzer = autoclass('org.apache.lucene.analysis.tr.TurkishAnalyzer') +JWhiteSpaceAnalyzer = autoclass('org.apache.lucene.analysis.core.WhitespaceAnalyzer') +JCharArraySet = autoclass('org.apache.lucene.analysis.CharArraySet') + +# Wrappers around Anserini classes +JAnalyzerUtils = autoclass('io.anserini.analysis.AnalyzerUtils') +JDefaultEnglishAnalyzer = autoclass('io.anserini.analysis.DefaultEnglishAnalyzer') +JTweetAnalyzer = autoclass('io.anserini.analysis.TweetAnalyzer') +JHuggingFaceTokenizerAnalyzer = autoclass('io.anserini.analysis.HuggingFaceTokenizerAnalyzer') + + +def get_lucene_analyzer(language: str='en', stemming: bool=True, stemmer: str='porter', stopwords: bool=True, huggingFaceTokenizer: str=None) -> JAnalyzer: + """Create a Lucene ``Analyzer`` with specific settings. + + Parameters + ---------- + language : str + Name of analyzer. + stemming : bool + Set to stem. + stemmer : str + Stemmer to use. + stopwords : bool + Set to filter stopwords. + huggingFaceTokenizer: str + a huggingface model id or path to a tokenizer.json file + + Returns + ------- + JAnalyzer + Java ``Analyzer`` with specified settings. 
+ """ + if language.lower() == 'ar': + return JArabicAnalyzer() + elif language.lower() == 'bn': + return JBengaliAnalyzer() + elif language.lower() in ['zh', 'ko']: + return JCJKAnalyzer() + elif language.lower() == 'da': + return JDanishAnalyzer() + elif language.lower() == 'nl': + return JDutchAnalyzer() + elif language.lower() == 'fi': + return JFinnishAnalyzer() + elif language.lower() == 'fr': + return JFrenchAnalyzer() + elif language.lower() == 'de': + return JGermanAnalyzer() + elif language.lower() == 'hi': + return JHindiAnalyzer() + elif language.lower() == 'hu': + return JHungarianAnalyzer() + elif language.lower() == 'id': + return JIndonesianAnalyzer() + elif language.lower() == 'it': + return JItalianAnalyzer() + elif language.lower() == 'ja': + return JJapaneseAnalyzer() + elif language.lower() == 'no': + return JNorwegianAnalyzer() + elif language.lower() == 'pt': + return JPortugueseAnalyzer() + elif language.lower() == 'ru': + return JRussianAnalyzer() + elif language.lower() == 'es': + return JSpanishAnalyzer() + elif language.lower() == 'te': + return JTeluguAnalyzer() + elif language.lower() == 'th': + return JThaiAnalyzer() + elif language.lower() == 'tr': + return JTurkishAnalyzer() + elif language.lower() == 'tweet': + return JTweetAnalyzer() + elif language.lower() == 'hgf_tokenizer': + return JHuggingFaceTokenizerAnalyzer(huggingFaceTokenizer) + elif language.lower() == 'en': + if stemming: + if stopwords: + return JDefaultEnglishAnalyzer.newStemmingInstance(stemmer) + else: + return JDefaultEnglishAnalyzer.newStemmingInstance(stemmer, JCharArraySet.EMPTY_SET) + else: + if stopwords: + return JDefaultEnglishAnalyzer.newNonStemmingInstance() + else: + return JDefaultEnglishAnalyzer.newNonStemmingInstance(JCharArraySet.EMPTY_SET) + else: + raise ValueError('Invalid configuration.') + + +class Analyzer: + """Python wrapper around a Lucene ``Analyzer`` to simplify analysis. + + Parameters + ---------- + analyzer : JAnalyzer + Lucene ``Analyzer``. + """ + + def __init__(self, analyzer): + if not isinstance(analyzer, JAnalyzer): + raise TypeError('Invalid JAnalyzer!') + self.analyzer = analyzer + + def analyze(self, text: str) -> List[str]: + """Analyze a piece of text. + + Parameters + ---------- + text : str + Text to analyze. + + Returns + ------- + List[str] + List of tokens corresponding to the output of the analyzer. + """ + results = JAnalyzerUtils.analyze(self.analyzer, text) + tokens = [] + for token in results.toArray(): + tokens.append(token) + return tokens diff --git a/pyserini/collection/__init__.py b/pyserini/collection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..464516a633a1fe9c4f8943f2717b3671cba3ba7a --- /dev/null +++ b/pyserini/collection/__init__.py @@ -0,0 +1,20 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from ._base import Collection, FileSegment, SourceDocument +from ._collection_support import Cord19Article + +__all__ = ['Collection', 'FileSegment', 'SourceDocument', 'Cord19Article'] diff --git a/pyserini/collection/_base.py b/pyserini/collection/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..ad6084c2232896671c75ac6a297f76a9c30a0c00 --- /dev/null +++ b/pyserini/collection/_base.py @@ -0,0 +1,153 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import re +from enum import Enum + +from ..multithreading import Counters +from ..pyclass import autoclass, cast, JPaths + +logger = logging.getLogger(__name__) + + +JFileSegment = autoclass('io.anserini.collection.FileSegment') +JSourceDocument = autoclass('io.anserini.collection.SourceDocument') + + +class JCollections(Enum): + AclAnthology = autoclass('io.anserini.collection.AclAnthology') + CarCollection = autoclass('io.anserini.collection.CarCollection') + Cord19AbstractCollection = autoclass('io.anserini.collection.Cord19AbstractCollection') + ClueWeb09Collection = autoclass('io.anserini.collection.ClueWeb09Collection') + ClueWeb12Collection = autoclass('io.anserini.collection.ClueWeb12Collection') + HtmlCollection = autoclass('io.anserini.collection.HtmlCollection') + JsonCollection = autoclass('io.anserini.collection.JsonCollection') + NewYorkTimesCollection = autoclass('io.anserini.collection.NewYorkTimesCollection') + TrecCollection = autoclass('io.anserini.collection.TrecCollection') + TrecwebCollection = autoclass('io.anserini.collection.TrecwebCollection') + TweetCollection = autoclass('io.anserini.collection.TweetCollection') + WashingtonPostCollection = autoclass('io.anserini.collection.WashingtonPostCollection') + WikipediaCollection = autoclass('io.anserini.collection.WikipediaCollection') + + +class Collection: + """ + Iterable wrapper class for Anserini's DocumentCollection. + + Parameters + ---------- + collection_class : str + Name of collection class to instantiate + collection_path : str + Path to directory containing collection + """ + + def __init__(self, collection_class, collection_path): + self.counters = Counters() + self.collection_class = collection_class + self.collection_path = JPaths.get(collection_path) + self.object = self._get_collection() + self.collection_iterator = self.object.iterator() + + def _get_collection(self): + try: + return JCollections[self.collection_class].value(self.collection_path) + except: + raise ValueError(self.collection_class) + + def __iter__(self): + return self + + def __next__(self): + if self.collection_iterator.hasNext(): + fs = self.collection_iterator.next() + return FileSegment(self, fs, fs.getSegmentPath()) + else: + raise StopIteration + + +class FileSegment: + """ + Iterable wrapper class for Anserini's FileSegment. 
+ + Parameters + ---------- + collection : Collection + Parent collection of the file segment + segment : JFileSegment + FileSegment object to create wrapper from + segment_path : str + Path to file backing the file segment + """ + + def __init__(self, collection, segment, segment_path): + self.collection = collection + try: + self.object = cast(collection.object.getClass().getName() + + '$Segment', segment) + except: + logger.exception('Exception from casting FileSegment type...') + self.object = cast('io.anserini.collection.FileSegment', segment) + + self.segment_iterator = self.object.iterator() + self.segment_path = segment_path + self.segment_name = re.sub(r'\\|\/', '-', collection.collection_path.relativize(segment_path).toString()) + + def __iter__(self): + return self + + def __next__(self): + if self.object.iterator().hasNext(): + d = self.object.iterator().next() + return SourceDocument(self, d) + else: + # log if iteration stopped by error + if self.object.getErrorStatus(): + logger.error(self.segment_name + ': Error from segment iteration, stopping...') + self.collection.counters.errors.increment() + + # stop iteration and log skipped documents + skipped = self.object.getSkippedCount() + if skipped > 0: + self.collection.counters.skips.increment(skipped) + logger.warning(self.segment_name + ': ' + str(skipped) + ' documents skipped') + self.object.close() + raise StopIteration + + +class SourceDocument: + """ + Wrapper class for Anserini's SourceDocument. + + Parameters + ---------- + + segment : FileSegment + Parent segment of the source document + document : io.anserini.collection.SourceDocument + SourceDocument object to create wrapper from + """ + + def __init__(self, segment, document): + if not isinstance(document, JSourceDocument): + raise TypeError('Invalid JSourceDocument!') + self.segment = segment + self.object = document + self.id = self.object.id() + self.indexable = self.object.indexable() + self.contents = self.object.contents() + self.raw = self.object.raw() diff --git a/pyserini/collection/_collection_support.py b/pyserini/collection/_collection_support.py new file mode 100644 index 0000000000000000000000000000000000000000..843bc0a74d9b54067b8aa446f9904bfb7dbe780c --- /dev/null +++ b/pyserini/collection/_collection_support.py @@ -0,0 +1,78 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Implementations of support for specific collections. + +import json + + +class Cord19Article: + """Wrapper class for a raw JSON article from AI2's COVID-19 Open Research Dataset (CORD-19). + + Parameters + ---------- + doc : str + A JSON string of a CORD-19 article. + """ + + def __init__(self, doc): + self.json = json.loads(doc) + # Performs some basic error checking, throws an exception if user tries to instantiate with something + # that isn't from CORD-19. 
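+ # Records carrying a 'cord_uid' field are treated as metadata-only entries (from the CORD-19
+ # metadata CSV), while parsed full-text JSON documents carry a 'paper_id' field and are
+ # treated as full text; anything else raises a TypeError.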
+ if 'cord_uid' in self.json: + self.full_text = False + elif 'paper_id' in self.json: + self.full_text = True + else: + raise TypeError + + def is_full_text(self): + return self.json['has_full_text'] + + def cord_uid(self): + return self.json['cord_uid'] + + def bib_entries(self): + return self.json['bib_entries'] + + def title(self): + try: + if self.is_full_text(): + return self.json['metadata']['title'] + else: + return self.json['csv_metadata']['title'] + except KeyError: + return '' + + def abstract(self): + try: + # For a full-text article, we can grab the abstract from two independent sources, the metadata or the + # actual full text. Here, we make the decision to use the metadata, even for full text. + return self.json['csv_metadata']['abstract'] + except KeyError: + return '' + + def metadata(self): + return self.json['csv_metadata'] + + def body(self): + try: + if self.is_full_text(): + return [entry['text'] for entry in self.json['body_text']] + else: + return [] + except KeyError: + return '' diff --git a/pyserini/demo/acl.py b/pyserini/demo/acl.py new file mode 100644 index 0000000000000000000000000000000000000000..57d0b6f7d9d434ce9b4d1e6596f332c0343e4c95 --- /dev/null +++ b/pyserini/demo/acl.py @@ -0,0 +1,124 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This script provides an interactive web interface demo for retrieval on the ACL dataset. +It requires `flask` (`pip install flask~=2.2.0`). +An example command looks like `python -m pyserini.demo.acl` that starts up a server on port 8080. +The demo can be accessed via "http://localhost:8080" in a web browser. 
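+The demo loads a Lucene index from the local path indexes/lucene-index-acl-paragraph
+(see _load_sparse_searcher below), so that index must already be present.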
+Additional arguments include: + --port [PORT] --hits [Number of hits] + --k1 [BM25 k1] --b [BM25 b] --device [cpu, cuda] +""" +import json +import logging +from argparse import ArgumentParser +from functools import partial +from typing import Callable, Optional, Tuple, Union + +from flask import Flask, render_template, request, flash, jsonify +from pyserini.search import LuceneSearcher, FaissSearcher, AutoQueryEncoder + +logging.basicConfig( + format='%(asctime)s | %(levelname)s | %(name)s | %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=logging.INFO, +) +logger = logging.getLogger('acl-demo') + +VERSION = '1.0' +Searcher = Union[FaissSearcher, LuceneSearcher] + + +def create_app(k: int, load_searcher_fn: Callable[[str], Tuple[Searcher, str]]): + app = Flask(__name__) + + lang = 'en' + searcher, retriever = load_searcher_fn(lang) + + @app.route('/') + def index(): + nonlocal lang, searcher, retriever + return render_template('acl.html', lang=lang, retriever=retriever) + + @app.route('/search', methods=['GET', 'POST']) + def search(): + nonlocal lang, searcher, retriever + query = request.form['q'] + if not query: + search_results = [] + flash('Question is required') + else: + hits = searcher.search(query, k=k) + docs = [searcher.doc(hit.docid) for hit in hits] + search_results = [ + { + 'rank': r + 1, + 'docid': hit.docid, + 'doc': docs[r].contents(), + 'score': hit.score, + } + for r, hit in enumerate(hits) + ] + return render_template( + 'acl.html', search_results=search_results, query=query, lang=lang, retriever=retriever + ) + + + return app + + +def _load_sparse_searcher(language: str, k1: Optional[float]=None, b: Optional[float]=None) -> (Searcher, str): + searcher = LuceneSearcher('indexes/lucene-index-acl-paragraph') + searcher.set_language(language) + if k1 is not None and b is not None: + searcher.set_bm25(k1, b) + retriever_name = f'BM25 (k1={k1}, b={b})' + else: + retriever_name = 'BM25' + + return searcher, retriever_name + + +def main(): + parser = ArgumentParser() + + parser.add_argument('--k1', type=float, help='BM25 k1 parameter.') + parser.add_argument('--b', type=float, help='BM25 b parameter.') + parser.add_argument('--hits', type=int, default=10, help='Number of hits returned by the retriever') + parser.add_argument( + '--device', + type=str, + default='cpu', + help='Device to run query encoder, cpu or [cuda:0, cuda:1, ...] (used only when index is based on FAISS)', + ) + parser.add_argument( + '--port', + default=8080, + type=int, + help='Web server port', + ) + + args = parser.parse_args() + + load_fn = partial(_load_sparse_searcher, k1=args.k1, b=args.b) + + app = create_app(args.hits, load_fn) + app.run(host='0.0.0.0', port=args.port) + + +if __name__ == '__main__': + main() diff --git a/pyserini/demo/dpr.py b/pyserini/demo/dpr.py new file mode 100644 index 0000000000000000000000000000000000000000..02e9aca428be06bd1a54505fa4f6542e4f9d6ad3 --- /dev/null +++ b/pyserini/demo/dpr.py @@ -0,0 +1,105 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cmd +import json +import random + +from pyserini.search.lucene import LuceneSearcher +from pyserini.search.faiss import FaissSearcher, DprQueryEncoder +from pyserini.search.hybrid import HybridSearcher +from pyserini import search + + +class DPRDemo(cmd.Cmd): + nq_dev_topics = list(search.get_topics('dpr-nq-dev').values()) + trivia_dev_topics = list(search.get_topics('dpr-trivia-dev').values()) + + ssearcher = LuceneSearcher.from_prebuilt_index('wikipedia-dpr') + searcher = ssearcher + + encoder = DprQueryEncoder("facebook/dpr-question_encoder-multiset-base") + index = 'wikipedia-dpr-multi-bf' + dsearcher = FaissSearcher.from_prebuilt_index( + index, + encoder + ) + hsearcher = HybridSearcher(dsearcher, ssearcher) + + k = 10 + prompt = '>>> ' + + def precmd(self, line): + if line[0] == '/': + line = line[1:] + return line + + def do_help(self, arg): + print(f'/help : returns this message') + print(f'/k [NUM] : sets k (number of hits to return) to [NUM]') + print(f'/mode [MODE] : sets retriever type to [MODE] (one of sparse, dense, hybrid)') + print(f'/random [COLLECTION]: returns results for a random question from the dev subset [COLLECTION] (one of nq, trivia).') + + def do_k(self, arg): + print(f'setting k = {int(arg)}') + self.k = int(arg) + + def do_mode(self, arg): + if arg == "sparse": + self.searcher = self.ssearcher + elif arg == "dense": + self.searcher = self.dsearcher + elif arg == "hybrid": + self.searcher = self.hsearcher + else: + print( + f'Mode "{arg}" is invalid. Mode should be one of [sparse, dense, hybrid].') + return + print(f'setting retriver = {arg}') + + def do_random(self, arg): + if arg == "nq": + topics = self.nq_dev_topics + elif arg == "trivia": + topics = self.trivia_dev_topics + else: + print( + f'Collection "{arg}" is invalid. Collection should be one of [nq, trivia].') + return + q = random.choice(topics)['title'] + print(f'question: {q}') + self.default(q) + + def do_EOF(self, line): + return True + + def default(self, q): + hits = self.searcher.search(q, self.k) + + for i in range(0, len(hits)): + raw_doc = None + if isinstance(self.searcher, LuceneSearcher): + raw_doc = hits[i].raw + else: + doc = self.searcher.doc(hits[i].docid) + if doc: + raw_doc = doc.raw() + jsondoc = json.loads(raw_doc) + print(f'{i + 1:2} {hits[i].score:.5f} {jsondoc["contents"]}') + + +if __name__ == '__main__': + DPRDemo().cmdloop() diff --git a/pyserini/demo/miracl.py b/pyserini/demo/miracl.py new file mode 100644 index 0000000000000000000000000000000000000000..ffecb93f2557658f8b5a3bd3546fb9be295f9240 --- /dev/null +++ b/pyserini/demo/miracl.py @@ -0,0 +1,149 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This script provides an interactive web interface demo for retrieval on the MIRACL dataset. +It requires `flask` (`pip install flask~=2.2.0`). 
+An example command looks like `python -m pyserini.demo.miracl` that starts up a server on port 8080. +The demo can be accessed via "http://localhost:8080" in a web browser. +Additional arguments include: + --port [PORT] --hits [Number of hits] --index [BM25 or mdpr-tied-pft-msmarco] + --k1 [BM25 k1] --b [BM25 b] --device [cpu, cuda] +""" +import json +import logging +from argparse import ArgumentParser +from functools import partial +from typing import Callable, Optional, Tuple, Union + +from flask import Flask, render_template, request, flash, jsonify +from pyserini.search import LuceneSearcher, FaissSearcher, AutoQueryEncoder + +logging.basicConfig( + format='%(asctime)s | %(levelname)s | %(name)s | %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=logging.INFO, +) +logger = logging.getLogger('miracl-demo') + +VERSION = '1.0' +LANGUAGES = ('ar', 'bn', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th', 'zh') +Searcher = Union[FaissSearcher, LuceneSearcher] + + +def create_app(k: int, load_searcher_fn: Callable[[str], Tuple[Searcher, str]]): + app = Flask(__name__) + + lang = LANGUAGES[0] + searcher, retriever = load_searcher_fn(lang) + + @app.route('/') + def index(): + nonlocal lang, searcher, retriever + return render_template('miracl.html', lang=lang, retriever=retriever) + + @app.route('/search', methods=['GET', 'POST']) + def search(): + nonlocal lang, searcher, retriever + query = request.form['q'] + if not query: + search_results = [] + flash('Question is required') + else: + hits = searcher.search(query, k=k) + docs = [json.loads(searcher.doc(hit.docid).raw()) for hit in hits] + search_results = [ + { + 'rank': r + 1, + 'docid': hit.docid, + 'doc': docs[r]['text'], + 'title': docs[r]['title'], + 'score': hit.score, + } + for r, hit in enumerate(hits) + ] + return render_template( + 'miracl.html', search_results=search_results, query=query, lang=lang, retriever=retriever + ) + + @app.route('/lang', methods=['GET']) + def change_language(): + nonlocal lang, searcher, retriever + new_lang = request.args.get('new_lang', '', type=str) + if not new_lang or new_lang not in LANGUAGES: + return + + lang = new_lang + searcher, retriever = load_searcher_fn(lang) + return jsonify(lang=lang) + + return app + + +def _load_sparse_searcher(language: str, k1: Optional[float]=None, b: Optional[float]=None) -> (Searcher, str): + searcher = LuceneSearcher.from_prebuilt_index(f'miracl-v{VERSION}-{language}') + searcher.set_language(language) + if k1 is not None and b is not None: + searcher.set_bm25(k1, b) + retriever_name = f'BM25 (k1={k1}, b={b})' + else: + retriever_name = 'BM25' + + return searcher, retriever_name + + +def _load_faiss_searcher(language: str, device: str) -> (Searcher, str): + query_encoder = AutoQueryEncoder(encoder_dir='castorini/mdpr-tied-pft-msmarco', device=device) + searcher = FaissSearcher.from_prebuilt_index( + f'miracl-v{VERSION}-{language}-mdpr-tied-pft-msmarco', query_encoder + ) + retriever_name = 'mDPR-pFT-MSMARCO' + return searcher, retriever_name + + +def main(): + parser = ArgumentParser() + + parser.add_argument('--index', default='BM25', choices=('BM25', 'mdpr-tied-pft-msmarco'), help='Index type.') + parser.add_argument('--k1', type=float, help='BM25 k1 parameter.') + parser.add_argument('--b', type=float, help='BM25 b parameter.') + parser.add_argument('--hits', type=int, default=10, help='Number of hits returned by the retriever') + parser.add_argument( + '--device', + type=str, + default='cpu', + help='Device to run query 
encoder, cpu or [cuda:0, cuda:1, ...] (used only when index is based on FAISS)', + ) + parser.add_argument( + '--port', + default=8080, + type=int, + help='Web server port', + ) + + args = parser.parse_args() + + if args.index == 'mdpr-tied-pft-msmarco': + load_fn = partial(_load_faiss_searcher, device=args.device) + else: + load_fn = partial(_load_sparse_searcher, k1=args.k1, b=args.b) + + app = create_app(args.hits, load_fn) + app.run(host='0.0.0.0', port=args.port) + + +if __name__ == '__main__': + main() diff --git a/pyserini/demo/msmarco.py b/pyserini/demo/msmarco.py new file mode 100644 index 0000000000000000000000000000000000000000..b73276d1c4b20ea88aea10155a4d0f25d2764b56 --- /dev/null +++ b/pyserini/demo/msmarco.py @@ -0,0 +1,118 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cmd +import json +import os +import random + +from pyserini.search.lucene import LuceneSearcher +from pyserini.search.faiss import FaissSearcher, TctColBertQueryEncoder, AnceQueryEncoder +from pyserini.search.hybrid import HybridSearcher +from pyserini import search + + +class MsMarcoDemo(cmd.Cmd): + dev_topics = list(search.get_topics('msmarco-passage-dev-subset').values()) + + ssearcher = LuceneSearcher.from_prebuilt_index('msmarco-passage') + dsearcher = None + hsearcher = None + searcher = ssearcher + + k = 10 + prompt = '>>> ' + + # https://stackoverflow.com/questions/35213134/command-prefixes-in-python-cli-using-cmd-in-pythons-standard-library + def precmd(self, line): + if line[0] == '/': + line = line[1:] + return line + + def do_help(self, arg): + print(f'/help : returns this message') + print(f'/k [NUM] : sets k (number of hits to return) to [NUM]') + print(f'/model [MODEL] : sets encoder to use the model [MODEL] (one of tct, ance)') + print(f'/mode [MODE] : sets retriever type to [MODE] (one of sparse, dense, hybrid)') + print(f'/random : returns results for a random question from dev subset') + + def do_k(self, arg): + print(f'setting k = {int(arg)}') + self.k = int(arg) + + def do_mode(self, arg): + if arg == "sparse": + self.searcher = self.ssearcher + elif arg == "dense": + if self.dsearcher is None: + print(f'Specify model through /model before using dense retrieval.') + return + self.searcher = self.dsearcher + elif arg == "hybrid": + if self.hsearcher is None: + print(f'Specify model through /model before using hybrid retrieval.') + return + self.searcher = self.hsearcher + else: + print( + f'Mode "{arg}" is invalid. Mode should be one of [sparse, dense, hybrid].') + return + print(f'setting retriver = {arg}') + + def do_model(self, arg): + if arg == "tct": + encoder = TctColBertQueryEncoder("castorini/tct_colbert-msmarco") + index = "msmarco-passage-tct_colbert-hnsw" + elif arg == "ance": + encoder = AnceQueryEncoder("castorini/ance-msmarco-passage") + index = "msmarco-passage-ance-bf" + else: + print( + f'Model "{arg}" is invalid. 
Model should be one of [tct, ance].') + return + + self.dsearcher = FaissSearcher.from_prebuilt_index( + index, + encoder + ) + self.hsearcher = HybridSearcher(self.dsearcher, self.ssearcher) + print(f'setting model = {arg}') + + def do_random(self, arg): + q = random.choice(self.dev_topics)['title'] + print(f'question: {q}') + self.default(q) + + def do_EOF(self, line): + return True + + def default(self, q): + hits = self.searcher.search(q, self.k) + + for i in range(0, len(hits)): + raw_doc = None + if isinstance(self.searcher, LuceneSearcher): + raw_doc = hits[i].raw + else: + doc = self.searcher.doc(hits[i].docid) + if doc: + raw_doc = doc.raw() + jsondoc = json.loads(raw_doc) + print(f'{i + 1:2} {hits[i].score:.5f} {jsondoc["contents"]}') + + +if __name__ == '__main__': + MsMarcoDemo().cmdloop() diff --git a/pyserini/demo/templates/acl.html b/pyserini/demo/templates/acl.html new file mode 100644 index 0000000000000000000000000000000000000000..f3e3e881b069cfc45bf26fc764390f2e1b8b7d38 --- /dev/null +++ b/pyserini/demo/templates/acl.html @@ -0,0 +1,74 @@ + + + + + + + + + + + + ACL 🌍🙌🌏 Demo + + + +
+

ACL

+ acl logo +

Demo

+
+
+ +
+ {% for message in get_flashed_messages() %} +
{{ message }}
+ {% endfor %} + +
+
+
+ + +
+
+
+      {% if search_results %}
+      <table>
+        <thead>
+          <tr>
+            <th>#</th>
+            <th>Score</th>
+            <th>Passage ID</th>
+            <th>Content</th>
+          </tr>
+        </thead>
+        <tbody>
+          {% for res in search_results %}
+          <tr>
+            <td>{{ res["rank"] }}</td>
+            <td>{{ "%.2f"|format(res["score"]) }}</td>
+            <td>{{ res["docid"] }}</td>
+            <td>{{ res["doc"] }}</td>
+          </tr>
+          {% endfor %}
+        </tbody>
+      </table>
+      {% endif %}
+ + \ No newline at end of file diff --git a/pyserini/demo/templates/assets/acl-logo.svg b/pyserini/demo/templates/assets/acl-logo.svg new file mode 100644 index 0000000000000000000000000000000000000000..8b2d548dff356aaa98a0e3dbcee669e4af754f43 --- /dev/null +++ b/pyserini/demo/templates/assets/acl-logo.svg @@ -0,0 +1,10 @@ + + + +Created with Fabric.js 5.3.0 + + + + + + \ No newline at end of file diff --git a/pyserini/demo/templates/miracl.html b/pyserini/demo/templates/miracl.html new file mode 100644 index 0000000000000000000000000000000000000000..28e49fdee803768af81603c0b92583253c37b50e --- /dev/null +++ b/pyserini/demo/templates/miracl.html @@ -0,0 +1,127 @@ + + + + + + + + + + + + MIRACL 🌍🙌🌏 Demo + + +

MIRACL 🌍🙌🌏 Demo

+

Multilingual Information Retrieval Across a Continuum of Languages

+ +
+ +

+          MIRACL is a multilingual dataset for ad hoc retrieval that spans 18 different languages, collectively encompassing over three billion native speakers around the world.

+ +
+ +
+ +
+
+
+ Loading... +
+
+
+ + retrieves passages using {{retriever}}. + +
+
+ +
+ +
+ {% for message in get_flashed_messages() %} +
{{ message }}
+ {% endfor %} + +
+
+
+ + +
+
+
+      {% if search_results %}
+      <table>
+        <thead>
+          <tr>
+            <th>#</th>
+            <th>Score</th>
+            <th>Passage ID</th>
+            <th>Title</th>
+            <th>Content</th>
+          </tr>
+        </thead>
+        <tbody>
+          {% for res in search_results %}
+          <tr>
+            <td>{{res["rank"]}}</td>
+            <td>{{"%.2f"|format(res["score"])}}</td>
+            <td>{{res["docid"]}}</td>
+            <td>{{res["title"]}}</td>
+            <td>{{res["doc"]}}</td>
+          </tr>
+          {% endfor %}
+        </tbody>
+      </table>
+      {% endif %}
+ + \ No newline at end of file diff --git a/pyserini/dsearch.py b/pyserini/dsearch.py new file mode 100644 index 0000000000000000000000000000000000000000..72947e9e5662f2b293a966576192abb053a9c3e6 --- /dev/null +++ b/pyserini/dsearch.py @@ -0,0 +1,46 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Deprecated. The package ``pyserini.dsearch` has been renamed `pyserini.search.faiss`. Stubs are retained here for +redirection purpose to ensure that code in existing published papers remain function (with warnings).""" + +import os +import sys + +import pyserini.search.faiss +from pyserini.search.faiss import TctColBertQueryEncoder + +__all__ = ['SimpleDenseSearcher', 'BinaryDenseSearcher', 'TctColBertQueryEncoder'] + + +class SimpleDenseSearcher(pyserini.search.faiss.FaissSearcher): + def __new__(cls, *args, **kwargs): + print('pyserini.dsearch.SimpleDenseSearcher class has been deprecated, ' + 'please use FaissSearcher from pyserini.search.faiss instead') + return super().__new__(cls) + + +class BinaryDenseSearcher(pyserini.search.faiss.BinaryDenseSearcher): + def __new__(cls, *args, **kwargs): + print('pyserini.dsearch.BinaryDenseSearcher class has been deprecated, ' + 'please use BinaryDenseSearcher from pyserini.search.faiss instead') + return super().__new__(cls) + + +if __name__ == "__main__": + print('WARNING: pyserini.dsearch is deprecated, please use pyserini.search.faiss instead!') + args = " ".join(sys.argv[1:]) + os.system(f'python -m pyserini.search.faiss {args}') diff --git a/pyserini/encode/__init__.py b/pyserini/encode/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ba0224d417c96744da02ad8b0f73f651670b5f25 --- /dev/null +++ b/pyserini/encode/__init__.py @@ -0,0 +1,28 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from ._base import DocumentEncoder, QueryEncoder, JsonlCollectionIterator,\ + RepresentationWriter, FaissRepresentationWriter, JsonlRepresentationWriter, PcaEncoder +from ._ance import AnceEncoder, AnceDocumentEncoder, AnceQueryEncoder +from ._auto import AutoQueryEncoder, AutoDocumentEncoder +from ._dpr import DprDocumentEncoder, DprQueryEncoder +from ._tct_colbert import TctColBertDocumentEncoder, TctColBertQueryEncoder +from ._aggretriever import AggretrieverDocumentEncoder, AggretrieverQueryEncoder +from ._unicoil import UniCoilEncoder, UniCoilDocumentEncoder, UniCoilQueryEncoder +from ._cached_data import CachedDataQueryEncoder +from ._tok_freq import TokFreqQueryEncoder +from ._splade import SpladeQueryEncoder +from ._slim import SlimQueryEncoder \ No newline at end of file diff --git a/pyserini/encode/__main__.py b/pyserini/encode/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..64572c9fa7ce0bb7f24aa34531e96300ce8efe54 --- /dev/null +++ b/pyserini/encode/__main__.py @@ -0,0 +1,147 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import sys + +from pyserini.encode import JsonlRepresentationWriter, FaissRepresentationWriter, JsonlCollectionIterator +from pyserini.encode import DprDocumentEncoder, TctColBertDocumentEncoder, AnceDocumentEncoder, AggretrieverDocumentEncoder, AutoDocumentEncoder +from pyserini.encode import UniCoilDocumentEncoder + + +encoder_class_map = { + "dpr": DprDocumentEncoder, + "tct_colbert": TctColBertDocumentEncoder, + "aggretriever": AggretrieverDocumentEncoder, + "ance": AnceDocumentEncoder, + "sentence-transformers": AutoDocumentEncoder, + "unicoil": UniCoilDocumentEncoder, + "auto": AutoDocumentEncoder, +} +ALLOWED_POOLING_OPTS = ["cls","mean"] + +def init_encoder(encoder, encoder_class, device): + _encoder_class = encoder_class + + # determine encoder_class + if encoder_class is not None: + encoder_class = encoder_class_map[encoder_class] + else: + # if any class keyword was matched in the given encoder name, + # use that encoder class + for class_keyword in encoder_class_map: + if class_keyword in encoder.lower(): + encoder_class = encoder_class_map[class_keyword] + break + + # if none of the class keyword was matched, + # use the AutoDocumentEncoder + if encoder_class is None: + encoder_class = AutoDocumentEncoder + + # prepare arguments to encoder class + kwargs = dict(model_name=encoder, device=device) + if (_encoder_class == "sentence-transformers") or ("sentence-transformers" in encoder): + kwargs.update(dict(pooling='mean', l2_norm=True)) + if (_encoder_class == "contriever") or ("contriever" in encoder): + kwargs.update(dict(pooling='mean', l2_norm=False)) + return encoder_class(**kwargs) + + +def parse_args(parser, commands): + # Divide argv by commands + split_argv = [[]] + for c in sys.argv[1:]: + if c in commands.choices: + split_argv.append([c]) + else: + split_argv[-1].append(c) + # Initialize namespace + 
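+    # Illustrative note (not part of the original patch): the argv split above groups
+    # flags by the sub-command they follow ('input', 'output', 'encoder', as registered
+    # in __main__ below), so a typical invocation looks roughly like:
+    #   python -m pyserini.encode \
+    #     input --corpus corpus.jsonl \
+    #     output --embeddings emb_dir --to-faiss \
+    #     encoder --encoder castorini/tct_colbert-msmarco
+    # where 'corpus.jsonl' and 'emb_dir' are placeholder paths. Each group is parsed
+    # into its own nested namespace (args.input, args.output, args.encoder).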
args = argparse.Namespace() + for c in commands.choices: + setattr(args, c, None) + # Parse each command + parser.parse_args(split_argv[0], namespace=args) # Without command + for argv in split_argv[1:]: # Commands + n = argparse.Namespace() + setattr(args, argv[0], n) + parser.parse_args(argv, namespace=n) + return args + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + commands = parser.add_subparsers(title='sub-commands') + input_parser = commands.add_parser('input') + input_parser.add_argument('--corpus', type=str, + help='directory that contains corpus files to be encoded, in jsonl format.', + required=True) + input_parser.add_argument('--fields', help='fields that contents in jsonl has (in order)', + nargs='+', default=['text'], required=False) + input_parser.add_argument('--docid-field', + help='name of document id field name. If you have a custom id with a name other than "id", "_id" or "docid", then use this argument', + default=None, required=False) + input_parser.add_argument('--delimiter', help='delimiter for the fields', default='\n', required=False) + input_parser.add_argument('--shard-id', type=int, help='shard-id 0-based', default=0, required=False) + input_parser.add_argument('--shard-num', type=int, help='number of shards', default=1, required=False) + + output_parser = commands.add_parser('output') + output_parser.add_argument('--embeddings', type=str, help='directory to store encoded corpus', required=True) + output_parser.add_argument('--to-faiss', action='store_true', default=False) + + encoder_parser = commands.add_parser('encoder') + encoder_parser.add_argument('--encoder', type=str, help='encoder name or path', required=True) + encoder_parser.add_argument('--encoder-class', type=str, required=False, default=None, + choices=["dpr", "bpr", "tct_colbert", "ance", "sentence-transformers", "auto"], + help='which query encoder class to use. `default` would infer from the args.encoder') + encoder_parser.add_argument('--fields', help='fields to encode', nargs='+', default=['text'], required=False) + encoder_parser.add_argument('--batch-size', type=int, help='batch size', default=64, required=False) + encoder_parser.add_argument('--max-length', type=int, help='max length', default=256, required=False) + encoder_parser.add_argument('--dimension', type=int, help='dimension', default=768, required=False) + encoder_parser.add_argument('--device', type=str, help='device cpu or cuda [cuda:0, cuda:1...]', + default='cuda:0', required=False) + encoder_parser.add_argument('--fp16', action='store_true', default=False) + encoder_parser.add_argument('--add-sep', action='store_true', default=False) + encoder_parser.add_argument('--pooling', type=str, default='cls', help='for auto classes, allow the ability to dictate pooling strategy', required=False) + + args = parse_args(parser, commands) + delimiter = args.input.delimiter.replace("\\n", "\n") # argparse would add \ prior to the passed '\n\n' + + encoder = init_encoder(args.encoder.encoder, args.encoder.encoder_class, device=args.encoder.device) + if type(encoder).__name__ == "AutoDocumentEncoder": + if args.encoder.pooling in ALLOWED_POOLING_OPTS: + encoder.pooling = args.encoder.pooling + else: + raise ValueError(f"Only allowed to use pooling types {ALLOWED_POOLING_OPTS}. 
You entered {args.encoder.pooling}") + if args.output.to_faiss: + embedding_writer = FaissRepresentationWriter(args.output.embeddings, dimension=args.encoder.dimension) + else: + embedding_writer = JsonlRepresentationWriter(args.output.embeddings) + collection_iterator = JsonlCollectionIterator(args.input.corpus, args.input.fields, args.input.docid_field, delimiter) + + with embedding_writer: + for batch_info in collection_iterator(args.encoder.batch_size, args.input.shard_id, args.input.shard_num): + kwargs = { + 'texts': batch_info['text'], + 'titles': batch_info['title'] if 'title' in args.encoder.fields else None, + 'expands': batch_info['expand'] if 'expand' in args.encoder.fields else None, + 'fp16': args.encoder.fp16, + 'max_length': args.encoder.max_length, + 'add_sep': args.encoder.add_sep, + } + embeddings = encoder.encode(**kwargs) + batch_info['vector'] = embeddings + embedding_writer.write(batch_info, args.input.fields) diff --git a/pyserini/encode/__pycache__/__init__.cpython-310.pyc b/pyserini/encode/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2910416445593e340c14fcd8871f29ab34d13c3c Binary files /dev/null and b/pyserini/encode/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_aggretriever.cpython-310.pyc b/pyserini/encode/__pycache__/_aggretriever.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4735d5cb6a422069ee41b20baf670ab7816e9cf1 Binary files /dev/null and b/pyserini/encode/__pycache__/_aggretriever.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_ance.cpython-310.pyc b/pyserini/encode/__pycache__/_ance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e33e1abc57ac3a728756f989719cca1719726e4 Binary files /dev/null and b/pyserini/encode/__pycache__/_ance.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_auto.cpython-310.pyc b/pyserini/encode/__pycache__/_auto.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..67890b7336ea5c894c98e15a1d79d1ecf49fa308 Binary files /dev/null and b/pyserini/encode/__pycache__/_auto.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_base.cpython-310.pyc b/pyserini/encode/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2679bfe25ecd0008dea2cb35057971817285e72d Binary files /dev/null and b/pyserini/encode/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_cached_data.cpython-310.pyc b/pyserini/encode/__pycache__/_cached_data.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2ab4c5c0ca3194304da58b88bed92b08ba0d4c4 Binary files /dev/null and b/pyserini/encode/__pycache__/_cached_data.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_dpr.cpython-310.pyc b/pyserini/encode/__pycache__/_dpr.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bcb35e9cb6539e11a5964d29d828ee9902a3749f Binary files /dev/null and b/pyserini/encode/__pycache__/_dpr.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_slim.cpython-310.pyc b/pyserini/encode/__pycache__/_slim.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18c5a21734359836fa7d9b86029571f148c8fdf3 Binary files /dev/null and b/pyserini/encode/__pycache__/_slim.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_splade.cpython-310.pyc 
b/pyserini/encode/__pycache__/_splade.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e4f84d77a9399a3d7dd50b3d48b71931e2594c7 Binary files /dev/null and b/pyserini/encode/__pycache__/_splade.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_tct_colbert.cpython-310.pyc b/pyserini/encode/__pycache__/_tct_colbert.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62034a0efa0f792f03b4cbf289e9afe4463da0d8 Binary files /dev/null and b/pyserini/encode/__pycache__/_tct_colbert.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_tok_freq.cpython-310.pyc b/pyserini/encode/__pycache__/_tok_freq.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..155571c7dd0acde6711100105bceb2aa199f2942 Binary files /dev/null and b/pyserini/encode/__pycache__/_tok_freq.cpython-310.pyc differ diff --git a/pyserini/encode/__pycache__/_unicoil.cpython-310.pyc b/pyserini/encode/__pycache__/_unicoil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ace03161a21e595861e8c2ca7a22a02642c00b6 Binary files /dev/null and b/pyserini/encode/__pycache__/_unicoil.cpython-310.pyc differ diff --git a/pyserini/encode/_aggretriever.py b/pyserini/encode/_aggretriever.py new file mode 100644 index 0000000000000000000000000000000000000000..224eb2b05c826d61d7933b8fa81995985cb29ceb --- /dev/null +++ b/pyserini/encode/_aggretriever.py @@ -0,0 +1,188 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional +import numpy as np +import torch +from torch import Tensor +import torch.nn as nn +if torch.cuda.is_available(): + from torch.cuda.amp import autocast + +from transformers import DistilBertConfig, BertConfig +from transformers import AutoModelForMaskedLM, AutoTokenizer, PreTrainedModel +from pyserini.encode import DocumentEncoder, QueryEncoder + +class BERTAggretrieverEncoder(PreTrainedModel): + config_class = BertConfig + base_model_prefix = 'encoder' + load_tf_weights = None + + def __init__(self, config: BertConfig): + super().__init__(config) + self.config = config + self.softmax = nn.Softmax(dim=-1) + self.encoder = AutoModelForMaskedLM.from_config(config) + self.tok_proj = torch.nn.Linear(config.hidden_size, 1) + self.cls_proj = torch.nn.Linear(config.hidden_size, 128) + self.init_weights() + + # Copied from https://github.com/castorini/dhr/blob/main/tevatron/Aggretriever/utils.py + def cal_remove_dim(self, dims, vocab_size=30522): + remove_dims = vocab_size % dims + if remove_dims > 1000: # the first 1000 tokens in BERT are useless + remove_dims -= dims + return remove_dims + + # Copied from https://github.com/castorini/dhr/blob/main/tevatron/Aggretriever/utils.py + def aggregate(self, + lexical_reps: Tensor, + dims: int = 640, + remove_dims: int = -198, + full: bool = True + ): + if full: + remove_dims = self.cal_remove_dim(dims*2) + batch_size = lexical_reps.shape[0] + if remove_dims >= 0: + lexical_reps = lexical_reps[:, remove_dims:].view(batch_size, -1, dims*2) + else: + lexical_reps = torch.nn.functional.pad(lexical_reps, (0, -remove_dims), "constant", 0).view(batch_size, -1, dims*2) + tok_reps, _ = lexical_reps.max(1) + positive_tok_reps = tok_reps[:, 0:2*dims:2] + negative_tok_reps = tok_reps[:, 1:2*dims:2] + positive_mask = positive_tok_reps > negative_tok_reps + negative_mask = positive_tok_reps <= negative_tok_reps + tok_reps = positive_tok_reps * positive_mask - negative_tok_reps * negative_mask + else: + remove_dims = self.cal_remove_dim(dims) + batch_size = lexical_reps.shape[0] + lexical_reps = lexical_reps[:, remove_dims:].view(batch_size, -1, dims) + tok_reps, index_reps = lexical_reps.max(1) + return tok_reps + + # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights + def _init_weights(self, module): + """ Initialize the weights """ + if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, torch.nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def init_weights(self): + self.encoder.init_weights() + self.tok_proj.apply(self._init_weights) + self.cls_proj.apply(self._init_weights) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + token_type_ids: torch.Tensor = None, + skip_mlm: bool = False + ): + seq_out = self.encoder(input_ids=input_ids, attention_mask=attention_mask, return_dict=True) + seq_hidden = seq_out.hidden_states[-1] + cls_hidden = seq_hidden[:,0] # get [CLS] embeddings + term_weights = self.tok_proj(seq_hidden[:,1:]) # batch, seq, 1 + if not skip_mlm: + logits = seq_out.logits[:,1:] # batch, seq-1, vocab + logits = self.softmax(logits) + attention_mask = 
attention_mask[:,1:].unsqueeze(-1) + lexical_reps = torch.max((logits * term_weights) * attention_mask, dim=-2).values + else: + # w/o MLM + lexical_reps = torch.zeros(seq_hidden.shape[0], seq_hidden.shape[1], 30522, dtype=seq_hidden.dtype, device=seq_hidden.device) # (batch, len, vocab) + lexical_reps = torch.scatter(lexical_reps, dim=-1, index=input_ids[:,1:,None], src=term_weights) + lexical_reps = lexical_reps.max(-2).values + + lexical_reps = self.aggregate(lexical_reps, 640) + semantic_reps = self.cls_proj(cls_hidden) + return torch.cat((semantic_reps, lexical_reps), -1) + + +class DistlBERTAggretrieverEncoder(BERTAggretrieverEncoder): + config_class = DistilBertConfig + base_model_prefix = 'encoder' + load_tf_weights = None + + +class AggretrieverDocumentEncoder(DocumentEncoder): + def __init__(self, model_name: str, tokenizer_name=None, device='cuda:0'): + self.device = device + if 'distilbert' in model_name.lower(): + self.model = DistlBERTAggretrieverEncoder.from_pretrained(model_name) + else: + self.model = BERTAggretrieverEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, titles=None, fp16=False, max_length=512, **kwargs): + if titles is not None: + texts = [f'{title} {text}' for title, text in zip(titles, texts)] + else: + texts = [text for text in texts] + inputs = self.tokenizer( + texts, + max_length=max_length, + padding="longest", + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + if fp16: + with autocast(): + with torch.no_grad(): + outputs = self.model(**inputs) + else: + outputs = self.model(**inputs) + return outputs.detach().cpu().numpy() + + +class AggretrieverQueryEncoder(QueryEncoder): + def __init__(self, model_name: str, tokenizer_name=None, device='cuda:0'): + self.device = device + if 'distilbert' in model_name.lower(): + self.model = DistlBERTAggretrieverEncoder.from_pretrained(model_name) + else: + self.model = BERTAggretrieverEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, fp16=False, max_length=32, **kwargs): + texts = [text for text in texts] + inputs = self.tokenizer( + texts, + max_length=max_length, + padding="longest", + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + if fp16: + with autocast(): + with torch.no_grad(): + outputs = self.model(**inputs) + else: + outputs = self.model(**inputs) + return outputs.detach().cpu().numpy() \ No newline at end of file diff --git a/pyserini/encode/_ance.py b/pyserini/encode/_ance.py new file mode 100644 index 0000000000000000000000000000000000000000..10225c2b59a6c406dddbae46a5979703e7b5f750 --- /dev/null +++ b/pyserini/encode/_ance.py @@ -0,0 +1,119 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import torch +from transformers import PreTrainedModel, RobertaConfig, RobertaModel, RobertaTokenizer + +from pyserini.encode import DocumentEncoder, QueryEncoder + + +class AnceEncoder(PreTrainedModel): + config_class = RobertaConfig + base_model_prefix = 'ance_encoder' + load_tf_weights = None + _keys_to_ignore_on_load_missing = [r'position_ids'] + _keys_to_ignore_on_load_unexpected = [r'pooler', r'classifier'] + + def __init__(self, config: RobertaConfig): + super().__init__(config) + self.config = config + self.roberta = RobertaModel(config) + self.embeddingHead = torch.nn.Linear(config.hidden_size, 768) + self.norm = torch.nn.LayerNorm(768) + self.init_weights() + + # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights + def _init_weights(self, module): + """ Initialize the weights """ + if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, torch.nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def init_weights(self): + self.roberta.init_weights() + self.embeddingHead.apply(self._init_weights) + self.norm.apply(self._init_weights) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ): + input_shape = input_ids.size() + device = input_ids.device + if attention_mask is None: + attention_mask = ( + torch.ones(input_shape, device=device) + if input_ids is None + else (input_ids != self.roberta.config.pad_token_id) + ) + outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask) + sequence_output = outputs.last_hidden_state + pooled_output = sequence_output[:, 0, :] + pooled_output = self.norm(self.embeddingHead(pooled_output)) + return pooled_output + + +class AnceDocumentEncoder(DocumentEncoder): + def __init__(self, model_name, tokenizer_name=None, device='cuda:0'): + self.device = device + self.model = AnceEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = RobertaTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, titles=None, max_length=256, **kwargs): + if titles is not None: + texts = [f'{title} {text}' for title, text in zip(titles, texts)] + inputs = self.tokenizer( + texts, + max_length=max_length, + padding='longest', + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + return self.model(inputs["input_ids"]).detach().cpu().numpy() + + +class AnceQueryEncoder(QueryEncoder): + + def __init__(self, model_name: str, tokenizer_name: str = None, device: str = 'cpu'): + self.device = device + self.model = AnceEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = RobertaTokenizer.from_pretrained(tokenizer_name or tokenizer_name) + + def encode(self, query: str, **kwargs): + inputs = self.tokenizer( + [query], + max_length=64, + padding='longest', + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + embeddings = self.model(inputs["input_ids"]).detach().cpu().numpy() + return embeddings.flatten() diff --git a/pyserini/encode/_auto.py b/pyserini/encode/_auto.py new file mode 100644 index 
0000000000000000000000000000000000000000..5e8cf6cd1b778c8e2874ff482407c2174af1bdc2 --- /dev/null +++ b/pyserini/encode/_auto.py @@ -0,0 +1,99 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +from sklearn.preprocessing import normalize +from transformers import AutoModel, AutoTokenizer + +from pyserini.encode import DocumentEncoder, QueryEncoder + + +class AutoDocumentEncoder(DocumentEncoder): + def __init__(self, model_name, tokenizer_name=None, device='cuda:0', pooling='cls', l2_norm=False): + self.device = device + self.model = AutoModel.from_pretrained(model_name) + self.model.to(self.device) + try: + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name) + except: + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name, use_fast=False) + self.has_model = True + self.pooling = pooling + self.l2_norm = l2_norm + + def encode(self, texts, titles=None, max_length=256, add_sep=False, **kwargs): + shared_tokenizer_kwargs = dict( + max_length=max_length, + truncation=True, + padding='longest', + return_attention_mask=True, + return_token_type_ids=False, + return_tensors='pt', + add_special_tokens=True, + ) + input_kwargs = {} + if not add_sep: + input_kwargs["text"] = [f'{title} {text}' for title, text in zip(titles, texts)] if titles is not None else texts + else: + if titles is not None: + input_kwargs["text"] = titles + input_kwargs["text_pair"] = texts + else: + input_kwargs["text"] = texts + + inputs = self.tokenizer(**input_kwargs, **shared_tokenizer_kwargs) + inputs.to(self.device) + outputs = self.model(**inputs) + if self.pooling == "mean": + embeddings = self._mean_pooling(outputs[0], inputs['attention_mask']).detach().cpu().numpy() + else: + embeddings = outputs[0][:, 0, :].detach().cpu().numpy() + if self.l2_norm: + embeddings = normalize(embeddings, axis=1) + return embeddings + + +class AutoQueryEncoder(QueryEncoder): + def __init__(self, model_name: str, tokenizer_name: str = None, device: str = 'cpu', + pooling: str = 'cls', l2_norm: bool = False, prefix=None): + self.device = device + self.model = AutoModel.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name) + self.pooling = pooling + self.l2_norm = l2_norm + self.prefix = prefix + + def encode(self, query: str, **kwargs): + if self.prefix: + query = f'{self.prefix} {query}' + inputs = self.tokenizer( + query, + add_special_tokens=True, + return_tensors='pt', + truncation='only_first', + padding='longest', + return_token_type_ids=False, + ) + inputs.to(self.device) + outputs = self.model(**inputs)[0].detach().cpu().numpy() + if self.pooling == "mean": + embeddings = np.average(outputs, axis=-2) + else: + embeddings = outputs[:, 0, :] + if self.l2_norm: + embeddings = normalize(outputs, norm='l2') + return embeddings.flatten() diff --git a/pyserini/encode/_base.py b/pyserini/encode/_base.py new file 
mode 100644 index 0000000000000000000000000000000000000000..09c4e282001feedfdb3468c695b0d18eccb595b8 --- /dev/null +++ b/pyserini/encode/_base.py @@ -0,0 +1,207 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import os + +import faiss +import torch +import numpy as np +from tqdm import tqdm + + +class DocumentEncoder: + def encode(self, texts, **kwargs): + pass + + @staticmethod + def _mean_pooling(last_hidden_state, attention_mask): + token_embeddings = last_hidden_state + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) + sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9) + return sum_embeddings / sum_mask + + +class QueryEncoder: + def encode(self, text, **kwargs): + pass + + +class PcaEncoder: + def __init__(self, encoder, pca_model_path): + self.encoder = encoder + self.pca_mat = faiss.read_VectorTransform(pca_model_path) + + def encode(self, text, **kwargs): + if isinstance(text, str): + embeddings = self.encoder.encode(text, **kwargs) + embeddings = self.pca_mat.apply_py(np.array([embeddings])) + embeddings = embeddings[0] + else: + embeddings = self.encoder.encode(text, **kwargs) + embeddings = self.pca_mat.apply_py(embeddings) + return embeddings + + +class JsonlCollectionIterator: + def __init__(self, collection_path: str, fields=None, docid_field=None, delimiter="\n"): + if fields: + self.fields = fields + else: + self.fields = ['text'] + self.docid_field = docid_field + self.delimiter = delimiter + self.all_info = self._load(collection_path) + self.size = len(self.all_info['id']) + self.batch_size = 1 + self.shard_id = 0 + self.shard_num = 1 + + def __call__(self, batch_size=1, shard_id=0, shard_num=1): + self.batch_size = batch_size + self.shard_id = shard_id + self.shard_num = shard_num + return self + + def __iter__(self): + total_len = self.size + shard_size = int(total_len / self.shard_num) + start_idx = self.shard_id * shard_size + end_idx = min(start_idx + shard_size, total_len) + if self.shard_id == self.shard_num - 1: + end_idx = total_len + to_yield = {} + for idx in tqdm(range(start_idx, end_idx, self.batch_size)): + for key in self.all_info: + to_yield[key] = self.all_info[key][idx: min(idx + self.batch_size, end_idx)] + yield to_yield + + def _parse_fields_from_info(self, info): + """ + :params info: dict, containing all fields as speicifed in self.fields either under + the key of the field name or under the key of 'contents'. 
If under `contents`, this + function will parse the input contents into each fields based the self.delimiter + return: List, each corresponds to the value of self.fields + """ + n_fields = len(self.fields) + + # if all fields are under the key of info, read these rather than 'contents' + if all([field in info for field in self.fields]): + return [info[field].strip() for field in self.fields] + + assert "contents" in info, f"contents not found in info: {info}" + contents = info['contents'] + # whether to remove the final self.delimiter (especially \n) + # in CACM, a \n is always there at the end of contents, which we want to remove; + # but in SciFact, Fiqa, and more, there are documents that only have title but not text (e.g. "This is title\n") + # where the trailing \n indicates empty fields + if contents.count(self.delimiter) == n_fields: + # the user appends one more delimiter to the end, we remove it + if contents.endswith(self.delimiter): + # not using .rstrip() as there might be more than one delimiters at the end + contents = contents[:-len(self.delimiter)] + return [field.strip(" ") for field in contents.split(self.delimiter)] + + def _load(self, collection_path): + filenames = [] + if os.path.isfile(collection_path): + filenames.append(collection_path) + else: + for filename in os.listdir(collection_path): + filenames.append(os.path.join(collection_path, filename)) + all_info = {field: [] for field in self.fields} + all_info['id'] = [] + for filename in filenames: + with open(filename) as f: + for line_i, line in tqdm(enumerate(f)): + info = json.loads(line) + if self.docid_field: + _id = info.get(self.docid_field, None) + else: + _id = info.get('id', info.get('_id', info.get('docid', None))) + if _id is None: + raise ValueError(f"Cannot find f'`{self.docid_field if self.docid_field else '`id` or `_id` or `docid'}`' from {filename}.") + all_info['id'].append(str(_id)) + fields_info = self._parse_fields_from_info(info) + if len(fields_info) != len(self.fields): + raise ValueError( + f"{len(fields_info)} fields are found at Line#{line_i} in file {filename}." \ + f"{len(self.fields)} fields expected." 
\ + f"Line content: {info['contents']}" + ) + + for i in range(len(fields_info)): + all_info[self.fields[i]].append(fields_info[i]) + return all_info + + +class RepresentationWriter: + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def write(self, batch_info, fields=None): + pass + + +class JsonlRepresentationWriter(RepresentationWriter): + def __init__(self, dir_path): + self.dir_path = dir_path + self.filename = 'embeddings.jsonl' + self.file = None + + def __enter__(self): + if not os.path.exists(self.dir_path): + os.makedirs(self.dir_path) + self.file = open(os.path.join(self.dir_path, self.filename), 'w') + + def __exit__(self, exc_type, exc_val, exc_tb): + self.file.close() + + def write(self, batch_info, fields=None): + for i in range(len(batch_info['id'])): + contents = "\n".join([batch_info[key][i] for key in fields]) + vector = batch_info['vector'][i] + vector = vector.tolist() if isinstance(vector, np.ndarray) else vector + self.file.write(json.dumps({'id': batch_info['id'][i], + 'contents': contents, + 'vector': vector}) + '\n') + + +class FaissRepresentationWriter(RepresentationWriter): + def __init__(self, dir_path, dimension=768): + self.dir_path = dir_path + self.index_name = 'index' + self.id_file_name = 'docid' + self.dimension = dimension + self.index = faiss.IndexFlatIP(self.dimension) + self.id_file = None + + def __enter__(self): + if not os.path.exists(self.dir_path): + os.makedirs(self.dir_path) + self.id_file = open(os.path.join(self.dir_path, self.id_file_name), 'w') + + def __exit__(self, exc_type, exc_val, exc_tb): + self.id_file.close() + faiss.write_index(self.index, os.path.join(self.dir_path, self.index_name)) + + def write(self, batch_info, fields=None): + for id_ in batch_info['id']: + self.id_file.write(f'{id_}\n') + self.index.add(np.ascontiguousarray(batch_info['vector'])) diff --git a/pyserini/encode/_cached_data.py b/pyserini/encode/_cached_data.py new file mode 100644 index 0000000000000000000000000000000000000000..87182a607e56121765db6690f51eaba3491f5c37 --- /dev/null +++ b/pyserini/encode/_cached_data.py @@ -0,0 +1,38 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import json + +from pyserini.encode import QueryEncoder + + +class CachedDataQueryEncoder(QueryEncoder): + def __init__(self, model_name_or_path): + self.vectors = self._load_from_jsonl(model_name_or_path) + + @staticmethod + def _load_from_jsonl(path): + vectors = {} + with open(path) as f: + for line in f: + info = json.loads(line) + text = info['contents'].strip() + vec = info['vector'] + vectors[text] = vec + return vectors + + def encode(self, text, **kwargs): + return self.vectors[text.strip()] diff --git a/pyserini/encode/_dpr.py b/pyserini/encode/_dpr.py new file mode 100644 index 0000000000000000000000000000000000000000..9e19a387cad4b9692a81c69c55141ce55130f1e5 --- /dev/null +++ b/pyserini/encode/_dpr.py @@ -0,0 +1,64 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from transformers import DPRContextEncoder, DPRContextEncoderTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer + +from pyserini.encode import DocumentEncoder, QueryEncoder + + +class DprDocumentEncoder(DocumentEncoder): + def __init__(self, model_name, tokenizer_name=None, device='cuda:0'): + self.device = device + self.model = DPRContextEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = DPRContextEncoderTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, titles=None, max_length=256, **kwargs): + if titles: + inputs = self.tokenizer( + titles, + text_pair=texts, + max_length=max_length, + padding='longest', + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + else: + inputs = self.tokenizer( + texts, + max_length=max_length, + padding='longest', + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + return self.model(inputs["input_ids"]).pooler_output.detach().cpu().numpy() + + +class DprQueryEncoder(QueryEncoder): + def __init__(self, model_name: str, tokenizer_name: str = None, device: str = 'cpu'): + self.device = device + self.model = DPRQuestionEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, query: str, **kwargs): + input_ids = self.tokenizer(query, return_tensors='pt') + input_ids.to(self.device) + embeddings = self.model(input_ids["input_ids"]).pooler_output.detach().cpu().numpy() + return embeddings.flatten() diff --git a/pyserini/encode/_slim.py b/pyserini/encode/_slim.py new file mode 100644 index 0000000000000000000000000000000000000000..ea994631af8796c290c137ecf3a9ddd4e420a116 --- /dev/null +++ b/pyserini/encode/_slim.py @@ -0,0 +1,62 @@ +import torch +from transformers import AutoModelForMaskedLM, AutoTokenizer +import numpy as np +import scipy + +from pyserini.encode import QueryEncoder + + +class SlimQueryEncoder(QueryEncoder): + def __init__(self, model_name_or_path, tokenizer_name=None, fusion_weight=.99, device='cpu'): + self.device = device + 
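+        # fusion_weight controls the per-term linear interpolation performed in
+        # _output_to_weight_dicts below:
+        #   final = fusion_weight * upper_weight + (1 - fusion_weight) * lower_weight
+        # so the default of 0.99 leans almost entirely on the aggregated (upper) weights.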
self.fusion_weight = fusion_weight + self.model = AutoModelForMaskedLM.from_pretrained(model_name_or_path) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name_or_path) + self.reverse_vocab = {v: k for k, v in self.tokenizer.vocab.items()} + + def encode(self, text, max_length=256, topk=20, return_sparse=False, **kwargs): + inputs = self.tokenizer( + [text], + return_tensors="pt", + padding=True, + truncation=True, + max_length=max_length, + add_special_tokens=True, + ) + outputs = self.model(**inputs, return_dict=True) + attention_mask = inputs["attention_mask"][:, 1:] # remove the cls token + logits = outputs.logits[:, 1:, :] # remove the cls token prediction + # routing, assign every token to top-k expert + full_router_repr = torch.log(1 + torch.relu(logits)) * attention_mask.unsqueeze(-1) + expert_weights, expert_ids = torch.topk(full_router_repr, dim=2, k=topk) # B x T x topk + min_expert_weight = torch.min(expert_weights, -1, True)[0] + sparse_expert_weights = torch.where(full_router_repr >= min_expert_weight, full_router_repr, 0) + return self._output_to_weight_dicts(expert_weights.cpu(), expert_ids.cpu(), sparse_expert_weights.cpu(), attention_mask.cpu(), return_sparse)[0] + + def _output_to_weight_dicts(self, batch_expert_weights, batch_expert_ids, batch_sparse_expert_weights, batch_attention, return_sparse): + to_return = [] + for batch_id, sparse_expert_weights in enumerate(batch_sparse_expert_weights): + tok_vector = scipy.sparse.csr_matrix(sparse_expert_weights.detach().numpy()) + upper_vector, lower_vector = {}, {} + max_term, max_weight = None, 0 + for position, (expert_topk_ids, expert_topk_weights, attention_score) in enumerate(zip(batch_expert_ids[batch_id], + batch_expert_weights[batch_id], + batch_attention[batch_id])): + if attention_score > 0: + for expert_id, expert_weight in zip(expert_topk_ids, expert_topk_weights): + if expert_weight > 0: + term, weight = self.reverse_vocab[expert_id.item()], expert_weight.item() + upper_vector[term] = upper_vector.get(term, 0) + weight + if weight > max_weight: + max_term, max_weight = term, weight + if max_term is not None: + lower_vector[term] = lower_vector.get(term, 0) + weight + fusion_vector = {} + for term, weight in upper_vector.items(): + fusion_vector[term] = self.fusion_weight * weight + (1 - self.fusion_weight) * lower_vector.get(term, 0) + if return_sparse: + to_return.append((fusion_vector, tok_vector)) + else: + to_return.append(fusion_vector) + return to_return \ No newline at end of file diff --git a/pyserini/encode/_splade.py b/pyserini/encode/_splade.py new file mode 100644 index 0000000000000000000000000000000000000000..4c6d97e341b1349c3c4ef6251886db6a6e5fbf99 --- /dev/null +++ b/pyserini/encode/_splade.py @@ -0,0 +1,35 @@ +import torch +from transformers import AutoModelForMaskedLM, AutoTokenizer +import numpy as np + +from pyserini.encode import QueryEncoder + + +class SpladeQueryEncoder(QueryEncoder): + def __init__(self, model_name_or_path, tokenizer_name=None, device='cpu'): + self.device = device + self.model = AutoModelForMaskedLM.from_pretrained(model_name_or_path) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or model_name_or_path) + self.reverse_voc = {v: k for k, v in self.tokenizer.vocab.items()} + + def encode(self, text, max_length=256, **kwargs): + inputs = self.tokenizer([text], max_length=max_length, padding='longest', + truncation=True, add_special_tokens=True, + 
return_tensors='pt').to(self.device) + input_ids = inputs['input_ids'] + input_attention = inputs['attention_mask'] + batch_logits = self.model(input_ids)['logits'] + batch_aggregated_logits, _ = torch.max(torch.log(1 + torch.relu(batch_logits)) + * input_attention.unsqueeze(-1), dim=1) + batch_aggregated_logits = batch_aggregated_logits.cpu().detach().numpy() + return self._output_to_weight_dicts(batch_aggregated_logits)[0] + + def _output_to_weight_dicts(self, batch_aggregated_logits): + to_return = [] + for aggregated_logits in batch_aggregated_logits: + col = np.nonzero(aggregated_logits)[0] + weights = aggregated_logits[col] + d = {self.reverse_voc[k]: float(v) for k, v in zip(list(col), list(weights))} + to_return.append(d) + return to_return diff --git a/pyserini/encode/_tct_colbert.py b/pyserini/encode/_tct_colbert.py new file mode 100644 index 0000000000000000000000000000000000000000..e68dc7d426a746545e3b010b6caf5d3767c0167f --- /dev/null +++ b/pyserini/encode/_tct_colbert.py @@ -0,0 +1,91 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import torch +if torch.cuda.is_available(): + from torch.cuda.amp import autocast +from transformers import BertModel, BertTokenizer, BertTokenizerFast + +from pyserini.encode import DocumentEncoder, QueryEncoder +from onnxruntime import ExecutionMode, SessionOptions, InferenceSession + + +class TctColBertDocumentEncoder(DocumentEncoder): + def __init__(self, model_name: str, tokenizer_name=None, device='cuda:0'): + self.device = device + self.onnx = False + if model_name.endswith('onnx'): + options = SessionOptions() + self.session = InferenceSession(model_name, options) + self.onnx = True + self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_name or model_name[:-5]) + else: + self.model = BertModel.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = BertTokenizerFast.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, titles=None, fp16=False, max_length=512, **kwargs): + if titles is not None: + texts = [f'[CLS] [D] {title} {text}' for title, text in zip(titles, texts)] + else: + texts = ['[CLS] [D] ' + text for text in texts] + inputs = self.tokenizer( + texts, + max_length=max_length, + padding="longest", + truncation=True, + add_special_tokens=False, + return_tensors='pt' + ) + if self.onnx: + inputs_onnx = {name: np.atleast_2d(value) for name, value in inputs.items()} + inputs.to(self.device) + outputs, _ = self.session.run(None, inputs_onnx) + outputs = torch.from_numpy(outputs).to(self.device) + embeddings = self._mean_pooling(outputs[:, 4:, :], inputs['attention_mask'][:, 4:]) + else: + inputs.to(self.device) + if fp16: + with autocast(): + with torch.no_grad(): + outputs = self.model(**inputs) + else: + outputs = self.model(**inputs) + embeddings = self._mean_pooling(outputs["last_hidden_state"][:, 4:, :], inputs['attention_mask'][:, 4:]) + return embeddings.detach().cpu().numpy() + 
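+
+# Minimal usage sketch (illustration only, not part of the original patch), assuming the
+# 'castorini/tct_colbert-msmarco' checkpoint referenced elsewhere in this change is available:
+#
+#   encoder = TctColBertDocumentEncoder('castorini/tct_colbert-msmarco', device='cpu')
+#   vectors = encoder.encode(['first passage text', 'second passage text'])
+#   # -> numpy array of shape (2, hidden_size); embeddings are mean-pooled over the
+#   #    tokens that follow the '[CLS] [D]' prefix.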
+ +class TctColBertQueryEncoder(QueryEncoder): + def __init__(self, model_name: str, tokenizer_name: str = None, device: str = 'cpu'): + self.device = device + self.model = BertModel.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, query: str, **kwargs): + max_length = 36 # hardcode for now + inputs = self.tokenizer( + '[CLS] [Q] ' + query + '[MASK]' * max_length, + max_length=max_length, + truncation=True, + add_special_tokens=False, + return_tensors='pt' + ) + inputs.to(self.device) + outputs = self.model(**inputs) + embeddings = outputs.last_hidden_state.detach().cpu().numpy() + return np.average(embeddings[:, 4:, :], axis=-2).flatten() diff --git a/pyserini/encode/_tok_freq.py b/pyserini/encode/_tok_freq.py new file mode 100644 index 0000000000000000000000000000000000000000..3574c34123fc38d687b73bb0e258f4be4a0af02e --- /dev/null +++ b/pyserini/encode/_tok_freq.py @@ -0,0 +1,37 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from transformers import AutoTokenizer + +from pyserini.encode import QueryEncoder + + +class TokFreqQueryEncoder(QueryEncoder): + def __init__(self, model_name_or_path=None): + self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) if model_name_or_path else None + + def encode(self, text, **kwargs): + vector = {} + if self.tokenizer is not None: + tok_list = self.tokenizer.tokenize(text) + else: + tok_list = text.strip().split() + for tok in tok_list: + if tok not in vector: + vector[tok] = 1 + else: + vector[tok] += 1 + return vector diff --git a/pyserini/encode/_unicoil.py b/pyserini/encode/_unicoil.py new file mode 100644 index 0000000000000000000000000000000000000000..e0f48b131a371e9066d8de5cb2da60e5e98f1c90 --- /dev/null +++ b/pyserini/encode/_unicoil.py @@ -0,0 +1,175 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import torch +if torch.cuda.is_available(): + from torch.cuda.amp import autocast +from transformers import BertConfig, BertModel, BertTokenizer, PreTrainedModel + +from pyserini.encode import DocumentEncoder, QueryEncoder + + +class UniCoilEncoder(PreTrainedModel): + config_class = BertConfig + base_model_prefix = 'coil_encoder' + load_tf_weights = None + + def __init__(self, config: BertConfig): + super().__init__(config) + self.config = config + self.bert = BertModel(config) + self.tok_proj = torch.nn.Linear(config.hidden_size, 1) + self.init_weights() + + # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights + def _init_weights(self, module): + """ Initialize the weights """ + if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, torch.nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def init_weights(self): + self.bert.init_weights() + self.tok_proj.apply(self._init_weights) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ): + input_shape = input_ids.size() + device = input_ids.device + if attention_mask is None: + attention_mask = ( + torch.ones(input_shape, device=device) + if input_ids is None + else (input_ids != self.bert.config.pad_token_id) + ) + outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) + sequence_output = outputs.last_hidden_state + tok_weights = self.tok_proj(sequence_output) + tok_weights = torch.relu(tok_weights) + return tok_weights + + +class UniCoilDocumentEncoder(DocumentEncoder): + def __init__(self, model_name, tokenizer_name=None, device='cuda:0'): + self.device = device + self.model = UniCoilEncoder.from_pretrained(model_name) + self.model.to(self.device) + self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name or model_name) + + def encode(self, texts, titles=None, expands=None, fp16=False, max_length=512, **kwargs): + if titles: + texts = [f'{title} {text}' for title, text in zip(titles, texts)] + if expands: + input_ids = self._tokenize_with_injects(texts, expands) + else: + input_ids = self.tokenizer(texts, max_length=max_length, padding='longest', + truncation=True, add_special_tokens=True, + return_tensors='pt').to(self.device)["input_ids"] + if fp16: + with autocast(): + with torch.no_grad(): + batch_weights = self.model(input_ids).cpu().detach().numpy() + else: + batch_weights = self.model(input_ids).cpu().detach().numpy() + batch_token_ids = input_ids.cpu().detach().numpy() + return self._output_to_weight_dicts(batch_token_ids, batch_weights) + + def _output_to_weight_dicts(self, batch_token_ids, batch_weights): + to_return = [] + for i in range(len(batch_token_ids)): + weights = batch_weights[i].flatten() + tokens = self.tokenizer.convert_ids_to_tokens(batch_token_ids[i]) + tok_weights = {} + for j in range(len(tokens)): + tok = str(tokens[j]) + weight = float(weights[j]) + if tok == '[CLS]': + continue + if tok == '[PAD]': + break + if tok not in tok_weights: + tok_weights[tok] = weight + elif weight > tok_weights[tok]: + tok_weights[tok] = weight + to_return.append(tok_weights) + return to_return + + def 
_tokenize_with_injects(self, texts, expands): + tokenized = [] + max_len = 0 + for text, expand in zip(texts, expands): + text_ids = self.tokenizer.encode(text, add_special_tokens=False, max_length=400, truncation=True) + expand_ids = self.tokenizer.encode(expand, add_special_tokens=False, max_length=100, truncation=True) + injects = set() + for tok_id in expand_ids: + if tok_id not in text_ids: + injects.add(tok_id) + all_tok_ids = [101] + text_ids + [102] + list(injects) + [102] # 101: CLS, 102: SEP + tokenized.append(all_tok_ids) + cur_len = len(all_tok_ids) + if cur_len > max_len: + max_len = cur_len + for i in range(len(tokenized)): + tokenized[i] += [0] * (max_len - len(tokenized[i])) + return torch.tensor(tokenized, device=self.device) + + +class UniCoilQueryEncoder(QueryEncoder): + def __init__(self, model_name_or_path, tokenizer_name=None, device='cpu'): + self.device = device + self.model = UniCoilEncoder.from_pretrained(model_name_or_path) + self.model.to(self.device) + self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name or model_name_or_path) + + def encode(self, text, **kwargs): + max_length = 128 # hardcode for now + input_ids = self.tokenizer([text], max_length=max_length, padding='longest', + truncation=True, add_special_tokens=True, + return_tensors='pt').to(self.device)["input_ids"] + batch_weights = self.model(input_ids).cpu().detach().numpy() + batch_token_ids = input_ids.cpu().detach().numpy() + return self._output_to_weight_dicts(batch_token_ids, batch_weights)[0] + + def _output_to_weight_dicts(self, batch_token_ids, batch_weights): + to_return = [] + for i in range(len(batch_token_ids)): + weights = batch_weights[i].flatten() + tokens = self.tokenizer.convert_ids_to_tokens(batch_token_ids[i]) + tok_weights = {} + for j in range(len(tokens)): + tok = str(tokens[j]) + weight = float(weights[j]) + if tok == '[CLS]': + continue + if tok == '[PAD]': + break + if tok not in tok_weights: + tok_weights[tok] = weight + else: + tok_weights[tok] += weight + to_return.append(tok_weights) + return to_return diff --git a/pyserini/encode/merge_faiss_index.py b/pyserini/encode/merge_faiss_index.py new file mode 100644 index 0000000000000000000000000000000000000000..bc0a929cd7e552237c75f955db1c18d2757ba7a4 --- /dev/null +++ b/pyserini/encode/merge_faiss_index.py @@ -0,0 +1,48 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
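# Usage sketch (illustrative): both uniCOIL encoders above emit {token: weight} dicts,
# so scoring a (query, passage) pair reduces to a dot product over their shared tokens.
# The checkpoint name is an assumption for illustration.
from pyserini.encode._unicoil import UniCoilQueryEncoder, UniCoilDocumentEncoder

query_encoder = UniCoilQueryEncoder('castorini/unicoil-msmarco-passage', device='cpu')
doc_encoder = UniCoilDocumentEncoder('castorini/unicoil-msmarco-passage', device='cpu')

q_weights = query_encoder.encode('what is a lobster roll')
d_weights = doc_encoder.encode(['A lobster roll is a sandwich filled with lobster meat.'])[0]

# Impact-style score: sum of query-weight * document-weight over overlapping tokens.
score = sum(w * d_weights.get(tok, 0.0) for tok, w in q_weights.items())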
+# + +import os +import glob +import argparse + +import faiss +from tqdm import tqdm + + +parser = argparse.ArgumentParser() +parser.add_argument('--dimension', type=int, help='dimension of passage embeddings', required=False, default=768) +parser.add_argument('--input', type=str, help='wildcard directory to input indexes', required=True) +parser.add_argument('--output', type=str, help='directory to output full indexes', required=True) +args = parser.parse_args() +os.makedirs(args.output, exist_ok=True) + +# merge index +new_index = faiss.IndexFlatIP(args.dimension) +docid_files = [] +for index_dir in tqdm(sorted(glob.glob(args.input)), desc="Merging Faiss Index"): + index = faiss.read_index(os.path.join(index_dir, 'index')) + docid_files.append(os.path.join(index_dir, 'docid')) + vectors = index.reconstruct_n(0, index.ntotal) + new_index.add(vectors) + +faiss.write_index(new_index, os.path.join(args.output, 'index')) + +# merge docid +with open(os.path.join(args.output, 'docid'), 'w') as wfd: + for f in docid_files: + with open(f, 'r') as f1: + for line in f1: + wfd.write(line) diff --git a/pyserini/encode/query.py b/pyserini/encode/query.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf53a0c69904d0872529c85107687277944956a --- /dev/null +++ b/pyserini/encode/query.py @@ -0,0 +1,83 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
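# Usage sketch (illustrative): the merge script above is driven entirely by its CLI.
# The shard layout is hypothetical; --input is a glob (quoted so the shell does not
# expand it) matching one directory per shard, each holding an 'index' and a 'docid'
# file, and a single flat inner-product index is written to --output.
import os

os.system("python -m pyserini.encode.merge_faiss_index "
          "--input 'indexes/msmarco-passage-shard-*' "
          "--output indexes/msmarco-passage-full "
          "--dimension 768")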
+# + +import argparse + +from tqdm import tqdm +import numpy as np +import pandas as pd +from pyserini.query_iterator import DefaultQueryIterator +from pyserini.encode import DprQueryEncoder, TctColBertQueryEncoder, AnceQueryEncoder, AutoQueryEncoder +from pyserini.encode import UniCoilQueryEncoder, SpladeQueryEncoder + + +def init_encoder(encoder, device): + if 'dpr' in encoder.lower(): + return DprQueryEncoder(encoder, device=device) + elif 'tct' in encoder.lower(): + return TctColBertQueryEncoder(encoder, device=device) + elif 'ance' in encoder.lower(): + return AnceQueryEncoder(encoder, device=device, tokenizer_name='roberta-base') + elif 'sentence-transformers' in encoder.lower(): + return AutoQueryEncoder(encoder, device=device, pooling='mean', l2_norm=True) + elif 'unicoil' in encoder.lower(): + return UniCoilQueryEncoder(encoder, device=device) + elif 'splade' in encoder.lower(): + return SpladeQueryEncoder(encoder, device=device) + else: + return AutoQueryEncoder(encoder, device=device) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--topics', type=str, + help='path to topics file in tsv format or self-contained topics name', required=True) + parser.add_argument('--encoder', type=str, help='encoder model name or path', required=True) + parser.add_argument('--weight-range', type=int, help='range of weights for sparse embedding', required=False) + parser.add_argument('--quant-range', type=int, help='range of quantization for sparse embedding', required=False) + parser.add_argument('--output', type=str, help='path to stored encoded queries', required=True) + parser.add_argument('--device', type=str, help='device cpu or cuda [cuda:0, cuda:1...]', + default='cpu', required=False) + args = parser.parse_args() + + encoder = init_encoder(args.encoder, device=args.device) + query_iterator = DefaultQueryIterator.from_topics(args.topics) + + is_sparse = False + query_ids = [] + query_texts = [] + query_embeddings = [] + for topic_id, text in tqdm(query_iterator): + embedding = encoder.encode(text) + if isinstance(embedding, dict): + is_sparse = True + pseudo_str = [] + for tok, weight in embedding.items(): + weight_quanted = int(np.round(weight/args.weight_range*args.quant_range)) + pseudo_str += [tok] * weight_quanted + pseudo_str = " ".join(pseudo_str) + embedding = pseudo_str + query_ids.append(topic_id) + query_texts.append(text) + query_embeddings.append(embedding) + if is_sparse: + with open(args.output, 'w') as f: + for i in range(len(query_ids)): + f.write(f"{query_ids[i]}\t{query_embeddings[i]}\n") + else: + embeddings = {'id': query_ids, 'text': query_texts, 'embedding': query_embeddings} + embeddings = pd.DataFrame(embeddings) + embeddings.to_pickle(args.output) diff --git a/pyserini/encoded_corpus_info.py b/pyserini/encoded_corpus_info.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c47459ef8f82e2cb5d2e37f9388cba3a2ecc77 --- /dev/null +++ b/pyserini/encoded_corpus_info.py @@ -0,0 +1,40 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
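# Usage sketch (illustrative): the script above picks an encoder class from the model
# name and writes either a pickled DataFrame (dense output) or a quantized impact-style
# tsv (sparse output). The topic name, checkpoints, and quantization values below are
# assumptions for illustration.
import os

# Dense: TCT-ColBERT query embeddings -> pickle.
os.system("python -m pyserini.encode.query "
          "--topics msmarco-passage-dev-subset "
          "--encoder castorini/tct_colbert-v2-hnp-msmarco "
          "--output queries.tct_colbert-v2-hnp.pkl --device cpu")

# Sparse: uniCOIL token weights, quantized via --weight-range / --quant-range.
os.system("python -m pyserini.encode.query "
          "--topics msmarco-passage-dev-subset "
          "--encoder castorini/unicoil-msmarco-passage "
          "--weight-range 5 --quant-range 256 "
          "--output queries.unicoil.tsv --device cpu")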
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +CORPUS_INFO = { + "scipy-sparse-vectors.msmarco-v1-passage-slimr": { + "description": "MS MARCO passages-v1 corpus encoded by SLIM trained with BM25 negatives. (Scipy)", + "filename": "scipy-sparse-vectors.msmarco-v1-passage-slimr.20230220.tar.gz", + "urls": [ + "https://vault.cs.uwaterloo.ca/s/4MRXSmiDqNH4mgF/download", + ], + "md5": "7ec96c74dced272712fcbb091bb671a8", + "size (bytes)": 16533697862, + "documents": 8841823, + "downloaded": False + }, + "scipy-sparse-vectors.msmarco-v1-passage-slimr-pp": { + "description": "MS MARCO passages-v1 corpus encoded by SLIM trained with cross-encoder distillation and hardnegative mining (Scipy)", + "filename": "scipy-sparse-vectors.msmarco-v1-passage-slimr-pp.20230220.tar.gz", + "urls": [ + "https://vault.cs.uwaterloo.ca/s/gDJnrYGKsq6ir4w/download", + ], + "md5": "05ce2ce5f64b668a487909ab538ef2a5", + "size (bytes)": 15785241481, + "documents": 8841823, + "downloaded": False + }, +} \ No newline at end of file diff --git a/pyserini/encoded_query_info.py b/pyserini/encoded_query_info.py new file mode 100644 index 0000000000000000000000000000000000000000..620b921c93b6b1010c028e5b6ff72d75b75a19f2 --- /dev/null +++ b/pyserini/encoded_query_info.py @@ -0,0 +1,488 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
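# Usage sketch (illustrative): CORPUS_INFO is a plain registry keyed by corpus name,
# so listing the available pre-encoded corpora and their download sizes is a short
# loop over the dict.
from pyserini.encoded_corpus_info import CORPUS_INFO

for name, info in CORPUS_INFO.items():
    print(f"{name}: {info['documents']:,} docs, "
          f"{info['size (bytes)'] / 2**30:.1f} GiB, md5 {info['md5']}")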
+# + +QUERY_INFO = { + "aggretriever-cocondenser-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by aggretriever-cocondenser", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-aggretriever-cocondenser-msmarco-passage-dev-subset-20230407-f627ef.tar.gz" + ], + "md5": "c30ad20c7b101e3034f41597f0fc1f67", + "size (bytes)": 20859862, + "total_queries": 6980, + "downloaded": False + }, + "aggretriever-distilbert-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by aggretriever-distilbert", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-aggretriever-distilbert-msmarco-passage-dev-subset-20230407-f627ef.tar.gz" + ], + "md5": "a6ee094bd681b08e5657ce69185eee82", + "size (bytes)": 20771767, + "total_queries": 6980, + "downloaded": False + }, + "tct_colbert-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz", + ], + "md5": "b2fe6494241639153f26cc61acf3b39d", + "size (bytes)": 20078757, + "total_queries": 6980, + "downloaded": False + }, + "tct_colbert-v2-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-msmarco-passage-dev-subset-20210608-5f341b.tar.gz", + ], + "md5": "ee8d76e596aef02c5027a2ffd0ff66f8", + "size (bytes)": 20072992, + "total_queries": 6980, + "downloaded": False + }, + "tct_colbert-v2-hn-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hn-msmarco-passage-dev-subset-20210608-5f341b.tar.gz", + ], + "md5": "f7e39cf2cd3ee53f7f8f2e0a1821431c", + "size (bytes)": 20074411, + "total_queries": 6980, + "downloaded": False + }, + "tct_colbert-v2-hnp-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN+", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-msmarco-passage-dev-subset-20210608-5f341b.tar.gz", + ], + "md5": "bed8036475774d12915c8af2a44612f4", + "size (bytes)": 20078958, + "total_queries": 6980, + "downloaded": False + }, + "tct_colbert-v2-hnp-dl19-passage": { + "description": "TREC DL19-passage queries encoded by TCT-ColBERT V2 HN+", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl19-passage-20230124-99b795.tar.gz", + ], + "md5": "ee945fb0a5b17cba4e2e5d51318fbe05", + "size (bytes)": 125193, + "total_queries": 43, + "downloaded": False + }, + "tct_colbert-v2-hnp-dl20": { + "description": "TREC DL20 queries encoded by TCT-ColBERT V2 HN+", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl20-passage-20230124-99b795.tar.gz", + ], + "md5": "b940d3d38cf5a50a9467a4aa7a59d226", + "size (bytes)": 577645, + "total_queries": 200, + "downloaded": False + }, + "ance-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by ANCE", + "urls": [ + 
"https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-msmarco-passage-dev-subset-20210419-9323ec.tar.gz", + ], + "md5": "adad81bb1495eff2f0463e809ecc01b8", + "size (bytes)": 19965095, + "total_queries": 6980, + "downloaded": False + }, + "ance-dl19-passage": { + "description": "TREC DL19 passage queries encoded by ANCE", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl19-passage-20230124-99b79.tar.gz", + ], + "md5": "828714ef5481dc49686e14b61881ba06", + "size (bytes)": 124468, + "total_queries": 43, + "downloaded": False + }, + "ance-dl20": { + "description": "TREC DL20 queries encoded by ANCE", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl20-passage-20230124-99b79.tar.gz", + ], + "md5": "79acea9812a5c20d0d0817b07b348d15", + "size (bytes)": 574183, + "total_queries": 200, + "downloaded": False + }, + "tct_colbert-msmarco-doc-dev": { + "description": "MS MARCO Document dev set queries encoded by TCT-ColBERT zero-shot", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-doc-dev-20210419-9323ec.tar.gz", + ], + "md5": "565fe57f92b229643b68fa3263f089a9", + "size (bytes)": 14940124, + "total_queries": 6980, + "downloaded": False + }, + "ance_maxp-msmarco-doc-dev": { + "description": "MS MARCO Document dev set queries encoded by ANCE maxp", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_maxp-msmarco-doc-dev-20210419-9323ec.tar.gz", + ], + "md5": "3d41ae797cb97e42649c4f4fa7b97d56", + "size (bytes)": 14854155, + "total_queries": 6980, + "downloaded": False + }, + "sbert-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by SBERT", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-sbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz", + ], + "md5": "dc0d09a0f5803824c1ad46a39417aa1e", + "size (bytes)": 20058701, + "total_queries": 6980, + "downloaded": False + }, + "distilbert_kd-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by SBERT", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-msmarco-passage-dev-subset-20210419-9323ec.tar.gz", + ], + "md5": "4706ec91183eefa9771e9311fe4799e0", + "size (bytes)": 20013009, + "total_queries": 6980, + "downloaded": False + }, + "distilbert_kd-dl19-passage": { + "description": "TREC DL19 passage queries encoded by SBERT", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl19-passage-20230124-99b79.tar.gz", + ], + "md5": "c9fe8c8112a7d4fcda1aa606af77e66a", + "size (bytes)": 124760, + "total_queries": 43, + "downloaded": False + }, + "distilbert_kd-dl20": { + "description": "TREC DL20 queries encoded by SBERT", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl20-passage-20230124-99b79.tar.gz", + ], + "md5": "09fe19984515145a78183a98e44bd699", + "size (bytes)": 575682, + "total_queries": 200, + "downloaded": False + }, + "distilbert_tas_b-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by TAS-B", + "urls": [ + 
"https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-msmarco-passage-dev-subset-20210527-63276f.tar.gz", + ], + "md5": "17a3f81de7ba497728050b83733b1c46", + "size (bytes)": 20016799, + "total_queries": 6980, + "downloaded": False + }, + "distilbert_tas_b-dl19-passage": { + "description": "TREC DL19 passage queries encoded by TAS-B", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl19-passage-20230124-99b795.tar.gz", + ], + "md5": "a0a23a1be77e6e9e5dfacf32dfcd5e9b", + "size (bytes)": 124809, + "total_queries": 43, + "downloaded": False + }, + "distilbert_tas_b-dl20": { + "description": "TREC DL20 queries encoded by TAS-B", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl20-passage-20230124-99b795.tar.gz", + ], + "md5": "8ffb4d5a17a2c028fb5065ef8a394ab3", + "size (bytes)": 575875, + "total_queries": 200, + "downloaded": False + }, + "dpr_multi-nq-dev": { + "description": "Natural Question dev set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-dev-20210419-9323ec.tar.gz", + ], + "md5": "c2fd32438129e4994ce2ce71e08de875", + "size (bytes)": 25129398, + "total_queries": 8757, + "downloaded": False + }, + "dpr_multi-nq-test": { + "description": "Natural Question test set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-test-20210419-9323ec.tar.gz", + ], + "md5": "1791f1ed078beb3a00847f75023eb020", + "size (bytes)": 10365005, + "total_queries": 3610, + "downloaded": False + }, + "ance_multi-nq-dev": { + "description": "Natural Question dev set questions encoded by ANCE question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-dev-20210419-9323ec.tar.gz", + ], + "md5": "a3ed32ec8d5a474f61e3c3a9968b26fd", + "size (bytes)": 25163934, + "total_queries": 8757, + "downloaded": False + }, + "ance_multi-nq-test": { + "description": "Natural Question test set questions encoded by ANCE question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-test-20210419-9323ec.tar.gz", + ], + "md5": "a356202b7c8f73758732c893a76a8005", + "size (bytes)": 10379384, + "total_queries": 3610, + "downloaded": False + }, + "dpr_multi-trivia-dev": { + "description": "TriviaQA dev set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-dev-20210419-9323ec.tar.gz", + ], + "md5": "efac7b71ef52ca073331e896089456a4", + "size (bytes)": 25517034, + "total_queries": 8837, + "downloaded": False + }, + "dpr_multi-trivia-test": { + "description": "TriviaQA test set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-test-20210419-9323ec.tar.gz", + ], + "md5": "01e95455d55d0495d806549f04a02c24", + "size (bytes)": 32664437, + "total_queries": 11313, + "downloaded": False + }, + "ance_multi-trivia-dev": { + "description": "TriviaQA dev set 
questions encoded by ANCE question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-dev-20210419-9323ec.tar.gz", + ], + "md5": "bd88499a5785b15ba702173cc0e91417", + "size (bytes)": 25559775, + "total_queries": 8837, + "downloaded": False + }, + "ance_multi-trivia-test": { + "description": "TriviaQA test set questions encoded by ANCE question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-test-20210419-9323ec.tar.gz", + ], + "md5": "3844dfb7f8feb6b064fa48775a35c6ee", + "size (bytes)": 32717910, + "total_queries": 11313, + "downloaded": False + }, + "dpr_multi-wq-test": { + "description": "Web Questions test set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-wq-test-20210419-9323ec.tar.gz", + ], + "md5": "19aa721632d05afe031cc2da83a9a5a5", + "size (bytes)": 5826854, + "total_queries": 2032, + "downloaded": False + }, + "dpr_multi-squad-test": { + "description": "SQUAD dev set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-squad-test-20210419-9323ec.tar.gz", + ], + "md5": "d11e0f801a488d51ad2a63b0748f4ae0", + "size (bytes)": 30328268, + "total_queries": 10570, + "downloaded": False + }, + "dpr_multi-curated-test": { + "description": "CuratedTREC test set questions encoded by DPR question encoder trained on multiset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-curated-test-20210419-9323ec.tar.gz", + ], + "md5": "d1737d3ec5a080d93350ae76b02c7fd1", + "size (bytes)": 1995280, + "total_queries": 694, + "downloaded": False + }, + "dpr_single_nq-nq-dev": { + "description": "NQ dev set questions encoded by DPR question encoder trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-dev-20210419-9323ec.tar.gz", + ], + "md5": "1a992f8d5336dc8654bba5ab7e375ebe", + "size (bytes)": 25123288, + "total_queries": 8757, + "downloaded": False + }, + "dpr_single_nq-nq-test": { + "description": "NQ test set questions encoded by DPR question encoder trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-test-20210419-9323ec.tar.gz", + ], + "md5": "e64bb009b6ba8bfe40d4b9967fd69240", + "size (bytes)": 10362252, + "total_queries": 3610, + "downloaded": False + }, + "bpr_single_nq-nq-test": { + "description": "NQ test set questions encoded by BPR question encoder trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-bpr_single_nq-nq-test-20210827-8a8f75.tar.gz", + ], + "md5": "b139d5a096ad52d2abc66fb54ec66158", + "size (bytes)": 11094680, + "total_queries": 3610, + "downloaded": False + }, + "dkrr-dpr-nq-retriever-dpr-nq-dev": { + "description": "DPR-NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-dev-20220304-7ffa54.tar.gz", + ], + "md5": "fe1276ae841bd5be6f3e0daac144273a", + "size (bytes)": 25146740, + 
"total_queries": 8757, + "downloaded": False + }, + "dkrr-dpr-nq-retriever-dpr-nq-test": { + "description": "DPR-NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-test-20220304-7ffa54.tar.gz", + ], + "md5": "6c7793a0a89e7d10309a6973c52de326", + "size (bytes)": 10370414, + "total_queries": 3610, + "downloaded": False + }, + "dkrr-dpr-nq-retriever-nq-dev": { + "description": "NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-dev-20220304-7ffa54.tar.gz", + ], + "md5": "3c84c7fb6569d7690d5c38be61d3a5a4", + "size (bytes)": 25146526, + "total_queries": 8757, + "downloaded": False + }, + "dkrr-dpr-nq-retriever-nq-test": { + "description": "NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-test-20220304-7ffa54.tar.gz", + ], + "md5": "cd3c30fc6dfde160983167b59acb17a3", + "size (bytes)": 10370264, + "total_queries": 3610, + "downloaded": False + }, + "dkrr-dpr-tqa-retriever-dpr-tqa-dev": { + "description": "TriviaQA dev set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-dev-20220304-7ffa54.tar.gz", + ], + "md5": "f9ca5060cf7794b681cd4fe3d3708c4d", + "size (bytes)": 25540932, + "total_queries": 8837, + "downloaded": False + }, + "dkrr-dpr-tqa-retriever-dpr-tqa-test": { + "description": "TriviaQA test set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-test-20220304-7ffa54.tar.gz", + ], + "md5": "9cbd030c3a4478b7eb8356844bacc45b", + "size (bytes)": 32688909, + "total_queries": 11313, + "downloaded": False + }, + "wiki-6-3-all-dpr2-multi-nq-test": { + "description": "NQ test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-nq-test-20230103-186fa7.tar.gz", + ], + "md5": "2632ca1392a33e975d505acd5090250a", + "size (bytes)": 10354577, + "total_queries": 3610, + "downloaded": False + }, + "wiki-6-3-all-dpr2-multi-dpr-trivia-test": { + "description": "TriviaQA test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-dpr-trivia-test-20230103-186fa7.tar.gz", + ], + "md5": "d0abf8ff598daaec35acd972a465b0e2", + "size (bytes)": 32620950, + "total_queries": 11313, + "downloaded": False + }, + "openai-ada2-dl19-passage": { + "description": "TREC DL19 passage queries encoded by OpenAI ada2.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-dl19-passage-20230530-e3a58f.tar.gz", + ], + "md5": "ab57dab62c5b43508c661b78d6f7b6b9", + "size (bytes)": 418940, + "total_queries": 43, + "downloaded": False + }, + "openai-ada2-dl20": { + "description": 
"TREC DL20 passage queries encoded by OpenAI ada2.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-dl20-passage-20230530-e3a58f.tar.gz", + ], + "md5": "fe711c1e146647396fd06f125882d01c", + "size (bytes)": 1939404, + "total_queries": 200, + "downloaded": False + }, + "openai-ada2-dl19-passage-hyde": { + "description": "TREC DL19 passage queries encoded by HyDE-OpenAI ada2.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-hyde-dl19-passage-20230530-e3a58f.tar.gz", + ], + "md5": "bc981187dc18f3fbf21698605e2349b5", + "size (bytes)": 508400, + "total_queries": 43, + "downloaded": False + }, + "openai-ada2-dl20-hyde": { + "description": "TREC DL20 passage queries encoded by HyDE-OpenAI ada2.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-hyde-dl20-passage-20230530-e3a58f.tar.gz", + ], + "md5": "12389d6affdab9231996834f7022beab", + "size (bytes)": 645105, + "total_queries": 200, + "downloaded": False + }, + "openai-ada2-msmarco-passage-dev-subset": { + "description": "MS MARCO passage dev set queries encoded by OpenAI ada2.", + "urls": [ + "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-openai-ada2-msmarco-passage-dev-subset-20230530-e3a58f.tar.gz", + ], + "md5": "0d9c7311e2e3819183d7ae2b4889e4ba", + "size (bytes)": 67615770, + "total_queries": 6980, + "downloaded": False + }, +} diff --git a/pyserini/eval/__init__.py b/pyserini/eval/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pyserini/eval/convert_msmarco_run_to_trec_run.py b/pyserini/eval/convert_msmarco_run_to_trec_run.py new file mode 100644 index 0000000000000000000000000000000000000000..757ab62abde488a3493c89aff99c96299ad1d13b --- /dev/null +++ b/pyserini/eval/convert_msmarco_run_to_trec_run.py @@ -0,0 +1,34 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Convert an MS MARCO run file to a TREC run file.') + parser.add_argument('--input', required=True, default='', help='Input MS MARCO run file.') + parser.add_argument('--output', required=True, default='', help='Output TREC run file.') + + args = parser.parse_args() + + with open(args.output, 'w') as fout: + for line in open(args.input): + query_id, doc_id, rank = line.strip().split('\t') + score = 1.0 / int(rank) + fout.write('{} Q0 {} {} {} anserini\n'.format( + query_id, doc_id, rank, score)) + + print('Done!') diff --git a/pyserini/eval/convert_trec_run_to_dpr_retrieval_run.py b/pyserini/eval/convert_trec_run_to_dpr_retrieval_run.py new file mode 100644 index 0000000000000000000000000000000000000000..7ef59efd2bda3691edb018871b2725ff2ee02b07 --- /dev/null +++ b/pyserini/eval/convert_trec_run_to_dpr_retrieval_run.py @@ -0,0 +1,85 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import json +import os +from tqdm import tqdm + +from pyserini.search import get_topics, get_topics_with_reader +from pyserini.search.lucene import LuceneSearcher +from pyserini.eval.evaluate_dpr_retrieval import has_answers, SimpleTokenizer + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Convert an TREC run to DPR retrieval result json.') + parser.add_argument('--topics', help='topic name') + parser.add_argument('--topics-file', help='path to a topics file') + parser.add_argument('--topics-reader', help='anserini TopicReader class') + parser.add_argument('--index', required=True, help='Anserini Index that contains raw') + parser.add_argument('--input', required=True, help='Input TREC run file.') + parser.add_argument('--store-raw', action='store_true', help='Store raw text of passage') + parser.add_argument('--regex', action='store_true', default=False, help="regex match") + parser.add_argument('--combine-title-text', action='store_true', help="Make context the concatenation of title and text.") + parser.add_argument('--output', required=True, help='Output DPR Retrieval json file.') + args = parser.parse_args() + + if args.topics_file: + qas = get_topics_with_reader(args.topics_reader, args.topics_file) + elif args.topics: + qas = get_topics(args.topics) + else: + print("No topics file or topics name was provided") + + if os.path.exists(args.index): + searcher = LuceneSearcher(args.index) + else: + searcher = LuceneSearcher.from_prebuilt_index(args.index) + if not searcher: + exit() + + retrieval = {} + tokenizer = SimpleTokenizer() + with open(args.input) as f_in: + for line in tqdm(f_in.readlines()): + question_id, _, doc_id, _, score, _ = line.strip().split() + question_id = int(question_id) + question = qas[question_id]['title'] + answers = qas[question_id]['answers'] + if answers[0] == '"': + answers = answers[1:-1].replace('""', '"') + answers = eval(answers) + if 
args.combine_title_text: + passage = json.loads(searcher.doc(doc_id).raw()) + ctx = passage['title'] + "\n" + passage['text'] + else: + ctx = json.loads(searcher.doc(doc_id).raw())['contents'] + if question_id not in retrieval: + retrieval[question_id] = {'question': question, 'answers': answers, 'contexts': []} + title, text = ctx.split('\n') + answer_exist = has_answers(text, answers, tokenizer, args.regex) + if args.store_raw: + retrieval[question_id]['contexts'].append( + {'docid': doc_id, + 'score': score, + 'text': ctx, + 'has_answer': answer_exist} + ) + else: + retrieval[question_id]['contexts'].append( + {'docid': doc_id, 'score': score, 'has_answer': answer_exist} + ) + + json.dump(retrieval, open(args.output, 'w'), indent=4, ensure_ascii=False) diff --git a/pyserini/eval/evaluate_dpr_retrieval.py b/pyserini/eval/evaluate_dpr_retrieval.py new file mode 100644 index 0000000000000000000000000000000000000000..e494cd7bcfd746931f7cba8096a52a5f164f4041 --- /dev/null +++ b/pyserini/eval/evaluate_dpr_retrieval.py @@ -0,0 +1,280 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Most of the tokenization code here is copied from Facebook/DPR & DrQA codebase to avoid adding an extra dependency +""" + +import argparse +import copy +import json +import logging +import re +import unicodedata +from tqdm import tqdm +import numpy as np + +import regex + +logger = logging.getLogger(__name__) + + +class Tokens(object): + """A class to represent a list of tokenized text.""" + TEXT = 0 + TEXT_WS = 1 + SPAN = 2 + POS = 3 + LEMMA = 4 + NER = 5 + + def __init__(self, data, annotators, opts=None): + self.data = data + self.annotators = annotators + self.opts = opts or {} + + def __len__(self): + """The number of tokens.""" + return len(self.data) + + def slice(self, i=None, j=None): + """Return a view of the list of tokens from [i, j).""" + new_tokens = copy.copy(self) + new_tokens.data = self.data[i: j] + return new_tokens + + def untokenize(self): + """Returns the original text (with whitespace reinserted).""" + return ''.join([t[self.TEXT_WS] for t in self.data]).strip() + + def words(self, uncased=False): + """Returns a list of the text of each token + Args: + uncased: lower cases text + """ + if uncased: + return [t[self.TEXT].lower() for t in self.data] + else: + return [t[self.TEXT] for t in self.data] + + def offsets(self): + """Returns a list of [start, end) character offsets of each token.""" + return [t[self.SPAN] for t in self.data] + + def pos(self): + """Returns a list of part-of-speech tags of each token. + Returns None if this annotation was not included. + """ + if 'pos' not in self.annotators: + return None + return [t[self.POS] for t in self.data] + + def lemmas(self): + """Returns a list of the lemmatized text of each token. + Returns None if this annotation was not included. 
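# Usage sketch (illustrative): the converter above and the evaluator below form a
# two-step pipeline; the topic set, index, and file names here are assumptions. A TREC
# run is first joined against the index's raw passages to produce a DPR-style json,
# which is then scored as top-k answer recall.
import os

os.system("python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run "
          "--topics dpr-nq-test --index wikipedia-dpr "
          "--input runs/run.nq-test.bm25.trec "
          "--output runs/run.nq-test.bm25.json")

os.system("python -m pyserini.eval.evaluate_dpr_retrieval "
          "--retrieval runs/run.nq-test.bm25.json --topk 5 20 100")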
+ """ + if 'lemma' not in self.annotators: + return None + return [t[self.LEMMA] for t in self.data] + + def entities(self): + """Returns a list of named-entity-recognition tags of each token. + Returns None if this annotation was not included. + """ + if 'ner' not in self.annotators: + return None + return [t[self.NER] for t in self.data] + + def ngrams(self, n=1, uncased=False, filter_fn=None, as_strings=True): + """Returns a list of all ngrams from length 1 to n. + Args: + n: upper limit of ngram length + uncased: lower cases text + filter_fn: user function that takes in an ngram list and returns + True or False to keep or not keep the ngram + as_string: return the ngram as a string vs list + """ + + def _skip(gram): + if not filter_fn: + return False + return filter_fn(gram) + + words = self.words(uncased) + ngrams = [(s, e + 1) + for s in range(len(words)) + for e in range(s, min(s + n, len(words))) + if not _skip(words[s:e + 1])] + + # Concatenate into strings + if as_strings: + ngrams = ['{}'.format(' '.join(words[s:e])) for (s, e) in ngrams] + + return ngrams + + def entity_groups(self): + """Group consecutive entity tokens with the same NER tag.""" + entities = self.entities() + if not entities: + return None + non_ent = self.opts.get('non_ent', 'O') + groups = [] + idx = 0 + while idx < len(entities): + ner_tag = entities[idx] + # Check for entity tag + if ner_tag != non_ent: + # Chomp the sequence + start = idx + while (idx < len(entities) and entities[idx] == ner_tag): + idx += 1 + groups.append((self.slice(start, idx).untokenize(), ner_tag)) + else: + idx += 1 + return groups + + +class Tokenizer(object): + """Base tokenizer class. + Tokenizers implement tokenize, which should return a Tokens class. + """ + + def tokenize(self, text): + raise NotImplementedError + + def shutdown(self): + pass + + def __del__(self): + self.shutdown() + + +class SimpleTokenizer(Tokenizer): + ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+' + NON_WS = r'[^\p{Z}\p{C}]' + + def __init__(self, **kwargs): + """ + Args: + annotators: None or empty set (only tokenizes). + """ + self._regexp = regex.compile( + '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS), + flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE + ) + if len(kwargs.get('annotators', {})) > 0: + logger.warning('%s only tokenizes! 
Skipping annotators: %s' % + (type(self).__name__, kwargs.get('annotators'))) + self.annotators = set() + + def tokenize(self, text): + data = [] + matches = [m for m in self._regexp.finditer(text)] + for i in range(len(matches)): + # Get text + token = matches[i].group() + + # Get whitespace + span = matches[i].span() + start_ws = span[0] + if i + 1 < len(matches): + end_ws = matches[i + 1].span()[0] + else: + end_ws = span[1] + + # Format data + data.append(( + token, + text[start_ws: end_ws], + span, + )) + return Tokens(data, self.annotators) + + +def regex_match(text, pattern): + """Test if a regex pattern is contained within a text.""" + try: + pattern = re.compile( + pattern, + flags=re.IGNORECASE + re.UNICODE + re.MULTILINE, + ) + except BaseException: + return False + return pattern.search(text) is not None + + +def _normalize(text): + return unicodedata.normalize('NFD', text) + + +def has_answers(text, answers, tokenizer, regex=False): + text = _normalize(text) + if regex: + for ans in answers: + ans = _normalize(ans) + if regex_match(text, ans): + return True + else: + text = tokenizer.tokenize(text).words(uncased=True) + for ans in answers: + ans = _normalize(ans) + ans = tokenizer.tokenize(ans).words(uncased=True) + for i in range(0, len(text) - len(ans) + 1): + if ans == text[i: i + len(ans)]: + return True + return False + + +def evaluate_retrieval(retrieval_file, topk, regex=False): + tokenizer = SimpleTokenizer() + retrieval = json.load(open(retrieval_file)) + accuracy = { k : [] for k in topk } + max_k = max(topk) + + for qid in tqdm(list(retrieval.keys())): + answers = retrieval[qid]['answers'] + contexts = retrieval[qid]['contexts'] + has_ans_idx = max_k # first index in contexts that has answers + + for idx, ctx in enumerate(contexts): + if idx >= max_k: + break + if 'has_answer' in ctx: + if ctx['has_answer']: + has_ans_idx = idx + break + else: + text = ctx['text'].split('\n')[1] # [0] is title, [1] is text + if has_answers(text, answers, tokenizer, regex): + has_ans_idx = idx + break + + for k in topk: + accuracy[k].append(0 if has_ans_idx >= k else 1) + + for k in topk: + print(f'Top{k}\taccuracy: {np.mean(accuracy[k]):.4f}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--retrieval', type=str, metavar='path', + help="Path to retrieval output file.") + parser.add_argument('--topk', type=int, nargs='+', help="topk to evaluate") + parser.add_argument('--regex', action='store_true', default=False, help="regex match") + args = parser.parse_args() + + evaluate_retrieval(args.retrieval, args.topk, args.regex) diff --git a/pyserini/eval/evaluate_kilt_retrieval.py b/pyserini/eval/evaluate_kilt_retrieval.py new file mode 100644 index 0000000000000000000000000000000000000000..6de8ac2ece2718cdc5f33e25e1b950770d04ad6f --- /dev/null +++ b/pyserini/eval/evaluate_kilt_retrieval.py @@ -0,0 +1,377 @@ +# NOTE: This code is taken from the original KILT library's retrieval evaluation script +# https://github.com/facebookresearch/KILT/blob/9bcb119a7ed5fda88826058b062d0e45c726c676/kilt/eval_retrieval.py + +import argparse +import pprint +import json +from collections import defaultdict, OrderedDict + +import os +from pyserini.query_iterator import KiltQueryIterator + + +########################################################################################## +# Replaced: +# from kilt import kilt_utils +# With the following directly imported code: + +def load_data(filename): + data = [] + with open(filename, "r") as fin: + lines = 
fin.readlines() + for line in lines: + data.append(json.loads(line)) + return data + + +########################################################################################## +# Replaced: +# from kilt import eval_downstream +# With the following directly imported code: + +def validate_input(gold_records, guess_records): + + if len(gold_records) != len(guess_records): + print( + "WARNING: DIFFERENT SIZE gold: {} guess: {}".format( + len(gold_records), len(guess_records) + ) + ) + + # align order + gold_ids = [] + for gold in gold_records: + assert str(gold["id"]).strip() not in gold_ids, "Gold IDs should be unique" + gold_ids.append(str(gold["id"]).strip()) + + id2guess_record = {} + for guess in guess_records: + assert ( + str(guess["id"]).strip() not in id2guess_record + ), "Prediction IDs should be unique" + id2guess_record[str(guess["id"]).strip()] = guess + + guess_records = [] + for id in gold_ids: + if id in id2guess_record: + guess_records.append(id2guess_record[id]) + else: + raise ValueError("ERROR: no prediction provided for id: {}".format(id)) + + return gold_records, guess_records + +########################################################################################## + + +def _remove_duplicates(obj): + obj_tmp = [] + for o in obj: + if o not in obj_tmp: + obj_tmp.append(o) + return obj_tmp + + +def _get_ids_list(datapoint, rank_keys, verbose=False): + # collect all gold ids + ids_list = [] + for output in datapoint["output"]: + current_ids_list = [] + if "provenance" in output: + for provenance in output["provenance"]: + if any(rank_key not in provenance for rank_key in rank_keys): + missing = set(rank_keys) - set( + list(provenance.keys()) + ).intersection(set(rank_keys)) + if verbose: + print( + f"WARNING: missing key(s) {missing} in provenance, unable to compute retrieval for those." + ) + else: + current_ids_list.append( + "+".join( + [ + str(provenance[rank_key]).strip() + for rank_key in rank_keys + ] + ) + ) + ids_list.append(_remove_duplicates(current_ids_list)) # remove duplicates + + # consider only unique ids + return ids_list + + +def get_rank(guess_item, gold_item, k, rank_keys, verbose=False): + """ + The main idea is to consider each evidence set as a single point in the rank. + The score in the rank for an evidence set is given by the lowest scored evidence in the set. + """ + + assert k > 0, "k must be a positive integer grater than 0." + + rank = [] + num_distinct_evidence_sets = 0 + + guess_ids = _get_ids_list(guess_item, rank_keys)[0] + + if guess_ids and len(guess_ids) > 0: + + # 1. collect evidence sets and their sizes + evidence_sets = [] + e_size = defaultdict(int) + for output in gold_item["output"]: + if "provenance" in output: + e_set = { + "+".join( + [str(provenance[rank_key]).strip() for rank_key in rank_keys] + ) + for provenance in output["provenance"] + } + if e_set not in evidence_sets: # no duplicate evidence set + evidence_sets.append(e_set) + e_size[len(e_set)] += 1 + num_distinct_evidence_sets = len(evidence_sets) + + # 2. 
check what's the minimum number of predicted pages needed to get a robust P/R@k + min_prediction_size = 0 + c = 0 + for size, freq in sorted(e_size.items(), reverse=True): + for _ in range(freq): + min_prediction_size += size + c += 1 + if c == k: + break + if c == k: + break + # if the number of evidence sets is smaller than k + min_prediction_size += k - c + + if verbose and len(guess_ids) < min_prediction_size: + print( + f"WARNING: you should provide at least {min_prediction_size} provenance items for a robust recall@{k} computation (you provided {len(guess_ids)} item(s))." + ) + + # 3. rank by gruping pages in each evidence set (each evidence set count as 1), + # the position in the rank of each evidence set is given by the last page in guess_ids + # non evidence pages counts as 1 + rank = [] + for guess_id in guess_ids: + guess_id = str(guess_id).strip() + found = False + for idx, e_set in enumerate(evidence_sets): + + e_set_id = f"evidence_set:{idx}" + + if guess_id in e_set: + found = True + + # remove from the rank previous points referring to this evidence set + if e_set_id in rank: + rank.remove(e_set_id) + + # remove the guess_id from the evidence set + e_set.remove(guess_id) + + if len(e_set) == 0: + # it was the last evidence, it counts as true in the rank + rank.append(True) + else: + # add a point for this partial evidence set + rank.append(e_set_id) + + if not found: + rank.append(False) + + return rank, num_distinct_evidence_sets + + +# 1. Precision computation +def _precision_at_k(rank, k): + + # precision @ k + p = rank[:k].count(True) / k + + return p + + +# 2. Recall computation +def _recall_at_k(rank, num_distinct_evidence_sets, k): + + r = rank[:k].count(True) / num_distinct_evidence_sets + + return r + + +# 3. Success rate computation +def _success_rate_at_k(rank, k): + + # success rate @ k + p = int(True in rank[:k]) + + return p + + +def _computeRprec(guess_ids, gold_ids): + + R = len(gold_ids) + num = 0 + + for prediction in guess_ids[:R]: + if str(prediction).strip() in gold_ids: + num += 1 + + Rprec = num / R if R > 0 else 0 + return Rprec + + +# R-precision https://link.springer.com/referenceworkentry/10.1007%2F978-0-387-39940-9_486 +def rprecision(guess_item, gold_item, rank_keys): + gold_ids_list = _get_ids_list(gold_item, rank_keys) + guess_ids = _get_ids_list(guess_item, rank_keys)[0] + Rprec_vector = [] + for gold_ids in gold_ids_list: + Rprec = _computeRprec(guess_ids, gold_ids) + Rprec_vector.append(Rprec) + return max(Rprec_vector) + + +def get_ranking_metrics(guess_item, gold_item, ks, rank_keys): + + Rprec = 0 + P_at_k = {"precision@{}".format(k): 0 for k in sorted(ks) if k > 0} + R_at_k = {"recall@{}".format(k): 0 for k in sorted(ks) if k > 1} + S_at_k = {"success_rate@{}".format(k): 0 for k in sorted(ks) if k > 1} + + assert ( + "output" in guess_item and len(guess_item["output"]) == 1 + ), f"guess should provide exactly one output for {guess_item['id']}" + + Rprec = rprecision(guess_item, gold_item, rank_keys=rank_keys) + for k in ks: + + # 0. get rank + rank, num_distinct_evidence_sets = get_rank( + guess_item, gold_item, k, rank_keys=rank_keys + ) + + if num_distinct_evidence_sets > 0: + + # 1. precision + P_at_k["precision@{}".format(k)] = _precision_at_k(rank, k) + + # 2. recall + R_at_k["recall@{}".format(k)] = _recall_at_k( + rank, num_distinct_evidence_sets, k + ) + + # 3. 
success rate + S_at_k["success_rate@{}".format(k)] = _success_rate_at_k(rank, k) + + # else: + # print( + # "WARNING: the number of distinct evidence sets is 0 for {}".format( + # gold_item + # ) + # ) + + return {"Rprec": Rprec, **P_at_k, **R_at_k, **S_at_k} + + +def compute(gold_dataset, guess_dataset, ks, rank_keys): + + ks = sorted([int(x) for x in ks]) + + result = OrderedDict() + result["Rprec"] = 0.0 + for k in ks: + if k > 0: + result["precision@{}".format(k)] = 0.0 + if k > 1: + result["recall@{}".format(k)] = 0.0 + result["success_rate@{}".format(k)] = 0.0 + + assert len(guess_dataset) == len( + gold_dataset + ), "different size gold: {} guess: {}".format(len(guess_dataset), len(gold_dataset)) + + for gold, guess in zip(guess_dataset, gold_dataset): + assert ( + str(gold["id"]).strip() == str(guess["id"]).strip() + ), "Items must have same order with same IDs" + + for guess_item, gold_item in zip(guess_dataset, gold_dataset): + ranking_metrics = get_ranking_metrics(guess_item, gold_item, ks, rank_keys) + result["Rprec"] += ranking_metrics["Rprec"] + for k in ks: + if k > 0: + result["precision@{}".format(k)] += ranking_metrics[ + "precision@{}".format(k) + ] + if k > 1: + result["recall@{}".format(k)] += ranking_metrics["recall@{}".format(k)] + result["success_rate@{}".format(k)] += ranking_metrics[ + "success_rate@{}".format(k) + ] + + if len(guess_dataset) > 0: + result["Rprec"] /= len(guess_dataset) + for k in ks: + if k > 0: + result["precision@{}".format(k)] /= len(guess_dataset) + if k > 1: + result["recall@{}".format(k)] /= len(guess_dataset) + result["success_rate@{}".format(k)] /= len(guess_dataset) + + return result + + +def evaluate(gold, guess, ks, rank_keys): + pp = pprint.PrettyPrinter(indent=4) + + gold_dataset = load_data(gold) + guess_dataset = load_data(guess) + + # 0. validate input + gold_dataset, guess_dataset = validate_input( + gold_dataset, guess_dataset + ) + + # 1. 
get retrieval metrics + result = compute(gold_dataset, guess_dataset, ks, rank_keys) + + pp.pprint(result) + return result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("guess", help="Guess KILT file") + parser.add_argument("gold", help="Gold KILT file") + parser.add_argument( + "--ks", + type=str, + required=False, + default="1,5,10,20", + help="Comma separated list of positive integers for recall@k and precision@k", + ) + parser.add_argument( + "--rank_keys", + type=str, + required=False, + default="wikipedia_id", + help="Comma separated list of rank keys for recall@k and precision@k", + ) + + args = parser.parse_args() + args.ks = [int(k) for k in args.ks.split(",")] + args.rank_keys = [rank_key for rank_key in args.rank_keys.split(",")] + + ########################################################################################## + # Pyserini change: + # Download gold file if necessary + gold = args.gold + if not os.path.exists(args.gold): + gold = KiltQueryIterator.download_kilt_topics(gold) + ########################################################################################## + + evaluate(gold, args.guess, args.ks, args.rank_keys) diff --git a/pyserini/eval/evaluate_qa_overlap_retrieval.py b/pyserini/eval/evaluate_qa_overlap_retrieval.py new file mode 100644 index 0000000000000000000000000000000000000000..dd5163922e4ab895fd989f2d2fee4555cdf8475a --- /dev/null +++ b/pyserini/eval/evaluate_qa_overlap_retrieval.py @@ -0,0 +1,326 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
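# Usage sketch (illustrative): the KILT evaluation above takes a guess file and a gold
# file as positional arguments; the file names here are assumptions, and the gold file
# is fetched through KiltQueryIterator when the given path does not exist locally.
import os

os.system("python -m pyserini.eval.evaluate_kilt_retrieval "
          "runs/run.nq-dev-kilt.jsonl nq-dev-kilt.jsonl "
          "--ks 1,5,10,20 --rank_keys wikipedia_id")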
+# + +""" +Most of the tokenization code here is copied from Facebook/DPR & DrQA codebase to avoid adding an extra dependency +""" + +import argparse +import copy +import json +import logging +import re +import unicodedata +from tqdm import tqdm +import numpy as np +import os +import regex +import collections + +logger = logging.getLogger(__name__) + + +DIRNAME = os.path.dirname(os.path.abspath(__file__)) +# download dependencies +if not os.path.exists('data/nq-annotations.jsonl'): + ANNOTATIONS_TO_DOWNLOAD = [ + ('https://dl.fbaipublicfiles.com/qaoverlap/data/nq-annotations.jsonl','nq-annotations.jsonl'), + ('https://dl.fbaipublicfiles.com/qaoverlap/data/triviaqa-annotations.jsonl', 'triviaqa-annotations.jsonl'), + ('https://dl.fbaipublicfiles.com/qaoverlap/data/webquestions-annotations.jsonl','webquestions-annotations.jsonl') + ] + + for link, dest in ANNOTATIONS_TO_DOWNLOAD: + os.system(f'wget {link} -P data/') + +ANNOTATION_PATHS = { + 'tqa': os.path.join(DIRNAME, '../../data/triviaqa-annotations.jsonl'), + 'nq': os.path.join(DIRNAME, '../../data/nq-annotations.jsonl'), + 'webquestions': os.path.join(DIRNAME, '../../data/webquestions-annotations.jsonl'), +} + +class Tokens(object): + """A class to represent a list of tokenized text.""" + TEXT = 0 + TEXT_WS = 1 + SPAN = 2 + POS = 3 + LEMMA = 4 + NER = 5 + + def __init__(self, data, annotators, opts=None): + self.data = data + self.annotators = annotators + self.opts = opts or {} + + def __len__(self): + """The number of tokens.""" + return len(self.data) + + def slice(self, i=None, j=None): + """Return a view of the list of tokens from [i, j).""" + new_tokens = copy.copy(self) + new_tokens.data = self.data[i: j] + return new_tokens + + def untokenize(self): + """Returns the original text (with whitespace reinserted).""" + return ''.join([t[self.TEXT_WS] for t in self.data]).strip() + + def words(self, uncased=False): + """Returns a list of the text of each token + Args: + uncased: lower cases text + """ + if uncased: + return [t[self.TEXT].lower() for t in self.data] + else: + return [t[self.TEXT] for t in self.data] + + def offsets(self): + """Returns a list of [start, end) character offsets of each token.""" + return [t[self.SPAN] for t in self.data] + + def pos(self): + """Returns a list of part-of-speech tags of each token. + Returns None if this annotation was not included. + """ + if 'pos' not in self.annotators: + return None + return [t[self.POS] for t in self.data] + + def lemmas(self): + """Returns a list of the lemmatized text of each token. + Returns None if this annotation was not included. + """ + if 'lemma' not in self.annotators: + return None + return [t[self.LEMMA] for t in self.data] + + def entities(self): + """Returns a list of named-entity-recognition tags of each token. + Returns None if this annotation was not included. + """ + if 'ner' not in self.annotators: + return None + return [t[self.NER] for t in self.data] + + def ngrams(self, n=1, uncased=False, filter_fn=None, as_strings=True): + """Returns a list of all ngrams from length 1 to n. 
+ Args: + n: upper limit of ngram length + uncased: lower cases text + filter_fn: user function that takes in an ngram list and returns + True or False to keep or not keep the ngram + as_string: return the ngram as a string vs list + """ + + def _skip(gram): + if not filter_fn: + return False + return filter_fn(gram) + + words = self.words(uncased) + ngrams = [(s, e + 1) + for s in range(len(words)) + for e in range(s, min(s + n, len(words))) + if not _skip(words[s:e + 1])] + + # Concatenate into strings + if as_strings: + ngrams = ['{}'.format(' '.join(words[s:e])) for (s, e) in ngrams] + + return ngrams + + def entity_groups(self): + """Group consecutive entity tokens with the same NER tag.""" + entities = self.entities() + if not entities: + return None + non_ent = self.opts.get('non_ent', 'O') + groups = [] + idx = 0 + while idx < len(entities): + ner_tag = entities[idx] + # Check for entity tag + if ner_tag != non_ent: + # Chomp the sequence + start = idx + while (idx < len(entities) and entities[idx] == ner_tag): + idx += 1 + groups.append((self.slice(start, idx).untokenize(), ner_tag)) + else: + idx += 1 + return groups + + +class Tokenizer(object): + """Base tokenizer class. + Tokenizers implement tokenize, which should return a Tokens class. + """ + + def tokenize(self, text): + raise NotImplementedError + + def shutdown(self): + pass + + def __del__(self): + self.shutdown() + + +class SimpleTokenizer(Tokenizer): + ALPHA_NUM = r'[\p{L}\p{N}\p{M}]+' + NON_WS = r'[^\p{Z}\p{C}]' + + def __init__(self, **kwargs): + """ + Args: + annotators: None or empty set (only tokenizes). + """ + self._regexp = regex.compile( + '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS), + flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE + ) + if len(kwargs.get('annotators', {})) > 0: + logger.warning('%s only tokenizes! 
Skipping annotators: %s' % + (type(self).__name__, kwargs.get('annotators'))) + self.annotators = set() + + def tokenize(self, text): + data = [] + matches = [m for m in self._regexp.finditer(text)] + for i in range(len(matches)): + # Get text + token = matches[i].group() + + # Get whitespace + span = matches[i].span() + start_ws = span[0] + if i + 1 < len(matches): + end_ws = matches[i + 1].span()[0] + else: + end_ws = span[1] + + # Format data + data.append(( + token, + text[start_ws: end_ws], + span, + )) + return Tokens(data, self.annotators) + + +def regex_match(text, pattern): + """Test if a regex pattern is contained within a text.""" + try: + pattern = re.compile( + pattern, + flags=re.IGNORECASE + re.UNICODE + re.MULTILINE, + ) + except BaseException: + return False + return pattern.search(text) is not None + + +def _normalize(text): + return unicodedata.normalize('NFD', text) + + +def read_jsonl(path): + with open(path) as f: + return [json.loads(l) for l in f] + + +def read_annotations(annotations_data_path): + return read_jsonl(annotations_data_path) + + +def has_answers(text, answers, tokenizer, regex=False): + text = _normalize(text) + if regex: + for ans in answers: + ans = _normalize(ans) + if regex_match(text, ans): + return True + else: + text = tokenizer.tokenize(text).words(uncased=True) + for ans in answers: + ans = _normalize(ans) + ans = tokenizer.tokenize(ans).words(uncased=True) + for i in range(0, len(text) - len(ans) + 1): + if ans == text[i: i + len(ans)]: + return True + return False + + +def evaluate_retrieval(retrieval_file, topk, annotation_file, regex=False): + tokenizer = SimpleTokenizer() + retrieval = json.load(open(retrieval_file)) + annotations = read_annotations(annotation_file) + annotation_ids = {int(a['id']): a['labels'] for a in annotations} + accuracy = { k : collections.defaultdict(list) for k in topk } + max_k = max(topk) + annotation_labels = [ + 'total', + 'no_overlap', + 'question_overlap', + 'no_question_overlap', + 'answer_overlap', + 'no_answer_overlap', + 'answer_overlap_only' + ] + + + for qid in retrieval.keys(): + answers = retrieval[qid]['answers'] + contexts = retrieval[qid]['contexts'] + has_ans_idx = max_k # first index in contexts that has answers + + for idx, ctx in enumerate(contexts): + if idx >= max_k: + break + if 'has_answer' in ctx: + if ctx['has_answer']: + has_ans_idx = idx + break + else: + text = ctx['text'].split('\n')[1] # [0] is title, [1] is text + if has_answers(text, answers, tokenizer, regex): + has_ans_idx = idx + break + + for annotation_label in annotation_labels: + if annotation_label in annotation_ids[int(qid)] or annotation_label == 'total' or \ + (annotation_label == 'no_overlap' and ('no_question_overlap' in annotation_ids[int(qid)]) and ('no_answer_overlap' in annotation_ids[int(qid)])): + for k in topk: + accuracy[k][annotation_label].append(0 if has_ans_idx >= k else 1) + + for k in topk: + for annotation_label in annotation_labels: + print(f'Top{k}\taccuracy: {np.mean(accuracy[k][annotation_label])} \t {annotation_label}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--retrieval', type=str, metavar='path', + help="Path to retrieval output file.") + parser.add_argument('--topk', type=int, nargs='+', help="topk to evaluate") + parser.add_argument('--regex', action='store_true', default=False, help="regex match") + parser.add_argument('--dataset_name', choices=['nq', 'tqa', 'webquestions'], type=str, + help='name of datset to evaluate on') + args = 
parser.parse_args() + + evaluate_retrieval(args.retrieval, args.topk, ANNOTATION_PATHS[args.dataset_name], args.regex) diff --git a/pyserini/eval/msmarco_doc_eval.py b/pyserini/eval/msmarco_doc_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..4b818cde3c924d231112eb9f7ec9948af0c08d4e --- /dev/null +++ b/pyserini/eval/msmarco_doc_eval.py @@ -0,0 +1,46 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import subprocess +import sys +import platform + +from pyserini.search import get_qrels_file +from pyserini.util import download_evaluation_script + +script_path = download_evaluation_script('msmarco_doc_eval') +cmd_prefix = ['python', script_path] +args = sys.argv +if len(args) > 1: + cmd = cmd_prefix + args[1:] + for i in range(len(cmd)-1): + if cmd[i] == '--judgments': + if not os.path.exists(cmd[i+1]): + cmd[i+1] = get_qrels_file(cmd[i + 1]) +else: + cmd = cmd_prefix +print(f'Running command: {cmd}') +shell = platform.system() == "Windows" +process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell) +stdout, stderr = process.communicate() +if stderr: + print(stderr.decode("utf-8")) +print('Results:') +print(stdout.decode("utf-8")) diff --git a/pyserini/eval/msmarco_passage_eval.py b/pyserini/eval/msmarco_passage_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..c5a07f950c9f0eaf00dbf60d8bcd16a69eadeafb --- /dev/null +++ b/pyserini/eval/msmarco_passage_eval.py @@ -0,0 +1,44 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
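A minimal usage sketch for the QA-overlap evaluation defined in `evaluate_qa_overlap_retrieval.py` above, assuming a retrieval JSON in the `{qid: {'answers': [...], 'contexts': [...]}}` layout the script reads (the run path is a hypothetical placeholder):

# Score a hypothetical DPR-style retrieval file against the NQ overlap annotations.
evaluate_retrieval('runs/run.dpr.nq-test.json',
                   topk=[20, 100],
                   annotation_file=ANNOTATION_PATHS['nq'],
                   regex=False)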
+# + +import os +import subprocess +import sys +import platform + +from pyserini.search import get_qrels_file +from pyserini.util import download_evaluation_script + +script_path = download_evaluation_script('msmarco_passage_eval') +cmd_prefix = ['python', script_path] +args = sys.argv +if len(args) > 1: + cmd = cmd_prefix + args[1:] + if not os.path.exists(cmd[-2]): + cmd[-2] = get_qrels_file(cmd[-2]) +else: + cmd = cmd_prefix +print(f'Running command: {cmd}') +shell = platform.system() == "Windows" +process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell) +stdout, stderr = process.communicate() +if stderr: + print(stderr.decode("utf-8")) +print('Results:') +print(stdout.decode("utf-8")) diff --git a/pyserini/eval/trec_eval.py b/pyserini/eval/trec_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..b72f7c9f81e0539fcd19d43cfa7664c6471b185e --- /dev/null +++ b/pyserini/eval/trec_eval.py @@ -0,0 +1,112 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Example usage +# python -m pyserini.eval.trec_eval -m ndcg_cut.10,20 -m all_trec qrels.dev.small.tsv runs/run.Colbert.txt -remove-unjudged -cutoffs.20,50 + + +import os +import re +import subprocess +import sys +import platform +import pandas as pd +import tempfile + +from pyserini.search import get_qrels_file +from pyserini.util import download_evaluation_script + +script_path = download_evaluation_script('trec_eval') +cmd_prefix = ['java', '-jar', script_path] +args = sys.argv + +# Option to discard non-judged hits in run file +judged_docs_only = '' +judged_result = [] +cutoffs = [] + +if '-remove-unjudged' in args: + judged_docs_only = args.pop(args.index('-remove-unjudged')) + +if any([i.startswith('judged.') for i in args]): + # Find what position the arg is in. + idx = [i.startswith('judged.') for i in args].index(True) + cutoffs = args.pop(idx) + cutoffs = list(map(int, cutoffs[7:].split(','))) + # Get rid of the '-m' before the 'judged.xxx' option + args.pop(idx-1) + +temp_file = '' + +if len(args) > 1: + if not os.path.exists(args[-2]): + args[-2] = get_qrels_file(args[-2]) + if os.path.exists(args[-1]): + # Convert run to trec if it's on msmarco + with open(args[-1]) as f: + first_line = f.readline() + if 'Q0' not in first_line: + temp_file = tempfile.NamedTemporaryFile(delete=False).name + print('msmarco run detected. 
Converting to trec...') + run = pd.read_csv(args[-1], delim_whitespace=True, header=None, names=['query_id', 'doc_id', 'rank']) + run['score'] = 1 / run['rank'] + run.insert(1, 'Q0', 'Q0') + run['name'] = 'TEMPRUN' + run.to_csv(temp_file, sep='\t', header=None, index=None) + args[-1] = temp_file + + run = pd.read_csv(args[-1], delim_whitespace=True, header=None) + qrels = pd.read_csv(args[-2], delim_whitespace=True, header=None) + + # cast doc_id column as string + run[0] = run[0].astype(str) + qrels[0] = qrels[0].astype(str) + + # Discard non-judged hits + if judged_docs_only: + if not temp_file: + temp_file = tempfile.NamedTemporaryFile(delete=False).name + judged_indexes = pd.merge(run[[0,2]].reset_index(), qrels[[0,2]], on = [0,2])['index'] + run = run.loc[judged_indexes] + run.to_csv(temp_file, sep='\t', header=None, index=None) + args[-1] = temp_file + # Measure judged@cutoffs + for cutoff in cutoffs: + run_cutoff = run.groupby(0).head(cutoff) + judged = len(pd.merge(run_cutoff[[0,2]], qrels[[0,2]], on = [0,2])) / len(run_cutoff) + metric_name = f'judged_{cutoff}' + judged_result.append(f'{metric_name:22}\tall\t{judged:.4f}') + cmd = cmd_prefix + args[1:] +else: + cmd = cmd_prefix + +print(f'Running command: {cmd}') +shell = platform.system() == "Windows" +process = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell) +stdout, stderr = process.communicate() +if stderr: + print(stderr.decode("utf-8")) + +print('Results:') +print(stdout.decode("utf-8").rstrip()) + +for judged in judged_result: + print(judged) + +if temp_file: + os.remove(temp_file) diff --git a/pyserini/evaluate_script_info.py b/pyserini/evaluate_script_info.py new file mode 100644 index 0000000000000000000000000000000000000000..4e583578bda5cad2c3fce388bce6afe233669545 --- /dev/null +++ b/pyserini/evaluate_script_info.py @@ -0,0 +1,37 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
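A minimal sketch of how the wrapper's extra options above combine, under the assumption that `msmarco-passage-dev-subset` is a qrels name resolvable by `get_qrels_file` (the run path is a hypothetical placeholder):

import subprocess

# judged.10,20 reports the fraction of judged documents at cutoffs 10 and 20;
# -remove-unjudged drops unjudged hits before computing the other metrics.
cmd = ('python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m judged.10,20 '
       '-remove-unjudged msmarco-passage-dev-subset runs/run.bm25.msmarco-passage.txt')
print(subprocess.run(cmd.split(), capture_output=True, text=True).stdout)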
+# + +EVALUATION_INFO = { + "trec_eval": { + "description": "TREC evaluation script", + "urls": [ + "https://search.maven.org/remotecontent?filepath=uk/ac/gla/dcs/terrierteam/jtreceval/0.0.5/jtreceval-0.0.5-jar-with-dependencies.jar", + ], + }, + "msmarco_passage_eval": { + "description": "MSMARCO-passage evaluation script", + "urls": [ + "https://raw.githubusercontent.com/castorini/anserini-tools/master/scripts/msmarco/msmarco_passage_eval.py", + ], + }, + "msmarco_doc_eval": { + "description": "MSMARCO-doc evaluation script", + "urls": [ + "https://raw.githubusercontent.com/castorini/anserini-tools/master/scripts/msmarco/msmarco_doc_eval.py", + ], + } + +} diff --git a/pyserini/external_query_info.py b/pyserini/external_query_info.py new file mode 100644 index 0000000000000000000000000000000000000000..ab953f8ba920f413042608df55a39045d0751bfb --- /dev/null +++ b/pyserini/external_query_info.py @@ -0,0 +1,95 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +KILT_QUERY_INFO = { + "fever-dev-kilt": { + "description": "KILT FEVER dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/fever-dev-kilt.jsonl"], + "md5": "ae9a27503d177ba82cdb1e968b1aeac1", + "size (bytes)": 6174139, + "total_queries": 10444 + }, + "aidayago2-dev-kilt": { + "description": "KILT AIDA CoNLL-YAGO dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/aidayago2-dev-kilt.jsonl"], + "md5": "262c2350c0a331b26cdcc70590f068f2", + "size (bytes)": 21061554, + "total_queries": 4784 + }, + "wned-dev-kilt": { + "description": "KILT WNED-WIKI dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/wned-dev-kilt.jsonl"], + "md5": "b04e18e85c7f87030f5118c21f1297dc", + "size (bytes)": 12868348, + "total_queries": 3396 + }, + "cweb-dev-kilt": { + "description": "KILT WNED-CWEB dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/cweb-dev-kilt.jsonl"], + "md5": "bb62b9471cdec028abbe91b19030e9ad", + "size (bytes)": 90228527, + "total_queries": 5599 + }, + "trex-dev-kilt": { + "description": "KILT T-REx dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/trex-dev-kilt.jsonl"], + "md5": "ccd3c43891f08b2d5d9adf3e6885c8f9", + "size (bytes)": 3803558, + "total_queries": 5000 + }, + "structured_zeroshot-dev-kilt": { + "description": "KILT Zero-Shot RE dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/structured_zeroshot-dev-kilt.jsonl"], + "md5": "b2cb14cb4b00a90352c9ad8317829cfd", + "size (bytes)": 2266707, + "total_queries": 3724 + }, + "nq-dev-kilt": { + "description": "KILT Natural Questions dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/nq-dev-kilt.jsonl"], + "md5": "0bb57ca0b4676ed66005b8788d3a3050", + "size (bytes)": 7936566, + "total_queries": 2837 + }, + "hotpotqa-dev-kilt": { + "description": "KILT HotpotQA dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/hotpotqa-dev-kilt.jsonl"], + "md5": "3ebc5eeaa5572ec29451eb4b66c29333", + "size (bytes)": 3971321, + "total_queries": 5600 + }, + "triviaqa-dev-kilt": { + 
"description": "KILT TriviaQA dev set, generated using KILT's scripts/get_triviaqa_input.py", + "urls": ["https://github.com/castorini/pyserini-data/raw/main/queries/triviaqa-dev-kilt.jsonl"], + "md5": "0eda82a7a3e24271d623710fa2a2ff64", + "size (bytes)": 10314686, + "total_queries": 5359 + }, + "eli5-dev-kilt": { + "description": "KILT ELI5 dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/eli5-dev-kilt.jsonl"], + "md5": "7abac8b2495581d513b0542916178893", + "size (bytes)": 14149811, + "total_queries": 1507 + }, + "wow-dev-kilt": { + "description": "KILT Wizard of Wikipedia dev set", + "urls": ["http://dl.fbaipublicfiles.com/KILT/wow-dev-kilt.jsonl"], + "md5": "bf4000198be9d8acbab11a57745a6a8b", + "size (bytes)": 2418241, + "total_queries": 3058 + } +} diff --git a/pyserini/fusion/__init__.py b/pyserini/fusion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6eff3bfce97142fca2c7e138ca16afdbdfd1e82e --- /dev/null +++ b/pyserini/fusion/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._base import average, FusionMethod, interpolation, reciprocal_rank_fusion + +__all__ = ['FusionMethod', 'average', 'interpolation', 'reciprocal_rank_fusion'] diff --git a/pyserini/fusion/__main__.py b/pyserini/fusion/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..640754ec95ecf5a34bcb580e1e35f48f60f359af --- /dev/null +++ b/pyserini/fusion/__main__.py @@ -0,0 +1,49 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +from ._base import FusionMethod +from pyserini.fusion import average, interpolation, reciprocal_rank_fusion +from ..trectools import TrecRun + + +parser = argparse.ArgumentParser(description='Perform various ways of fusion given a list of trec run files.') +parser.add_argument('--runs', type=str, nargs='+', default=[], required=True, + help='List of run files separated by space.') +parser.add_argument('--output', type=str, required=True, help="Path to resulting fused txt.") +parser.add_argument('--runtag', type=str, default="pyserini.fusion", help="Tag name of fused run.") +parser.add_argument('--method', type=FusionMethod, default=FusionMethod.RRF, help="The fusion method to be used.") +parser.add_argument('--rrf.k', dest='rrf_k', type=int, default=60, + help="Parameter k needed for reciprocal rank fusion.") +parser.add_argument('--alpha', type=float, default=0.5, required=False, help='Alpha value used for interpolation.') +parser.add_argument('--depth', type=int, default=1000, required=False, help='Pool depth per topic.') +parser.add_argument('--k', type=int, default=1000, required=False, help='Number of documents to output per topic.') +parser.add_argument('--resort', action='store_true', help='We resort the Trec run files or not') +args = parser.parse_args() + +trec_runs = [TrecRun(filepath=path,resort=args.resort) for path in args.runs] + +fused_run = None +if args.method == FusionMethod.RRF: + fused_run = reciprocal_rank_fusion(trec_runs, rrf_k=args.rrf_k, depth=args.depth, k=args.k) +elif args.method == FusionMethod.INTERPOLATION: + fused_run = interpolation(trec_runs, alpha=args.alpha, depth=args.depth, k=args.k) +elif args.method == FusionMethod.AVERAGE: + fused_run = average(trec_runs, depth=args.depth, k=args.k) +else: + raise NotImplementedError(f'Fusion method {args.method} not implemented.') + +fused_run.save_to_txt(args.output, tag=args.runtag) diff --git a/pyserini/fusion/__pycache__/__init__.cpython-310.pyc b/pyserini/fusion/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6815f48ab837a01da9d9e485fe91954abd21c0df Binary files /dev/null and b/pyserini/fusion/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/fusion/__pycache__/_base.cpython-310.pyc b/pyserini/fusion/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a275259efcd250b246d31d8970eb85abd16eab1 Binary files /dev/null and b/pyserini/fusion/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/fusion/_base.py b/pyserini/fusion/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..674fc07e9ab98940e80992abdd4e360e5e6aa888 --- /dev/null +++ b/pyserini/fusion/_base.py @@ -0,0 +1,111 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
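The fusion CLI above has a direct programmatic counterpart; a minimal sketch, with hypothetical run paths:

from pyserini.fusion import reciprocal_rank_fusion
from pyserini.trectools import TrecRun

# Load two hypothetical runs and fuse them with reciprocal rank fusion.
runs = [TrecRun(filepath='runs/run.bm25.txt'), TrecRun(filepath='runs/run.dense.txt')]
fused = reciprocal_rank_fusion(runs, rrf_k=60, depth=1000, k=1000)
fused.save_to_txt('runs/run.rrf.txt', tag='pyserini.fusion')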
+# + +from enum import Enum +from pyserini.trectools import AggregationMethod, RescoreMethod, TrecRun +from typing import List + + +class FusionMethod(Enum): + RRF = 'rrf' + INTERPOLATION = 'interpolation' + AVERAGE = 'average' + + +def reciprocal_rank_fusion(runs: List[TrecRun], rrf_k: int = 60, depth: int = None, k: int = None): + """Perform reciprocal rank fusion on a list of ``TrecRun`` objects. Implementation follows Cormack et al. + (SIGIR 2009) paper titled "Reciprocal Rank Fusion Outperforms Condorcet and Individual Rank Learning Methods." + + Parameters + ---------- + runs : List[TrecRun] + List of ``TrecRun`` objects. + rrf_k : int + Parameter to avoid vanishing importance of lower-ranked documents. Note that this is different from the *k* in + top *k* retrieval; set to 60 by default, per Cormack et al. + depth : int + Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that + the complete list of results is considered. + k : int + Length of final results list. Set to ``None`` by default, which indicates that the union of all input documents + are ranked. + + Returns + ------- + TrecRun + Output ``TrecRun`` that combines input runs via reciprocal rank fusion. + """ + + # TODO: Add option to *not* clone runs, thus making the method destructive, but also more efficient. + rrf_runs = [run.clone().rescore(method=RescoreMethod.RRF, rrf_k=rrf_k) for run in runs] + return TrecRun.merge(rrf_runs, AggregationMethod.SUM, depth=depth, k=k) + + +def interpolation(runs: List[TrecRun], alpha: int = 0.5, depth: int = None, k: int = None): + """Perform fusion by interpolation on a list of exactly two ``TrecRun`` objects. + new_score = first_run_score * alpha + (1 - alpha) * second_run_score. + + Parameters + ---------- + runs : List[TrecRun] + List of ``TrecRun`` objects. Exactly two runs. + alpha : int + Parameter alpha will be applied on the first run and (1 - alpha) will be applied on the second run. + depth : int + Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that + the complete list of results is considered. + k : int + Length of final results list. Set to ``None`` by default, which indicates that the union of all input documents + are ranked. + + Returns + ------- + TrecRun + Output ``TrecRun`` that combines input runs via interpolation. + """ + + if len(runs) != 2: + raise Exception('Interpolation must be performed on exactly two runs.') + + scaled_runs = [] + scaled_runs.append(runs[0].clone().rescore(method=RescoreMethod.SCALE, scale=alpha)) + scaled_runs.append(runs[1].clone().rescore(method=RescoreMethod.SCALE, scale=(1-alpha))) + + return TrecRun.merge(scaled_runs, AggregationMethod.SUM, depth=depth, k=k) + + +def average(runs: List[TrecRun], depth: int = None, k: int = None): + """Perform fusion by averaging on a list of ``TrecRun`` objects. + + Parameters + ---------- + runs : List[TrecRun] + List of ``TrecRun`` objects. + depth : int + Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that + the complete list of results is considered. + k : int + Length of final results list. Set to ``None`` by default, which indicates that the union of all input documents + are ranked. + + Returns + ------- + TrecRun + Output ``TrecRun`` that combines input runs via averaging. 
+ """ + + scaled_runs = [run.clone().rescore(method=RescoreMethod.SCALE, scale=(1/len(runs))) for run in runs] + return TrecRun.merge(scaled_runs, AggregationMethod.SUM, depth=depth, k=k) diff --git a/pyserini/hsearch.py b/pyserini/hsearch.py new file mode 100644 index 0000000000000000000000000000000000000000..7fd7605712ed496e7ac659fc38a50f6cb70b5551 --- /dev/null +++ b/pyserini/hsearch.py @@ -0,0 +1,38 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Deprecated. The package ``pyserini.hsearch` has been renamed `pyserini.search.hybrid`. Stubs are retained here for +redirection purpose to ensure that code in existing published papers remain function (with warnings).""" + +import os +import sys + +from pyserini.search.hybrid import HybridSearcher as NewHybridSearcher + +__all__ = ['HybridSearcher'] + + +class HybridSearcher(NewHybridSearcher): + def __new__(cls, *args, **kwargs): + print('pyserini.hsearch.HybridSearcher class has been deprecated, ' + 'please use HybridSearcher from pyserini.search.hybrid instead') + return super().__new__(cls) + + +if __name__ == "__main__": + print('WARNING: pyserini.hsearch is deprecated, please use pyserini.search.hybrid instead') + args = " ".join(sys.argv[1:]) + os.system(f'python -m pyserini.search.hybrid {args}') diff --git a/pyserini/index/__init__.py b/pyserini/index/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4120db77a55cf527c747e44a510b48f4b36cda44 --- /dev/null +++ b/pyserini/index/__init__.py @@ -0,0 +1,23 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Classes here have been moved to pyserini.index.lucene, e.g., the pyserini.index.Indexer is now +# pyserini.index.lucene.IndexReader. We're importing symbols here and then re-exporting to preserve +# backward compatability to code snippets published in Lin et al. (SIGIR 2021). 
+ +from .lucene._base import Document, Generator, IndexTerm, Posting, IndexReader + +__all__ = ['Document', 'Generator', 'IndexTerm', 'Posting', 'IndexReader'] diff --git a/pyserini/index/__main__.py b/pyserini/index/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecce089074c39e5f6e2475cf56a01dcb3fc400eb --- /dev/null +++ b/pyserini/index/__main__.py @@ -0,0 +1,34 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from jnius import autoclass +import sys +import os + +print('pyserini.index is deprecated, please use pyserini.index.lucene.') +args = sys.argv[1:] +# argument check +for i in range(len(args)): + # Convert double hyphen args into single hyphen args for Java: e.g., --input becomes -input + if args[i].startswith('--'): + args[i] = args[i][1:] + if args[i] == '-input': + collection_dir = args[i+1] + if os.path.isfile(collection_dir): + raise ValueError('Argument -input should be a directory.') + +JIndexCollection = autoclass('io.anserini.index.IndexCollection') +JIndexCollection.main(args) diff --git a/pyserini/index/__pycache__/__init__.cpython-310.pyc b/pyserini/index/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d825c5969ec882ae10eec45428783bf040006b39 Binary files /dev/null and b/pyserini/index/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/index/faiss.py b/pyserini/index/faiss.py new file mode 100644 index 0000000000000000000000000000000000000000..50712d378a3a6395373c1549c144a2c5f00a035c --- /dev/null +++ b/pyserini/index/faiss.py @@ -0,0 +1,82 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
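A small sketch of what the re-export in `pyserini/index/__init__.py` above guarantees, namely that the legacy and new import paths resolve to the same class:

from pyserini.index import IndexReader
from pyserini.index.lucene import IndexReader as LuceneIndexReader

# Both names are bound to the same class object by the re-export.
assert IndexReader is LuceneIndexReader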
+# + +import json +import os +import argparse +import shutil +import numpy as np + +import faiss +from tqdm import tqdm + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--input', type=str, help='path to embeddings directory', required=True) + parser.add_argument('--output', type=str, help='path to output index dir', required=True) + parser.add_argument('--dim', type=int, default=768, required=False) + parser.add_argument('--hnsw', action="store_true", required=False) + parser.add_argument('--M', type=int, default=256, required=False) + parser.add_argument('--efC', type=int, default=256, required=False) + parser.add_argument('--pq', action="store_true", required=False) + parser.add_argument('--pq-m', type=int, default=192, required=False) + parser.add_argument('--pq-nbits', type=int, default=8, required=False) + parser.add_argument('--threads', type=int, default=12, required=False) + args = parser.parse_args() + + faiss.omp_set_num_threads(args.threads) + + if not os.path.exists(args.output): + os.mkdir(args.output) + + if 'index' in os.listdir(args.input): + shutil.copy(os.path.join(args.input, 'docid'), os.path.join(args.output, 'docid')) + bf_index = faiss.read_index(os.path.join(args.input, 'index')) + vectors = bf_index.reconstruct_n(0, bf_index.ntotal) + else: + vectors = [] + with open(os.path.join(args.output, 'docid'), 'w') as f_out: + for filename in tqdm(os.listdir(args.input)): + path = os.path.join(args.input, filename) + with open(path) as f_in: + for line in f_in: + info = json.loads(line) + docid = info['id'] + vector = info['vector'] + f_out.write(f'{docid}\n') + vectors.append(vector) + vectors = np.array(vectors, dtype='float32') + print(vectors.shape) + + if args.hnsw and args.pq: + index = faiss.IndexHNSWPQ(args.dim, args.pq_m, args.M) + index.hnsw.efConstruction = args.efC + index.metric_type = faiss.METRIC_INNER_PRODUCT + elif args.hnsw: + index = faiss.IndexHNSWFlat(args.dim, args.M, faiss.METRIC_INNER_PRODUCT) + index.hnsw.efConstruction = args.efC + elif args.pq: + index = faiss.IndexPQ(args.dim, args.pq_m, args.pq_nbits, faiss.METRIC_INNER_PRODUCT) + else: + index = faiss.IndexFlatIP(args.dim) + index.verbose = True + + if args.pq: + index.train(vectors) + + index.add(vectors) + print(index.ntotal) + faiss.write_index(index, os.path.join(args.output, 'index')) diff --git a/pyserini/index/lucene/__init__.py b/pyserini/index/lucene/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7926753011dbc9e876747f8048c1e73a56e69394 --- /dev/null +++ b/pyserini/index/lucene/__init__.py @@ -0,0 +1,21 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
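A minimal sketch of driving the FAISS index builder above over a directory of embedding shards; all paths are hypothetical and the flags mirror the argparse options defined in the script:

import subprocess

subprocess.run([
    'python', '-m', 'pyserini.index.faiss',
    '--input', 'embeddings/msmarco-passage',     # directory of JSONL files with {'id': ..., 'vector': [...]}
    '--output', 'indexes/msmarco-passage-hnsw',  # receives the 'index' and 'docid' files
    '--hnsw', '--M', '256', '--efC', '256', '--threads', '12',
], check=True)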
+# + +from ._base import Document, Generator, IndexTerm, Posting, IndexReader +from ._indexer import LuceneIndexer, JacksonObjectMapper, JacksonJsonNode + +__all__ = ['Document', 'Generator', 'IndexTerm', 'Posting', 'IndexReader', 'LuceneIndexer', + 'JacksonObjectMapper', 'JacksonJsonNode'] \ No newline at end of file diff --git a/pyserini/index/lucene/__main__.py b/pyserini/index/lucene/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..01d5a05cb1c0268dfe7299aa1c7987c6d90bcc58 --- /dev/null +++ b/pyserini/index/lucene/__main__.py @@ -0,0 +1,36 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from jnius import autoclass +import sys +import os + + +if __name__ == '__main__': + args = sys.argv[1:] + for i in range(len(args)): + if args[i].startswith('--'): + args[i] = args[i][1:] + + # argument check + for i in range(len(args)): + if args[i] == '-input': + collection_dir = args[i+1] + if os.path.isfile(collection_dir): + raise ValueError('Argument -input should be a directory.') + + JIndexCollection = autoclass('io.anserini.index.IndexCollection') + JIndexCollection.main(args) diff --git a/pyserini/index/lucene/__pycache__/__init__.cpython-310.pyc b/pyserini/index/lucene/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6433203dfa737562b8778ec13e82768ef68fd8b Binary files /dev/null and b/pyserini/index/lucene/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/index/lucene/__pycache__/_base.cpython-310.pyc b/pyserini/index/lucene/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c14bf8ed2db38c8d7e585056ce433905b183659c Binary files /dev/null and b/pyserini/index/lucene/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/index/lucene/__pycache__/_indexer.cpython-310.pyc b/pyserini/index/lucene/__pycache__/_indexer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e7a601224d83c34442c2daf552471970c46b85c Binary files /dev/null and b/pyserini/index/lucene/__pycache__/_indexer.cpython-310.pyc differ diff --git a/pyserini/index/lucene/_base.py b/pyserini/index/lucene/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..9cf5cee1d50d3fc71369ea95bc0c8e1733839d85 --- /dev/null +++ b/pyserini/index/lucene/_base.py @@ -0,0 +1,623 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python interface for raw access to Lucene indexes built by Anserini. The main entry +point is the ``IndexReaderUtils`` class, which wraps the Java class with the same name in Anserini. Many of the classes +and methods provided are meant only to provide tools for examining an index and are not optimized for computing over. +""" + +import logging +from enum import Enum +from typing import Dict, Iterator, List, Optional, Tuple +from tqdm import tqdm +import json +import math + +from pyserini.analysis import get_lucene_analyzer, JAnalyzer, JAnalyzerUtils +from pyserini.pyclass import autoclass +from pyserini.util import download_prebuilt_index, get_sparse_indexes_info +from pyserini.prebuilt_index_info import TF_INDEX_INFO, IMPACT_INDEX_INFO + +logger = logging.getLogger(__name__) + + +# Wrappers around Anserini classes +JDocument = autoclass('org.apache.lucene.document.Document') +JIndexReader = autoclass('io.anserini.index.IndexReaderUtils') + + +class JIndexHelpers: + @staticmethod + def JArgs(): + args = autoclass('io.anserini.index.IndexCollection$Args')() + args.storeContents = True + args.storeRaw = True + args.dryRun = True ## So that indexing will be skipped + + return args + + @staticmethod + def JCounters(): + IndexCollection = autoclass('io.anserini.index.IndexCollection') + Counters = autoclass('io.anserini.index.IndexCollection$Counters') + + return Counters(IndexCollection) + + +class Document: + """Wrapper class for a Lucene ``Document``. + + Parameters + ---------- + document : JDocument + Underlying Lucene ``Document``. + """ + + def __init__(self, document): + if document is None: + raise ValueError('Cannot create a Document with None.') + self.object = document + + def docid(self: JDocument) -> str: + return self.object.getField('id').stringValue() + + def id(self: JDocument) -> str: + # Convenient alias for docid() + return self.object.getField('id').stringValue() + + def lucene_document(self: JDocument) -> JDocument: + return self.object + + def contents(self: JDocument) -> str: + return self.object.get('contents') + + def raw(self: JDocument) -> str: + return self.object.get('raw') + + def get(self: JDocument, field: str) -> str: + return self.object.get(field) + + +class JGenerators(Enum): + AclAnthologyGenerator = autoclass('io.anserini.index.generator.AclAnthologyGenerator') + DefaultLuceneDocumentGenerator = autoclass('io.anserini.index.generator.DefaultLuceneDocumentGenerator') + TweetGenerator = autoclass('io.anserini.index.generator.TweetGenerator') + WashingtonPostGenerator = autoclass('io.anserini.index.generator.WashingtonPostGenerator') + + +class Generator: + """Wrapper class for Anserini's generators. 
+ + Parameters + ---------- + generator_class : str + Name of generator class to instantiate + """ + + def __init__(self, generator_class): + self.counters = JIndexHelpers.JCounters() + self.args = JIndexHelpers.JArgs() + self.generator_class = generator_class + self.object = self._get_generator() + + def _get_generator(self): + try: + return JGenerators[self.generator_class].value(self.args) + except: + raise ValueError(self.generator_class) + + def create_document(self, document): + """ + Parameters + ---------- + document : pyserini.collection.pycollection.Document + Collection document to create Lucene document from + + Returns + ------- + result : org.apache.lucene.document.Document + Lucene document generated + """ + return self.object.createDocument(document.object) + + +class IndexTerm: + """Class representing an analyzed term in an index with associated statistics. + + Parameters + ---------- + term : str + Analyzed term. + df : int + Document frequency, the number of documents in the collection that contains the term. + cf : int + Collection frequency, the number of times the term occurs in the entire collection. This value is equal to the + sum of all the term frequencies of the term across all documents in the collection. + """ + + def __init__(self, term, df, cf): + self.term = term + self.df = df + self.cf = cf + + +class Posting: + """Class representing a posting in a postings list. + + Parameters + ---------- + docid : int + Collection ``docid``. + tf : int + Term frequency. + positions : List[int] + List of positions. + """ + + def __init__(self, docid, tf, positions): + self.docid = docid + self.tf = tf + self.positions = positions + + def __repr__(self): + repr = '(' + str(self.docid) + ', ' + str(self.tf) + ')' + if self.positions: + repr += ' [' + ','.join([str(p) for p in self.positions]) + ']' + return repr + + +class IndexReader: + """Wrapper class for ``IndexReaderUtils`` in Anserini. + + Parameters + ---------- + index_dir : str + Path to Lucene index directory. + """ + + def __init__(self, index_dir): + self.object = JIndexReader() + self.reader = self.object.getReader(index_dir) + + @classmethod + def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False): + """Build an index reader from a prebuilt index; download the index if necessary. + + Parameters + ---------- + prebuilt_index_name : str + Prebuilt index name. + verbose : bool + Print status information. + + Returns + ------- + IndexReader + Index reader built from the prebuilt index. + """ + if verbose: + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + + try: + index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose) + except ValueError as e: + print(str(e)) + return None + + if verbose: + print(f'Initializing {prebuilt_index_name}...') + + index_reader = cls(index_dir) + # Validate index stats; will throw exception there are any issues. + index_reader.validate(prebuilt_index_name, verbose=verbose) + + return index_reader + + @staticmethod + def list_prebuilt_indexes(): + """Display information about available prebuilt indexes.""" + get_sparse_indexes_info() + + def analyze(self, text: str, analyzer=None) -> List[str]: + """Analyze a piece of text. Applies Anserini's default Lucene analyzer if analyzer not specified. + + Parameters + ---------- + text : str + Text to analyze. + analyzer : analyzer + Analyzer to apply. + Returns + ------- + List[str] + List of tokens corresponding to the output of the analyzer. 
+ """ + if analyzer is None: + results = JAnalyzerUtils.analyze(text) + else: + results = JAnalyzerUtils.analyze(analyzer, text) + tokens = [] + for token in results.toArray(): + tokens.append(token) + return tokens + + def validate(self, prebuilt_index_name: str, verbose=False): + """Validate this index against stored stats for a pre-built index.""" + stats = self.stats() + + if prebuilt_index_name in TF_INDEX_INFO: + if stats['documents'] != TF_INDEX_INFO[prebuilt_index_name]['documents']: + raise ValueError('Pre-built index fails consistency check: "documents" does not match!') + if stats['unique_terms'] != TF_INDEX_INFO[prebuilt_index_name]['unique_terms']: + raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!') + if stats['total_terms'] != TF_INDEX_INFO[prebuilt_index_name]['total_terms']: + raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!') + elif prebuilt_index_name in IMPACT_INDEX_INFO: + if stats['documents'] != IMPACT_INDEX_INFO[prebuilt_index_name]['documents']: + raise ValueError('Pre-built index fails consistency check: "documents" does not match!') + if stats['unique_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['unique_terms']: + raise ValueError('Pre-built index fails consistency check: "unique_terms" does not match!') + if stats['total_terms'] != IMPACT_INDEX_INFO[prebuilt_index_name]['total_terms']: + raise ValueError('Pre-built index fails consistency check: "total_terms" does not match!') + else: + print(f'Unknown pre-built index \'{prebuilt_index_name}\'!') + return False + + if verbose: + print(stats) + print(f'Index passes consistency checks against pre-built index \'{prebuilt_index_name}\'!') + + return True + + def terms(self) -> Iterator[IndexTerm]: + """Return an iterator over analyzed terms in the index. + + Returns + ------- + Iterator[IndexTerm] + Iterator over :class:`IndexTerm` objects corresponding to (analyzed) terms in the index. + """ + term_iterator = self.object.getTerms(self.reader) + while term_iterator.hasNext(): + cur_term = term_iterator.next() + yield IndexTerm(cur_term.getTerm(), cur_term.getDF(), cur_term.getTotalTF()) + + def get_term_counts(self, term: str, analyzer: Optional[JAnalyzer] = get_lucene_analyzer()) -> Tuple[int, int]: + """Return the document frequency and collection frequency of a term. Applies Anserini's default Lucene + ``Analyzer`` if analyzer is not specified. + + Parameters + ---------- + term : str + Unanalyzed term. + analyzer : analyzer + Analyzer to apply. + + Returns + ------- + Tuple[int, int] + Document frequency and collection frequency. + """ + if analyzer is None: + analyzer = get_lucene_analyzer(stemming=False, stopwords=False) + + term_map = self.object.getTermCountsWithAnalyzer(self.reader, term, analyzer) + + return term_map.get('docFreq'), term_map.get('collectionFreq') + + def get_postings_list(self, term: str, analyzer=get_lucene_analyzer()) -> List[Posting]: + """Return the postings list for a term. + + Parameters + ---------- + term : str + Raw term. + analyzer : analyzer + Analyzer to apply. Defaults to Anserini's default. + + Returns + ------- + List[Posting] + List of :class:`Posting` objects corresponding to the postings list for the term. 
+ """ + if analyzer is None: + postings_list = self.object.getPostingsListForAnalyzedTerm(self.reader, term) + else: + postings_list = self.object.getPostingsListWithAnalyzer(self.reader, term, + analyzer) + + if postings_list is None: + return None + + result = [] + for posting in postings_list.toArray(): + result.append(Posting(posting.getDocid(), posting.getTF(), posting.getPositions())) + return result + + def get_document_vector(self, docid: str) -> Optional[Dict[str, int]]: + """Return the document vector for a ``docid``. Note that requesting the document vector of a ``docid`` that + does not exist in the index will return ``None`` (as opposed to an empty dictionary); this forces the caller + to handle ``None`` explicitly and guards against silent errors. + + Parameters + ---------- + docid : str + Collection ``docid``. + + Returns + ------- + Optional[Dict[str, int]] + A dictionary with analyzed terms as keys and their term frequencies as values. + """ + doc_vector_map = self.object.getDocumentVector(self.reader, docid) + if doc_vector_map is None: + return None + doc_vector_dict = {} + for term in doc_vector_map.keySet().toArray(): + doc_vector_dict[term] = doc_vector_map.get(term) + return doc_vector_dict + + def get_term_positions(self, docid: str) -> Optional[Dict[str, int]]: + """Return the term position mapping of the document with ``docid``. Note that the term in the document is + stemmed and stop words may be removed according to your index settings. Also, requesting the document vector of + a ``docid`` that does not exist in the index will return ``None`` (as opposed to an empty dictionary); this + forces the caller to handle ``None`` explicitly and guards against silent errors. + + Parameters + ---------- + docid : str + Collection ``docid``. + + Returns + ------- + Optional[Dict[str, int]] + A tuple contains a dictionary with analyzed terms as keys and corresponding posting list as values + """ + java_term_position_map = self.object.getTermPositions(self.reader, docid) + if java_term_position_map is None: + return None + term_position_map = {} + for term in java_term_position_map.keySet().toArray(): + term_position_map[term] = java_term_position_map.get(term).toArray() + return term_position_map + + def doc(self, docid: str) -> Optional[Document]: + """Return the :class:`Document` corresponding to ``docid``. Returns ``None`` if the ``docid`` does not exist + in the index. + + Parameters + ---------- + docid : str + The collection ``docid``. + + Returns + ------- + Optional[Document] + :class:`Document` corresponding to the ``docid``. + """ + lucene_document = self.object.document(self.reader, docid) + if lucene_document is None: + return None + return Document(lucene_document) + + def doc_by_field(self, field: str, q: str) -> Optional[Document]: + """Return the :class:`Document` based on a ``field`` with ``id``. For example, this method can be used to fetch + document based on alternative primary keys that have been indexed, such as an article's DOI. + + Parameters + ---------- + field : str + The field to look up. + q : str + The document's unique id. + + Returns + ------- + Optional[Document] + :class:`Document` whose ``field`` is ``id``. + """ + lucene_document = self.object.documentByField(self.reader, field, q) + if lucene_document is None: + return None + return Document(lucene_document) + + def doc_raw(self, docid: str) -> Optional[str]: + """Return the raw document contents for a collection ``docid``. 
+ + Parameters + ---------- + docid : str + Collection ``docid``. + + Returns + ------- + Optional[str] + Raw document contents. + """ + return self.object.documentRaw(self.reader, docid) + + def doc_contents(self, docid: str) -> Optional[str]: + """Return the indexed document contents for a collection ``docid``. + + Parameters + ---------- + docid : str + The collection ``docid``. + + Returns + ------- + Optional[str] + Index document contents. + """ + return self.object.documentContents(self.reader, docid) + + def compute_bm25_term_weight(self, docid: str, term: str, analyzer=get_lucene_analyzer(), k1=0.9, b=0.4) -> float: + """Compute the BM25 weight of a term in a document. Specify ``analyzer=None`` for an already analyzed term, + e.g., from the output of :func:`get_document_vector`. + + Parameters + ---------- + docid : str + Collection ``docid``. + term : str + Term. + analyzer : analyzer + Lucene analyzer to use, ``None`` if term is already analyzed. + k1 : float + BM25 k1 parameter. + b : float + BM25 b parameter. + + Returns + ------- + float + BM25 weight of the term in the document, or 0 if the term does not exist in the document. + """ + if analyzer is None: + return self.object.getBM25AnalyzedTermWeightWithParameters(self.reader, docid, + term, + float(k1), float(b)) + else: + return self.object.getBM25UnanalyzedTermWeightWithParameters(self.reader, docid, + term, analyzer, + float(k1), float(b)) + + def compute_query_document_score(self, docid: str, query: str, similarity=None): + if similarity is None: + return self.object.computeQueryDocumentScore(self.reader, docid, query) + else: + return self.object.computeQueryDocumentScoreWithSimilarity(self.reader, docid, query, similarity) + + def convert_internal_docid_to_collection_docid(self, docid: int) -> str: + """Convert Lucene's internal ``docid`` to its external collection ``docid``. + + Parameters + ---------- + docid : int + Lucene internal ``docid``. + + Returns + ------- + str + External collection ``docid`` corresponding to Lucene's internal ``docid``. + """ + return self.object.convertLuceneDocidToDocid(self.reader, docid) + + def convert_collection_docid_to_internal_docid(self, docid: str) -> int: + """Convert external collection ``docid`` to its Lucene's internal ``docid``. + + Parameters + ---------- + docid : str + External collection ``docid``. + + Returns + ------- + str + Lucene internal ``docid`` corresponding to the external collection ``docid``. + """ + return self.object.convertDocidToLuceneDocid(self.reader, docid) + + def stats(self) -> Dict[str, int]: + """Return dictionary with index statistics. + + Returns + ------- + Dict[str, int] + Index statistics as a dictionary of statistic's name to statistic. + - documents: number of documents + - non_empty_documents: number of non-empty documents + - unique_terms: number of unique terms + - total_terms: number of total terms + """ + index_stats_map = self.object.getIndexStats(self.reader) + + if index_stats_map is None: + return None + + index_stats_dict = {} + for term in index_stats_map.keySet().toArray(): + index_stats_dict[term] = index_stats_map.get(term) + + return index_stats_dict + + def dump_documents_BM25(self, file_path, k1=0.9, b=0.4): + """Dumps out all the document vectors with BM25 weights in Pyserini's JSONL vector format. + + Parameters + ---------- + file_path : str + File path to dump JSONL file. + k1 : float + BM25 k1 parameter. + b : float + BM25 b parameter. 
+ """ + + f = open(file_path, 'w') + + assert 'documents' in self.stats() + for i in tqdm(range(self.stats()['documents'])): + docid = self.convert_internal_docid_to_collection_docid(i) + bm25_vector = {} + for term in self.get_document_vector(docid): + bm25_vector[term] = self.compute_bm25_term_weight(docid, term, analyzer=None, k1=k1, b=b) + + # vectors are written line by line to avoid running out of memory + f.write(json.dumps({'id': docid, 'vector': bm25_vector}) + "\n") + + f.close() + + def quantize_weights(self, input_file_path, output_file_path, bits = 8): + """Takes vectors of weights in Pyserini's JSONL vector format and quantizes them. + + Parameters + ---------- + input_file_path : str + File path of vectors of weights in Pyserini's JSONL vector format. + output_file_path : str + File path to output JSONL file of quantized weight vectors. + bits : int + Number of bits to use to represent quantized scores. + """ + + min_weight = float('inf') + max_weight = float('-inf') + + input_file = open(input_file_path, 'r') + + # vectors are read line by line to avoid running out of memory + for line in input_file: + doc = json.loads(line) + for weight in doc['vector'].values(): + if weight > max_weight: + max_weight = weight + if weight < min_weight: + min_weight = weight + input_file.seek(0) + + output_file = open(output_file_path, 'w') + + smallest_impact = 1 + for line in input_file: + doc = json.loads(line) + for element in doc['vector']: + doc['vector'][element] = math.floor((2 ** bits - smallest_impact) * (doc['vector'][element] - min_weight) / (max_weight - min_weight)) + smallest_impact + output_file.write(json.dumps(doc) + "\n") + + input_file.close() + output_file.close() diff --git a/pyserini/index/lucene/_indexer.py b/pyserini/index/lucene/_indexer.py new file mode 100644 index 0000000000000000000000000000000000000000..8546ee31572eea5a5d1fa8d133417573285124aa --- /dev/null +++ b/pyserini/index/lucene/_indexer.py @@ -0,0 +1,121 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +from typing import List, Dict + +from pyserini.pyclass import autoclass + +logger = logging.getLogger(__name__) + +JLuceneIndexer = autoclass('io.anserini.index.SimpleIndexer') +JsonCollectionDocument = autoclass('io.anserini.collection.JsonCollection$Document') +JacksonObjectMapper = autoclass('com.fasterxml.jackson.databind.ObjectMapper') +JacksonJsonNode = autoclass('com.fasterxml.jackson.databind.JsonNode') + + +class LuceneIndexer: + """Wrapper class for ``SimpleIndexer`` in Anserini. Provides basic functionality for on-the-fly indexing via a + programmatic API, i.e., indexing in-process objects as opposed to on-file documents. + + Parameters + ---------- + index_dir : str + Path to Lucene index directory. + args : List[str] + List of arguments to pass to ``SimpleIndexer``. + append : bool + Append to existing index. + threads : int + Number of indexing threads. 
+ """ + def __init__(self, index_dir: str = None, args: List[str] = None, append: bool = False, threads: int = 8): + self.index_dir = index_dir + self.args = args + if args: + args.extend(['-input', '', '-collection', 'JsonCollection', '-threads', str(threads)]) + if append: + args.extend(['-append']) + self.object = JLuceneIndexer(args) + else: + self.object = JLuceneIndexer(index_dir, append, int(threads)) + + self.mapper = JacksonObjectMapper() + + def add_doc_raw(self, doc: str): + """Add a raw document (in the form of a JSON string) to the index. + + Parameters + ---------- + doc : str + Document to add. + """ + self.object.addRawDocument(doc) + + def add_doc_dict(self, doc: Dict[str, str]): + """Add a document (in the form of a Python dictionary) to the index. + + Parameters + ---------- + doc : Dict[str, str] + Document to add. + """ + self.object.addJsonDocument(JsonCollectionDocument.fromFields(doc['id'], doc['contents'])) + + def add_doc_json(self, node: JacksonJsonNode): + """Add a document (in the form of a Jackson JSON node object) to the index. + + Parameters + ---------- + node : JacksonJsonNode + Document to add. + """ + self.object.addJsonNode(node) + + def add_batch_raw(self, docs: List[str]): + """Add a batch of raw documents (in the form of JSON strings) to the index. + + Parameters + ---------- + docs : List[str] + Documents to add. + """ + self.object.addRawDocuments(docs) + + def add_batch_dict(self, docs: List[Dict[str, str]]): + """Add a batch of documents (in the form of Python dictionaries) to the index. + + Parameters + ---------- + docs : List[Dict[str, str]] + Documents to add. + """ + docs = list(map(lambda d: JsonCollectionDocument.fromFields(d['id'], d['contents']), docs)) + self.object.addJsonDocuments(docs) + + def add_batch_json(self, nodes: List[JacksonJsonNode]): + """Add a batch of documents (in the form of Jackson JSON node objects) to the index. + + Parameters + ---------- + nodes : List[JacksonJsonNode] + Documents to add. + """ + self.object.addJsonNodes(nodes) + + def close(self): + """Close this indexer, committing all in-memory data to disk.""" + self.object.close() diff --git a/pyserini/index/merge_faiss_indexes.py b/pyserini/index/merge_faiss_indexes.py new file mode 100644 index 0000000000000000000000000000000000000000..5662aae9fe4ec844f89e85b7bf992341bd73321d --- /dev/null +++ b/pyserini/index/merge_faiss_indexes.py @@ -0,0 +1,46 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
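As a quick aside on the LuceneIndexer wrapper introduced above, the sketch below shows the intended on-the-fly indexing flow: construct the indexer, add in-process documents, and commit with close(). It assumes LuceneIndexer is re-exported from pyserini.index.lucene (consistent with the module layout in this diff) and that LuceneSearcher is available from pyserini.search.lucene; the directory path and documents are purely illustrative.

from pyserini.index.lucene import LuceneIndexer
from pyserini.search.lucene import LuceneSearcher

# Build a tiny index from in-process dictionaries (hypothetical path and toy documents).
indexer = LuceneIndexer('indexes/demo', threads=1)
indexer.add_batch_dict([
    {'id': 'doc1', 'contents': 'The quick brown fox jumps over the lazy dog.'},
    {'id': 'doc2', 'contents': 'Pyserini supports on-the-fly indexing via SimpleIndexer.'},
])
indexer.close()  # commits all in-memory data to disk

# The resulting index can then be searched like any other Lucene index.
searcher = LuceneSearcher('indexes/demo')
hits = searcher.search('fox', k=10)
print(hits[0].docid if hits else 'no hits')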
+# + +import argparse + +import faiss +import os + + +parser = argparse.ArgumentParser() +parser.add_argument('--dimension', type=int, help='dimension of passage embeddings', required=False, default=768) +parser.add_argument('--prefix', type=str, help='directory to store brute force index of corpus', required=True) +parser.add_argument('--shard-num', type=int, help='number of shards', default=1) +args = parser.parse_args() + +new_index = faiss.IndexFlatIP(args.dimension) +docid_files = [] +for i in range(args.shard_num): + index = faiss.read_index(os.path.join(args.prefix + str(i), 'index')) + docid_files.append(os.path.join(args.prefix + str(i), 'docid')) + vectors = index.reconstruct_n(0, index.ntotal) + new_index.add(vectors) + +if not os.path.exists(args.prefix + 'full'): + os.mkdir(args.prefix + 'full') + +faiss.write_index(new_index, os.path.join(args.prefix + 'full', 'index')) + +with open(os.path.join(args.prefix + 'full', 'docid'), 'w') as wfd: + for f in docid_files: + with open(f, 'r') as f1: + for line in f1: + wfd.write(line) diff --git a/pyserini/index/nmslib.py b/pyserini/index/nmslib.py new file mode 100644 index 0000000000000000000000000000000000000000..be658b4de82b15fa81c5fd573720a5008a88afad --- /dev/null +++ b/pyserini/index/nmslib.py @@ -0,0 +1,102 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
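To make the shard-merging logic of merge_faiss_indexes.py above concrete, here is a minimal self-contained sketch that mirrors its reconstruct-and-add loop on two toy in-memory shards (the 4-dimensional random vectors are illustrative only). In practice the module itself would be invoked, e.g. python -m pyserini.index.merge_faiss_indexes --prefix <shard-dir-prefix> --shard-num 2, which additionally concatenates the per-shard docid files.

import faiss
import numpy as np

dim = 4
# Two toy "shards", standing in for the per-shard flat indexes read from disk by the script.
shard0, shard1 = faiss.IndexFlatIP(dim), faiss.IndexFlatIP(dim)
shard0.add(np.random.rand(3, dim).astype('float32'))
shard1.add(np.random.rand(2, dim).astype('float32'))

# Merge by reconstructing every stored vector and adding it to a fresh flat index,
# exactly as the script does after faiss.read_index(...) on each shard.
merged = faiss.IndexFlatIP(dim)
for shard in (shard0, shard1):
    merged.add(shard.reconstruct_n(0, shard.ntotal))

assert merged.ntotal == shard0.ntotal + shard1.ntotal  # 5 vectors in total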
+# + +import argparse +import copy +import json +import os +import shutil +import time + +import faiss +import nmslib +from scipy.sparse import csr_matrix + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--input', type=str, help='path to embeddings directory', required=True) + parser.add_argument('--output', type=str, help='path to output index dir', required=True) + parser.add_argument('--M', type=int, default=256, required=False) + parser.add_argument('--efC', type=int, default=256, required=False) + parser.add_argument('--threads', type=int, default=12, required=False) + args = parser.parse_args() + + if not os.path.exists(args.output): + os.mkdir(args.output) + + is_sparse = False + + if 'index' in os.listdir(args.input): + shutil.copy(os.path.join(args.input, 'docid'), os.path.join(args.output, 'docid')) + bf_index = faiss.read_index(os.path.join(args.input, 'index')) + vectors = bf_index.reconstruct_n(0, bf_index.ntotal) + else: + vectors = [] + for filename in os.listdir(args.input): + path = os.path.join(args.input, filename) + with open(path) as f_in, open(os.path.join(args.output, 'docid'), 'w') as f_out: + for line in f_in: + info = json.loads(line) + docid = info['id'] + vector = info['vector'] + f_out.write(f'{docid}\n') + vectors.append(vector) + + tokens = set() + if isinstance(vectors[0], dict): + is_sparse = True + for vec in vectors: + for key in vec: + tokens.add(key) + token2id = {} + with open(os.path.join(args.output, 'tokens'), 'w') as f: + for idx, tok in enumerate(tokens): + token2id[tok] = idx + f.write(f'{tok}\n') + + if is_sparse: + matrix_row, matrix_col, matrix_data = [], [], [] + for i, vec in enumerate(vectors): + weight_dict = vec + tokens = weight_dict.keys() + col = [token2id[tok] for tok in tokens] + data = weight_dict.values() + matrix_row.extend([i] * len(weight_dict)) + matrix_col.extend(col) + matrix_data.extend(data) + vectors = csr_matrix((matrix_data, (matrix_row, matrix_col)), shape=(len(vectors), len(token2id))) + + M = args.M + efC = args.efC + num_threads = args.threads + index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post': 0} + if is_sparse: + index = nmslib.init(method='hnsw', space='negdotprod_sparse', data_type=nmslib.DataType.SPARSE_VECTOR) + else: + index = nmslib.init(method='hnsw', space='negdotprod', data_type=nmslib.DataType.DENSE_VECTOR) + index.addDataPointBatch(vectors) + start = time.time() + index.createIndex(index_time_params, print_progress=True) + end = time.time() + index_time = end - start + print('Index-time parameters', index_time_params) + print('Indexing time = %f' % index_time) + index.saveIndex(os.path.join(args.output, 'index.bin'), save_data=True) + + metadata = copy.deepcopy(index_time_params) + metadata['index-time'] = index_time + metadata['type'] = 'sparse' if is_sparse else 'dense' + json.dump(metadata, open(os.path.join(args.output, 'meta'), 'w'), indent=4) diff --git a/pyserini/multithreading.py b/pyserini/multithreading.py new file mode 100644 index 0000000000000000000000000000000000000000..ab36c535f24f8056f0d8d3e14d99ccca4f03b514 --- /dev/null +++ b/pyserini/multithreading.py @@ -0,0 +1,39 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
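As a rough sanity check of the HNSW construction in nmslib.py above, the following sketch exercises the same nmslib calls on toy dense vectors, with the index-time parameters shrunk for speed; the query-time efSearch value is an illustrative assumption, not something set by the script itself.

import nmslib
import numpy as np

# Toy dense vectors standing in for the embeddings the script loads from disk.
data = np.random.rand(100, 16).astype('float32')

# Same initialization and construction calls as the dense branch of the script above.
index = nmslib.init(method='hnsw', space='negdotprod', data_type=nmslib.DataType.DENSE_VECTOR)
index.addDataPointBatch(data)
index.createIndex({'M': 16, 'indexThreadQty': 4, 'efConstruction': 64, 'post': 0}, print_progress=False)

# Retrieve the 5 nearest neighbors of the first vector under negative dot product.
index.setQueryTimeParams({'efSearch': 64})
ids, distances = index.knnQuery(data[0], k=5)
print(ids, distances)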
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import threading + + +class ThreadSafeCount: + + def __init__(self): + self.value = 0 + self.lock = threading.Lock() + + def increment(self, inc=1): + with self.lock: + self.value += inc + return self.value + + +class Counters: + + def __init__(self): + self.indexable = ThreadSafeCount() + self.unindexable = ThreadSafeCount() + self.skipped = ThreadSafeCount() + self.errors = ThreadSafeCount() + diff --git a/pyserini/output_writer.py b/pyserini/output_writer.py new file mode 100644 index 0000000000000000000000000000000000000000..2222552244f55d96fdc7ab0801199e28a22d8000 --- /dev/null +++ b/pyserini/output_writer.py @@ -0,0 +1,116 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import json +import os + +from abc import ABC, abstractmethod +from enum import Enum, unique +from typing import List + +from pyserini.search import JLuceneSearcherResult + + +@unique +class OutputFormat(Enum): + TREC = 'trec' + MSMARCO = "msmarco" + KILT = 'kilt' + + +class OutputWriter(ABC): + + def __init__(self, file_path: str, mode: str = 'w', + max_hits: int = 1000, tag: str = None, topics: dict = None, + use_max_passage: bool = False, max_passage_delimiter: str = None, max_passage_hits: int = 100): + self.file_path = file_path + self.mode = mode + self.tag = tag + self.topics = topics + self.use_max_passage = use_max_passage + self.max_passage_delimiter = max_passage_delimiter if use_max_passage else None + self.max_hits = max_passage_hits if use_max_passage else max_hits + self._file = None + + def __enter__(self): + dirname = os.path.dirname(self.file_path) + if dirname: + os.makedirs(dirname, exist_ok=True) + self._file = open(self.file_path, self.mode) + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self._file.close() + + def hits_iterator(self, hits: List[JLuceneSearcherResult]): + unique_docs = set() + rank = 1 + for hit in hits: + if self.use_max_passage and self.max_passage_delimiter: + docid = hit.docid.split(self.max_passage_delimiter)[0] + else: + docid = hit.docid.strip() + + if self.use_max_passage: + if docid in unique_docs: + continue + unique_docs.add(docid) + + yield docid, rank, hit.score, hit + + rank = rank + 1 + if rank > self.max_hits: + break + + @abstractmethod + def write(self, topic: str, hits: List[JLuceneSearcherResult]): + raise NotImplementedError() + + +class TrecWriter(OutputWriter): + def write(self, topic: str, hits: List[JLuceneSearcherResult]): + for docid, rank, score, _ in self.hits_iterator(hits): + self._file.write(f'{topic} Q0 {docid} {rank} 
{score:.6f} {self.tag}\n') + + +class MsMarcoWriter(OutputWriter): + def write(self, topic: str, hits: List[JLuceneSearcherResult]): + for docid, rank, score, _ in self.hits_iterator(hits): + self._file.write(f'{topic}\t{docid}\t{rank}\n') + + +class KiltWriter(OutputWriter): + def write(self, topic: str, hits: List[JLuceneSearcherResult]): + datapoint = self.topics[topic] + provenance = [] + for docid, rank, score, _ in self.hits_iterator(hits): + provenance.append({"wikipedia_id": docid}) + datapoint["output"] = [{"provenance": provenance}] + json.dump(datapoint, self._file) + self._file.write('\n') + + +def get_output_writer(file_path: str, output_format: OutputFormat, *args, **kwargs) -> OutputWriter: + mapping = { + OutputFormat.TREC: TrecWriter, + OutputFormat.MSMARCO: MsMarcoWriter, + OutputFormat.KILT: KiltWriter, + } + return mapping[output_format](file_path, *args, **kwargs) + + +def tie_breaker(hits): + return sorted(hits, key=lambda x: (-x.score, x.docid)) diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py new file mode 100644 index 0000000000000000000000000000000000000000..9bde039b3ce943bf0b1f3d6f2c335450ae936359 --- /dev/null +++ b/pyserini/prebuilt_index_info.py @@ -0,0 +1,5679 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +TF_INDEX_INFO_MSMARCO = { + # MS MARCO V1 document corpus, three indexes with different amounts of information (and sizes). + "msmarco-v1-doc": { + "description": "Lucene index of the MS MARCO V1 document corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc.20221004.252b5e.tar.gz", + ], + "md5": "b2b1841c93255f9902150128d5e27e41", + "size compressed (bytes)": 13736982438, + "total_terms": 2742219865, + "documents": 3213835, + "unique_terms": 29823777, + "downloaded": False + }, + "msmarco-v1-doc-slim": { + "description": "Lucene index of the MS MARCO V1 document corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-slim.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-slim.20221004.252b5e.tar.gz", + ], + "md5": "400fe94ec97a20cf775596085c5ad79d", + "size compressed (bytes)": 1791498133, + "total_terms": 2742219865, + "documents": 3213835, + "unique_terms": 29823777, + "downloaded": False + }, + "msmarco-v1-doc-full": { + "description": "Lucene index of the MS MARCO V1 document corpus ('full' version). 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-full.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-full.20221004.252b5e.tar.gz", + ], + "md5": "75735da0dd35e3631d22bf682ebed8a0", + "size compressed (bytes)": 25525615599, + "total_terms": 2742219865, + "documents": 3213835, + "unique_terms": 29823777, + "downloaded": False + }, + + # MS MARCO V1 document corpus, doc2query-T5 expansions. + "msmarco-v1-doc-d2q-t5": { + "description": "Lucene index of the MS MARCO V1 document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.tar.gz", + ], + "md5": "87530b64e55080fcfb90ec9e598be23e", + "size compressed (bytes)": 1885596544, + "total_terms": 3748343494, + "documents": 3213835, + "unique_terms": 30631009, + "downloaded": False + }, + "msmarco-v1-doc-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V1 document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20221004.252b5e.tar.gz", + ], + "md5": "a081b866b78e0f604ddb9e3103ee6cc5", + "size compressed (bytes)": 11152231182, + "total_terms": 3748343494, + "documents": 3213835, + "unique_terms": 30631009, + "downloaded": False + }, + + # MS MARCO V1 segmented document corpus, three indexes with different amounts of information (and sizes). + "msmarco-v1-doc-segmented": { + "description": "Lucene index of the MS MARCO V1 segmented document corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.tar.gz", + ], + "md5": "59fdf88f360d0a72d1b94b9729c2198e", + "size compressed (bytes)": 15924438098, + "total_terms": 3200522554, + "documents": 20545677, + "unique_terms": 21191748, + "downloaded": False + }, + "msmarco-v1-doc-segmented-slim": { + "description": "Lucene index of the MS MARCO V1 segmented document corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-slim.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-slim.20221004.252b5e.tar.gz", + ], + "md5": "c277161780d501ab832e16e6396f9cae", + "size compressed (bytes)": 3306727108, + "total_terms": 3200522554, + "documents": 20545677, + "unique_terms": 21191748, + "downloaded": False + }, + "msmarco-v1-doc-segmented-full": { + "description": "Lucene index of the MS MARCO V1 segmented document corpus ('full' version). 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-full.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-full.20221004.252b5e.tar.gz", + ], + "md5": "c1af97d16c552a99a23382639c4a668c", + "size compressed (bytes)": 29470600011, + "total_terms": 3200522554, + "documents": 20545677, + "unique_terms": 21191748, + "downloaded": False + }, + + # MS MARCO V1 segmented document corpus, doc2query-T5 expansions. + "msmarco-v1-doc-segmented-d2q-t5": { + "description": "Lucene index of the MS MARCO V1 segmented document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.tar.gz", + ], + "md5": "b242fd9cb0982e87d0c667439cb6d59c", + "size compressed (bytes)": 3554554620, + "total_terms": 4206646183, + "documents": 20545677, + "unique_terms": 22055268, + "downloaded": False + }, + "msmarco-v1-doc-segmented-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V1 segmented document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20221004.252b5e.tar.gz", + ], + "md5": "40341fc2cf151b8c447a8e77f5e9f100", + "size compressed (bytes)": 16349673687, + "total_terms": 4206646183, + "documents": 20545677, + "unique_terms": 22055268, + "downloaded": False + }, + + # MS MARCO V1 passage corpus, three indexes with different amounts of information (and sizes). + "msmarco-v1-passage": { + "description": "Lucene index of the MS MARCO V1 passage corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage.20221004.252b5e.tar.gz", + ], + "md5": "c697b18c9a0686ca760583e615dbe450", + "size compressed (bytes)": 2170758938, + "total_terms": 352316036, + "documents": 8841823, + "unique_terms": 2660824, + "downloaded": False + }, + "msmarco-v1-passage-slim": { + "description": "Lucene index of the MS MARCO V1 passage corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-slim.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-slim.20221004.252b5e.tar.gz", + ], + "md5": "9f952db731ed7c3f2ec14010664ddcec", + "size compressed (bytes)": 491451085, + "total_terms": 352316036, + "documents": 8841823, + "unique_terms": 2660824, + "downloaded": False + }, + "msmarco-v1-passage-full": { + "description": "Lucene index of the MS MARCO V1 passage corpus ('full' version). 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-full.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-full.20221004.252b5e.tar.gz", + ], + "md5": "0ff5ceaae32333d3580ae594d460385c", + "size compressed (bytes)": 3720616158, + "total_terms": 352316036, + "documents": 8841823, + "unique_terms": 2660824, + "downloaded": False + }, + + # MS MARCO V1 passage corpus, doc2query-T5 expansions. + "msmarco-v1-passage-d2q-t5": { + "description": "Lucene index of the MS MARCO V1 passage corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.tar.gz", + ], + "md5": "0a62959d300634aa0eb37e910aa4f4a7", + "size compressed (bytes)": 807866125, + "total_terms": 1986612263, + "documents": 8841823, + "unique_terms": 3929111, + "downloaded": False + }, + "msmarco-v1-passage-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V1 passage corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20221004.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20221004.252b5e.tar.gz", + ], + "md5": "2530b20771c6f441073ff49a56ea9004", + "size compressed (bytes)": 4409861543, + "total_terms": 1986612263, + "documents": 8841823, + "unique_terms": 3929111, + "downloaded": False + }, + + # MS MARCO V1 indexes for LTR experiments. + "msmarco-passage-ltr": { + "description": "Lucene index of the MS MARCO passage corpus with four extra preprocessed fields for LTR. (Lucene 8)", + "filename": "index-msmarco-passage-ltr-20210519-e25e33f.tar.gz", + "readme": "index-msmarco-passage-ltr-20210519-e25e33f-readme.txt", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/index-msmarco-passage-ltr-20210519-e25e33f.tar.gz", + "https://vault.cs.uwaterloo.ca/s/8qFCaCtwabRfYQD/download" + ], + "md5": "a5de642c268ac1ed5892c069bdc29ae3", + "size compressed (bytes)": 14073966046, + "total_terms": 352316036, + "documents": 8841823, + "unique_terms": 2660824, + "downloaded": False + }, + "msmarco-doc-per-passage-ltr": { + "description": "Lucene index of the MS MARCO document per-passage corpus with four extra preprocessed fields for LTR. (Lucene 8)", + "filename": "index-msmarco-doc-per-passage-ltr-20211031-33e4151.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/index-msmarco-doc-per-passage-ltr-20211031-33e4151.tar.gz", + "https://vault.cs.uwaterloo.ca/s/kNdXMWXEsTt3fT8/download" + ], + "md5": "bd60e89041b4ebbabc4bf0cfac608a87", + "size compressed (bytes)": 45835520960, + "total_terms": 1232004740, + "documents": 20545628, + "unique_terms": 10123678, + "downloaded": False + }, + "msmarco-document-segment-ltr": { + "description": "Lucene index of the MS MARCO document segmented corpus with four extra preprocessed fields for LTR. 
(Lucene 8)", + "filename": "lucene-index.msmarco-doc-segmented.ibm.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-doc-segmented.ibm.tar.gz" + ], + "md5": "13064bdaf8e8a79222634d67ecd3ddb5", + "size compressed (bytes)": 98984853515, + "total_terms": 3197500226, + "documents": 20532330, + "unique_terms": -1, + "downloaded": False + }, + + # MS MARCO V2 document corpus, three indexes with different amounts of information (and sizes). + "msmarco-v2-doc": { + "description": "Lucene index of the MS MARCO V2 document corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc.20220808.4d6d2a.tar.gz", + ], + "md5": "0599bd6ed5ee28390b279eb398ef0267", + "size compressed (bytes)": 63431299815, + "total_terms": 14165667143, + "documents": 11959635, + "unique_terms": 44860768, + "downloaded": False + }, + "msmarco-v2-doc-slim": { + "description": "Lucene index of the MS MARCO V2 document corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-slim.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-slim.20220808.4d6d2a.tar.gz", + ], + "md5": "4dfc5549e3c15abec4b9694542a376d1", + "size compressed (bytes)": 7172175394, + "total_terms": 14165667143, + "documents": 11959635, + "unique_terms": 44860768, + "downloaded": False + }, + "msmarco-v2-doc-full": { + "description": "Lucene index of the MS MARCO V2 document corpus ('full' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-full.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-full.20220808.4d6d2a.tar.gz", + ], + "md5": "fc6f546898725617eb5ca7a144bef531", + "size compressed (bytes)": 119537276117, + "total_terms": 14165667143, + "documents": 11959635, + "unique_terms": 44860768, + "downloaded": False + }, + + # MS MARCO V2 document corpus, doc2query-T5 expansions. + "msmarco-v2-doc-d2q-t5": { + "description": "Lucene index of the MS MARCO V2 document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.tar.gz", + ], + "md5": "25514f77600a6be87aeb1c66c9107b89", + "size compressed (bytes)": 8155218407, + "total_terms": 19760783236, + "documents": 11959635, + "unique_terms": 54148271, + "downloaded": False + }, + "msmarco-v2-doc-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V2 document corpus with doc2query-T5 expansions. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + ], + "md5": "a3ce9b1146857a332825825623ab89e7", + "size compressed (bytes)": 54415612794, + "total_terms": 19760783236, + "documents": 11959635, + "unique_terms": 54148271, + "downloaded": False + }, + + # MS MARCO V2 segmented document corpus, three indexes with different amounts of information (and sizes). + "msmarco-v2-doc-segmented": { + "description": "Lucene index of the MS MARCO V2 segmented document corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.tar.gz" + ], + "md5": "8a5f444fa5a63cc5d4ddc3e6dd15faa0", + "size compressed (bytes)": 109269078191, + "total_terms": 24780918039, + "documents": 124131414, + "unique_terms": 29265408, + "downloaded": False + }, + "msmarco-v2-doc-segmented-slim": { + "description": "Lucene index of the MS MARCO V2 segmented document corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-slim.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-slim.20220808.4d6d2a.tar.gz" + ], + "md5": "f50c591aa9a0a0126ebc4dc53c6306d7", + "size compressed (bytes)": 20852487058, + "total_terms": 24780918039, + "documents": 124131414, + "unique_terms": 29265408, + "downloaded": False + }, + "msmarco-v2-doc-segmented-full": { + "description": "Lucene index of the MS MARCO V2 segmented document corpus ('full' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-full.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-full.20220808.4d6d2a.tar.gz" + ], + "md5": "259b936d3591e48770da9dde153d1617", + "size compressed (bytes)": 201358944352, + "total_terms": 24780918039, + "documents": 124131414, + "unique_terms": 29265408, + "downloaded": False + }, + + # MS MARCO V2 segmented document corpus, doc2query-T5 expansions. + "msmarco-v2-doc-segmented-d2q-t5": { + "description": "Lucene index of the MS MARCO V2 segmented document corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.tar.gz" + ], + "md5": "1e9fa18f082aaadfef02ba9eea32fcc2", + "size compressed (bytes)": 24242738999, + "total_terms": 30376034132, + "documents": 124131414, + "unique_terms": 38932296, + "downloaded": False + }, + "msmarco-v2-doc-segmented-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V2 segmented document corpus with doc2query-T5 expansions. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + ], + "md5": "eff6fe5b61936491c8985ad7efa46b20", + "size compressed (bytes)": 114315186555, + "total_terms": 30376034132, + "documents": 124131414, + "unique_terms": 38932296, + "downloaded": False + }, + + # MS MARCO V2 passage corpus, three indexes with different amounts of information (and sizes). + "msmarco-v2-passage": { + "description": "Lucene index of the MS MARCO V2 passage corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage.20220808.4d6d2a.tar.gz" + ], + "md5": "eacd8556dd416ccad517b5e7dc97bceb", + "size compressed (bytes)": 38808092190, + "total_terms": 4673266800, + "documents": 138364198, + "unique_terms": 11885838, + "downloaded": False + }, + "msmarco-v2-passage-slim": { + "description": "Lucene index of the MS MARCO V2 passage corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-slim.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-slim.20220808.4d6d2a.tar.gz" + ], + "md5": "d7e644c048669aa72314dd358b475765", + "size compressed (bytes)": 8170344330, + "total_terms": 4673266800, + "documents": 138364198, + "unique_terms": 11885838, + "downloaded": False + }, + "msmarco-v2-passage-full": { + "description": "Lucene index of the MS MARCO V2 passage corpus ('full' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-full.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-full.20220808.4d6d2a.tar.gz" + ], + "md5": "ef5c22c865094c386b9ec600165bb061", + "size compressed (bytes)": 60413585958, + "total_terms": 4673266800, + "documents": 138364198, + "unique_terms": 11885838, + "downloaded": False + }, + + # MS MARCO V2 passage corpus, doc2query-T5 expansions. + "msmarco-v2-passage-d2q-t5": { + "description": "Lucene index of the MS MARCO V2 passage corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.tar.gz", + ], + "md5": "3c357f9c219e4c3d980bc663e1f5a5f4", + "size compressed (bytes)": 14404903785, + "total_terms": 16961479264, + "documents": 138364198, + "unique_terms": 36651533, + "downloaded": False + }, + "msmarco-v2-passage-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V2 passage corpus with doc2query-T5 expansions. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + ], + "md5": "01e369b644e5a8b7413e04140780cf94", + "size compressed (bytes)": 59206472740, + "total_terms": 16961479264, + "documents": 138364198, + "unique_terms": 36651533, + "downloaded": False + }, + + # MS MARCO V2 augmented passage corpus, three indexes with different amounts of information (and sizes). + "msmarco-v2-passage-augmented": { + "description": "Lucene index of the MS MARCO V2 augmented passage corpus. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.tar.gz" + ], + "md5": "69675971a0172eb5e37668ea42761d43", + "size compressed (bytes)": 75036026507, + "total_terms": 15272965252, + "documents": 138364198, + "unique_terms": 16579899, + "downloaded": False + }, + "msmarco-v2-passage-augmented-slim": { + "description": "Lucene index of the MS MARCO V2 augmented passage corpus ('slim' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-augmented-slim.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-augmented-slim.20220808.4d6d2a.tar.gz" + ], + "md5": "3524b5b28117ac1a5365cd664c6871f1", + "size compressed (bytes)": 14757394934, + "total_terms": 15272965252, + "documents": 138364198, + "unique_terms": 16579899, + "downloaded": False + }, + "msmarco-v2-passage-augmented-full": { + "description": "Lucene index of the MS MARCO V2 augmented passage corpus ('full' version). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-augmented-full.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-augmented-full.20220808.4d6d2a.tar.gz" + ], + "md5": "c3e18c02e749c0416e1acc653899c6b0", + "size compressed (bytes)": 130622740320, + "total_terms": 15272965252, + "documents": 138364198, + "unique_terms": 16579899, + "downloaded": False + }, + + # MS MARCO V2 augmented passage corpus, doc2query-T5 expansions. + "msmarco-v2-passage-augmented-d2q-t5": { + "description": "Lucene index of the MS MARCO V2 augmented passage corpus with doc2query-T5 expansions. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.tar.gz" + ], + "md5": "2b683a3a64692b95375ddbdcb9590f25", + "size compressed (bytes)": 14404903785, + "total_terms": 27561177716, + "documents": 138364198, + "unique_terms": 41177061, + "downloaded": False + }, + "msmarco-v2-passage-augmented-d2q-t5-docvectors": { + "description": "Lucene index (+docvectors) of the MS MARCO V2 augmented passage corpus with doc2query-T5 expansions. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220808.4d6d2a.tar.gz", + ], + "md5": "fe6eaeceabaa06cb09fdf8432f65f9d8", + "size compressed (bytes)": 59206472740, + "total_terms": 27561177716, + "documents": 138364198, + "unique_terms": 41177061, + "downloaded": False + } +} + +TF_INDEX_INFO_BEIR = { + # BEIR (v1.0.0) flat indexes + "beir-v1.0.0-trec-covid.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): TREC-COVID", + "filename": "lucene-index.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid.flat.20221116.505594.tar.gz" + ], + "md5": "57b812594b11d064a23123137ae7dade", + "size compressed (bytes)": 226268665, + "total_terms": 20822821, + "documents": 171331, + "unique_terms": 202648, + "downloaded": False + }, + "beir-v1.0.0-bioasq.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): BioASQ", + "filename": "lucene-index.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq.flat.20221116.505594.tar.gz" + ], + "md5": "cf8d4804b06bb8678d30b1375b46a0b3", + "size compressed (bytes)": 24821933356, + "total_terms": 2257541758, + "documents": 14914603, + "unique_terms": 4960004, + "downloaded": False + }, + "beir-v1.0.0-nfcorpus.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): NFCorpus", + "filename": "lucene-index.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus.flat.20221116.505594.tar.gz" + ], + "md5": "34c0b11ad13a4715a78d025902061d37", + "size compressed (bytes)": 6509700, + "total_terms": 637485, + "documents": 3633, + "unique_terms": 22111, + "downloaded": False + }, + "beir-v1.0.0-nq.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): NQ", + "filename": "lucene-index.beir-v1.0.0-nq.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq.flat.20221116.505594.tar.gz" + ], + "md5": "a2c5db4dd3780fff3c7c6bfea1dd08e8", + "size compressed (bytes)": 1645453748, + "total_terms": 151249294, + "documents": 2681468, + "unique_terms": 997027, + "downloaded": False + }, + "beir-v1.0.0-hotpotqa.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): HotpotQA", + "filename": "lucene-index.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa.flat.20221116.505594.tar.gz" + ], + "md5": "3be2875f93537369641287dcdf25add9", + "size compressed (bytes)": 2019081888, + "total_terms": 172477066, + "documents": 5233329, + "unique_terms": 2644892, + "downloaded": False + }, + "beir-v1.0.0-fiqa.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): FiQA-2018", + "filename": 
"lucene-index.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa.flat.20221116.505594.tar.gz" + ], + "md5": "409b779e8a39813d2fbdfd1ea2f009e9", + "size compressed (bytes)": 55982536, + "total_terms": 5288635, + "documents": 57600, + "unique_terms": 66977, + "downloaded": False + }, + "beir-v1.0.0-signal1m.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): Signal-1M", + "filename": "lucene-index.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m.flat.20221116.505594.tar.gz" + ], + "md5": "d0828b92a3df814bfa4b73bddeb25da7", + "size compressed (bytes)": 496596576, + "total_terms": 32240069, + "documents": 2866315, + "unique_terms": 796647, + "downloaded": False + }, + "beir-v1.0.0-trec-news.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): TREC-NEWS", + "filename": "lucene-index.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news.flat.20221116.505594.tar.gz" + ], + "md5": "98df3de34b4b76a4390520c606817ec4", + "size compressed (bytes)": 2623576957, + "total_terms": 275651967, + "documents": 594589, + "unique_terms": 729872, + "downloaded": False + }, + "beir-v1.0.0-robust04.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): Robust04", + "filename": "lucene-index.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04.flat.20221116.505594.tar.gz" + ], + "md5": "89dfcb7297c12a772d1bfd7917df908d", + "size compressed (bytes)": 1728446730, + "total_terms": 174384263, + "documents": 528036, + "unique_terms": 923466, + "downloaded": False + }, + "beir-v1.0.0-arguana.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): ArguAna", + "filename": "lucene-index.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana.flat.20221116.505594.tar.gz" + ], + "md5": "d6c005689a9e7e91f3b1a7fbc74063e1", + "size compressed (bytes)": 10563485, + "total_terms": 969528, + "documents": 8674, + "unique_terms": 23895, + "downloaded": False + }, + "beir-v1.0.0-webis-touche2020.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): Webis-Touche2020", + "filename": "lucene-index.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020.flat.20221116.505594.tar.gz" + ], + "md5": "20c6e9f29461eea1a520cd1abead709a", + "size compressed (bytes)": 750400932, + "total_terms": 76082209, + "documents": 382545, + "unique_terms": 525540, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-android.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-android", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz", + "readme": 
"lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android.flat.20221116.505594.tar.gz" + ], + "md5": "9f9f35e34f76336bc6e516599cbaf75b", + "size compressed (bytes)": 17423320, + "total_terms": 1760762, + "documents": 22998, + "unique_terms": 41456, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-english.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-english", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english.flat.20221116.505594.tar.gz" + ], + "md5": "7d887497d32eedd92c314c93feaca28e", + "size compressed (bytes)": 24949578, + "total_terms": 2236655, + "documents": 40221, + "unique_terms": 62517, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gaming.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-gaming", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming.flat.20221116.505594.tar.gz" + ], + "md5": "140e16ee86a69c8fd4d16a83a6d51591", + "size compressed (bytes)": 29156970, + "total_terms": 2827717, + "documents": 45301, + "unique_terms": 60070, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gis.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-gis", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis.flat.20221116.505594.tar.gz" + ], + "md5": "4bd93695f28af0a11172f387ef41fee6", + "size compressed (bytes)": 43396154, + "total_terms": 4048584, + "documents": 37637, + "unique_terms": 184133, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-mathematica.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-mathematica", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica.flat.20221116.505594.tar.gz" + ], + "md5": "5b5b7ab3d0437428e29a5a1431de1ca5", + "size compressed (bytes)": 21589909, + "total_terms": 2332642, + "documents": 16705, + "unique_terms": 111611, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-physics.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-physics", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics.flat.20221116.505594.tar.gz" + ], + "md5": "6864144bca1bb169a452321e14ef12e0", + "size compressed (bytes)": 37956215, + "total_terms": 3785483, + "documents": 38316, + "unique_terms": 55950, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-programmers.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-programmers", + "filename": 
"lucene-index.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers.flat.20221116.505594.tar.gz" + ], + "md5": "7b7d2bbf7cc5d53924d09c3b781dba8a", + "size compressed (bytes)": 40297069, + "total_terms": 3905694, + "documents": 32176, + "unique_terms": 74195, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-stats.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-stats", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats.flat.20221116.505594.tar.gz" + ], + "md5": "0b09b7bee2b60df0ff73710a93a79218", + "size compressed (bytes)": 52212599, + "total_terms": 5356042, + "documents": 42269, + "unique_terms": 183358, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-tex.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-tex", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex.flat.20221116.505594.tar.gz" + ], + "md5": "48a2541bd7d1adec06f053486655e815", + "size compressed (bytes)": 91819025, + "total_terms": 9556423, + "documents": 68184, + "unique_terms": 288088, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-unix.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-unix", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix.flat.20221116.505594.tar.gz" + ], + "md5": "a6cc0a867f6210ad44755c0a36fd682a", + "size compressed (bytes)": 53802808, + "total_terms": 5767374, + "documents": 47382, + "unique_terms": 206323, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-webmasters.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-webmasters", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters.flat.20221116.505594.tar.gz" + ], + "md5": "a04f65d575b4233a151c4960b82815b9", + "size compressed (bytes)": 15174811, + "total_terms": 1482585, + "documents": 17405, + "unique_terms": 40547, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-wordpress.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): CQADupStack-wordpress", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress.flat.20221116.505594.tar.gz" + ], + "md5": "4ab079b9f7d0463955ce073b5d53e64d", + "size compressed (bytes)": 54807597, + "total_terms": 5463472, + "documents": 48605, + "unique_terms": 125727, + "downloaded": False + }, + "beir-v1.0.0-quora.flat": { + "description": "Lucene flat index of 
BEIR (v1.0.0): Quora", + "filename": "lucene-index.beir-v1.0.0-quora.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora.flat.20221116.505594.tar.gz" + ], + "md5": "53fa2bd0667d23a50f95adaf169b87a1", + "size compressed (bytes)": 52698691, + "total_terms": 4390852, + "documents": 522931, + "unique_terms": 69597, + "downloaded": False + }, + "beir-v1.0.0-dbpedia-entity.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): DBPedia", + "filename": "lucene-index.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity.flat.20221116.505594.tar.gz" + ], + "md5": "6bc15a920e262d12ec3842401755e934", + "size compressed (bytes)": 2085473498, + "total_terms": 164794982, + "documents": 4635922, + "unique_terms": 3351459, + "downloaded": False + }, + "beir-v1.0.0-scidocs.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): SCIDOCS", + "filename": "lucene-index.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs.flat.20221116.505594.tar.gz" + ], + "md5": "f1fba96a71a62bc567ecbd167de3794b", + "size compressed (bytes)": 186572809, + "total_terms": 3266767, + "documents": 25657, + "unique_terms": 63604, + "downloaded": False + }, + "beir-v1.0.0-fever.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): FEVER", + "filename": "lucene-index.beir-v1.0.0-fever.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever.flat.20221116.505594.tar.gz" + ], + "md5": "1b06f43ea36e2ed450d1b1d90099ae67", + "size compressed (bytes)": 3880155553, + "total_terms": 325179165, + "documents": 5416568, + "unique_terms": 3293639, + "downloaded": False + }, + "beir-v1.0.0-climate-fever.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): Climate-FEVER", + "filename": "lucene-index.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever.flat.20221116.505594.tar.gz" + ], + "md5": "68811e2614b3bac9e1b879c883fc722e", + "size compressed (bytes)": 3880208200, + "total_terms": 325185072, + "documents": 5416593, + "unique_terms": 3293621, + "downloaded": False + }, + "beir-v1.0.0-scifact.flat": { + "description": "Lucene flat index of BEIR (v1.0.0): SciFact", + "filename": "lucene-index.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-flat.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact.flat.20221116.505594.tar.gz" + ], + "md5": "6f6e55f1cf80c362f86bee65529b71de", + "size compressed (bytes)": 8851173, + "total_terms": 838128, + "documents": 5183, + "unique_terms": 28865, + "downloaded": False + }, + + # BEIR (v1.0.0) multifield indexes + "beir-v1.0.0-trec-covid.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): TREC-COVID", + "filename": 
"lucene-index.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid.multifield.20221116.505594.tar.gz" + ], + "md5": "7501a330a0c9246e6350413c3f6ced7c", + "size compressed (bytes)": 222831983, + "total_terms": 19060122, + "documents": 129192, + "unique_terms": 193851, + "downloaded": False + }, + "beir-v1.0.0-bioasq.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): BioASQ", + "filename": "lucene-index.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq.multifield.20221116.505594.tar.gz" + ], + "md5": "cc01ab450cac0b8865bd1e70e2a58596", + "size compressed (bytes)": 25346354679, + "total_terms": 2099554307, + "documents": 14914602, + "unique_terms": 4889053, + "downloaded": False + }, + "beir-v1.0.0-nfcorpus.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): NFCorpus", + "filename": "lucene-index.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus.multifield.20221116.505594.tar.gz" + ], + "md5": "904e53b80fe04b3844b97847bc77a772", + "size compressed (bytes)": 6645576, + "total_terms": 601950, + "documents": 3633, + "unique_terms": 21819, + "downloaded": False + }, + "beir-v1.0.0-nq.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): NQ", + "filename": "lucene-index.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq.multifield.20221116.505594.tar.gz" + ], + "md5": "693ca315de9fbbbf7f664be313a03847", + "size compressed (bytes)": 1642708204, + "total_terms": 144050891, + "documents": 2680961, + "unique_terms": 996653, + "downloaded": False + }, + "beir-v1.0.0-hotpotqa.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): HotpotQA", + "filename": "lucene-index.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa.multifield.20221116.505594.tar.gz" + ], + "md5": "ef8c2f40097e652eec99e6bf25e151cd", + "size compressed (bytes)": 2083441492, + "total_terms": 158180692, + "documents": 5233235, + "unique_terms": 2627639, + "downloaded": False + }, + "beir-v1.0.0-fiqa.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): FiQA-2018", + "filename": "lucene-index.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa.multifield.20221116.505594.tar.gz" + ], + "md5": "073f3f19a94689e5fac511af49316fe1", + "size compressed (bytes)": 55984419, + "total_terms": 5288635, + "documents": 57600, + "unique_terms": 66977, + "downloaded": False + }, + "beir-v1.0.0-signal1m.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): Signal-1M", + "filename": 
"lucene-index.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m.multifield.20221116.505594.tar.gz" + ], + "md5": "4482ae02f18e8336c0a95ea33b5b6ede", + "size compressed (bytes)": 496603092, + "total_terms": 32240069, + "documents": 2866315, + "unique_terms": 796647, + "downloaded": False + }, + "beir-v1.0.0-trec-news.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): TREC-NEWS", + "filename": "lucene-index.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news.multifield.20221116.505594.tar.gz" + ], + "md5": "3151122da3cf081a0c8894af7b75be43", + "size compressed (bytes)": 2633899363, + "total_terms": 270886723, + "documents": 578605, + "unique_terms": 727856, + "downloaded": False + }, + "beir-v1.0.0-robust04.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): Robust04", + "filename": "lucene-index.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04.multifield.20221116.505594.tar.gz" + ], + "md5": "fdf741a75efe089d0451de5720b52c3a", + "size compressed (bytes)": 1728446303, + "total_terms": 174384263, + "documents": 528036, + "unique_terms": 923466, + "downloaded": False + }, + "beir-v1.0.0-arguana.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): ArguAna", + "filename": "lucene-index.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana.multifield.20221116.505594.tar.gz" + ], + "md5": "a8201952860d31c56ea8a54c31e88b51", + "size compressed (bytes)": 10524118, + "total_terms": 944123, + "documents": 8674, + "unique_terms": 23867, + "downloaded": False + }, + "beir-v1.0.0-webis-touche2020.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): Webis-Touche2020", + "filename": "lucene-index.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020.multifield.20221116.505594.tar.gz" + ], + "md5": "e160ea813990cff4dbdb9f50d509f8ea", + "size compressed (bytes)": 750724439, + "total_terms": 74066724, + "documents": 382545, + "unique_terms": 524665, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-android.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-android", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android.multifield.20221116.505594.tar.gz" + ], + "md5": "de85f92a018d83a7ea496d9ef955b8c5", + "size compressed (bytes)": 17887736, + "total_terms": 1591285, + "documents": 22998, + "unique_terms": 40824, + "downloaded": False + }, + 
"beir-v1.0.0-cqadupstack-english.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-english", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english.multifield.20221116.505594.tar.gz" + ], + "md5": "71c5d3db04586283772f6069668f5bfa", + "size compressed (bytes)": 25558901, + "total_terms": 2006983, + "documents": 40221, + "unique_terms": 61530, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gaming.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-gaming", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming.multifield.20221116.505594.tar.gz" + ], + "md5": "ff7c628b568f916c3bc3f7bf2af831eb", + "size compressed (bytes)": 29992453, + "total_terms": 2510477, + "documents": 45300, + "unique_terms": 59113, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gis.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-gis", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis.multifield.20221116.505594.tar.gz" + ], + "md5": "4083830da4922d1294b3fb38873ba5a2", + "size compressed (bytes)": 44188661, + "total_terms": 3789161, + "documents": 37637, + "unique_terms": 183298, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-mathematica.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-mathematica", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica.multifield.20221116.505594.tar.gz" + ], + "md5": "baa9414c385db88eaafffa95d5ec7d48", + "size compressed (bytes)": 21911919, + "total_terms": 2234369, + "documents": 16705, + "unique_terms": 111306, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-physics.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-physics", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics.multifield.20221116.505594.tar.gz" + ], + "md5": "342b105462067b87e78730921dd7288d", + "size compressed (bytes)": 38736492, + "total_terms": 3542078, + "documents": 38316, + "unique_terms": 55229, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-programmers.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-programmers", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers.multifield.20221116.505594.tar.gz" + ], + "md5": "2e95b82caf156d0f0b109c62e0011eab", + "size compressed (bytes)": 40982052, + "total_terms": 3682227, + "documents": 32176, + "unique_terms": 73765, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-stats.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-stats", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats.multifield.20221116.505594.tar.gz" + ], + "md5": "87c53df624baed7921672286beb94f9c", + "size compressed (bytes)": 53094508, + "total_terms": 5073873, + "documents": 42269, + "unique_terms": 182933, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-tex.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-tex", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex.multifield.20221116.505594.tar.gz" + ], + "md5": "86407171e4ff305ecb173afdd49eef7c", + "size compressed (bytes)": 93081190, + "total_terms": 9155405, + "documents": 68184, + "unique_terms": 287393, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-unix.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-unix", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix.multifield.20221116.505594.tar.gz" + ], + "md5": "acb0cc50cccb9e8dfca0ed599df0cfaa", + "size compressed (bytes)": 54758816, + "total_terms": 5449726, + "documents": 47382, + "unique_terms": 205471, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-webmasters.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-webmasters", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters.multifield.20221116.505594.tar.gz" + ], + "md5": "7701f016b6fc643c30630742f7712bbd", + "size compressed (bytes)": 15524400, + "total_terms": 1358292, + "documents": 17405, + "unique_terms": 40073, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-wordpress.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): CQADupStack-wordpress", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress.multifield.20221116.505594.tar.gz" + ], + "md5": "d791cf8449a18ebe698d404f526375ee", + "size compressed (bytes)": 55738636, + "total_terms": 5151575, + "documents": 48605, + "unique_terms": 125110, + "downloaded": False + }, + "beir-v1.0.0-quora.multifield": { + "description": "Lucene multifield index 
of BEIR (v1.0.0): Quora", + "filename": "lucene-index.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora.multifield.20221116.505594.tar.gz" + ], + "md5": "2d92b46f715df08ce146167ed1b12079", + "size compressed (bytes)": 52703122, + "total_terms": 4390852, + "documents": 522931, + "unique_terms": 69597, + "downloaded": False + }, + "beir-v1.0.0-dbpedia-entity.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): DBPedia", + "filename": "lucene-index.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity.multifield.20221116.505594.tar.gz" + ], + "md5": "b3f6b64bfd7903ff25ca2fa01a288392", + "size compressed (bytes)": 2144410289, + "total_terms": 152205479, + "documents": 4635922, + "unique_terms": 3338476, + "downloaded": False + }, + "beir-v1.0.0-scidocs.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): SCIDOCS", + "filename": "lucene-index.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs.multifield.20221116.505594.tar.gz" + ], + "md5": "04c1e9aad3751dc552027d8bc3491323", + "size compressed (bytes)": 175887267, + "total_terms": 3065828, + "documents": 25313, + "unique_terms": 62562, + "downloaded": False + }, + "beir-v1.0.0-fever.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): FEVER", + "filename": "lucene-index.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever.multifield.20221116.505594.tar.gz" + ], + "md5": "28ea09308760235ea2ec72d6f9b2f432", + "size compressed (bytes)": 3947213444, + "total_terms": 310655699, + "documents": 5396138, + "unique_terms": 3275057, + "downloaded": False + }, + "beir-v1.0.0-climate-fever.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): Climate-FEVER", + "filename": "lucene-index.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever.multifield.20221116.505594.tar.gz" + ], + "md5": "827f2759cdfc45c47bbb67835cfcb1f2", + "size compressed (bytes)": 3947277939, + "total_terms": 310661477, + "documents": 5396163, + "unique_terms": 3275068, + "downloaded": False + }, + "beir-v1.0.0-scifact.multifield": { + "description": "Lucene multifield index of BEIR (v1.0.0): SciFact", + "filename": "lucene-index.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact.multifield.20221116.505594.tar.gz" + ], + "md5": "efbafbc3e4909a026fe80bf8b1444b08", + "size compressed (bytes)": 9078032, + "total_terms": 784591, + "documents": 5183, + "unique_terms": 28581, + "downloaded": False + } +} + +TF_INDEX_INFO_MRTYDI = { + "mrtydi-v1.1-arabic": { + 
"description": "Lucene index for Mr.TyDi v1.1 (Arabic).", + "filename": "lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.tar.gz", + ], + "md5": "efff40a2548f759eb8b0e47e0622685b", + "size compressed (bytes)": 1420441600, + "total_terms": 92529032, + "documents": 2106586, + "unique_terms": 1284748, + "downloaded": False + }, + "mrtydi-v1.1-bengali": { + "description": "Lucene index for Mr.TyDi v1.1 (Bengali).", + "filename": "lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.tar.gz" + ], + "md5": "6ed844c8f17b2f041fba7c5676d3fb42", + "size compressed (bytes)": 294942720, + "total_terms": 15236599, + "documents": 304059, + "unique_terms": 520699, + "downloaded": False + }, + "mrtydi-v1.1-english": { + "description": "Lucene index for Mr.TyDi v1.1 (English).", + "filename": "lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.tar.gz" + ], + "md5": "e6b0a2531d958c3d1a65634dc315b0ab", + "size compressed (bytes)": 20566118400, + "total_terms": 1507060932, + "documents": 32907100, + "unique_terms": -1, + "downloaded": False + }, + "mrtydi-v1.1-finnish": { + "description": "Lucene index for Mr.TyDi v1.1 (Finnish).", + "filename": "lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.tar.gz" + ], + "md5": "0f464c022447eed5431157f0b2feb0b3", + "size compressed (bytes)": 1116272640, + "total_terms": 69416543, + "documents": 1908757, + "unique_terms": 1715076, + "downloaded": False + }, + "mrtydi-v1.1-indonesian": { + "description": "Lucene index for Mr.TyDi v1.1 (Indonesian).", + "filename": "lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.tar.gz" + ], + "md5": "345d43a2443786a3394a93a6f7ef77b7", + "size compressed (bytes)": 698388480, + "total_terms": 52493134, + "documents": 1469399, + "unique_terms": 942552, + "downloaded": False + }, + "mrtydi-v1.1-japanese": { + "description": "Lucene index for Mr.TyDi v1.1 (Japanese).", + "filename": "lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.tar.gz" + ], + "md5": "5f0802c1257c325a3e25c58523dba841", + "size compressed (bytes)": 4333844480, + "total_terms": 300761975, + "documents": 7000027, + "unique_terms": 1588879, + "downloaded": False + }, + "mrtydi-v1.1-korean": { + "description": "Lucene index for Mr.TyDi v1.1 (Korean).", + "filename": "lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.tar.gz" + ], + "md5": "4277f406b138c46edf7c17e4248f3b2e", + "size compressed (bytes)": 1349109760, + "total_terms": 122217295, + "documents": 1496126, + "unique_terms": 1517179, + "downloaded": False + }, + "mrtydi-v1.1-russian": { + "description": "Lucene index for Mr.TyDi v1.1 (Russian).", + "filename": "lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.tar.gz" + ], + "md5": "d5837fee29c60c7a3a24cfd598056038", + "size compressed (bytes)": 6864660480, + "total_terms": 346329117, + "documents": 9597504, + "unique_terms": 3034240, + "downloaded": False + }, + "mrtydi-v1.1-swahili": { + "description": "Lucene index for Mr.TyDi v1.1 (Swahili).", + "filename": "lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.tar.gz" + ], + "md5": "bebff76ec6dfe76c904604f8ed1bcd3e", + "size compressed (bytes)": 59607040, + "total_terms": 4937051, + "documents": 136689, + "unique_terms": 385711, + "downloaded": False + }, + "mrtydi-v1.1-telugu": { + "description": "Lucene index for Mr.TyDi v1.1 (Telugu).", + "filename": "lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.tar.gz" + ], + "md5": "89f8b280cacbdc27e90bb1ea40029c21", + "size compressed (bytes)": 519157760, + "total_terms": 26812052, + "documents": 548224, + "unique_terms": 1157217, + "downloaded": False + }, + "mrtydi-v1.1-thai": { + "description": "Lucene index for Mr.TyDi v1.1 (Thai).", + "filename": "lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.tar.gz", + "readme": "lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.tar.gz" + ], + "md5": "047152fc6bc1b5c5d945f38b23de971e", + "size compressed (bytes)": 546201600, + "total_terms": 31550936, + "documents": 568855, + "unique_terms": 663628, + "downloaded": False + } +} + +TF_INDEX_INFO_MIRACL = { + "miracl-v1.0-ar": { + "description": "Lucene index for MIRACL v1.0 (Arabic).", + "filename": "lucene-index.miracl-v1.0-ar.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-ar.20221004.2b2856.tar.gz" + ], + "md5": "503d3b49a557222d8074ac831a2f047a", + "size compressed (bytes)": 1193292491, + "total_terms": 90223450, + "documents": 2061414, + "unique_terms": 1246254, + "downloaded": False + }, + "miracl-v1.0-bn": { + "description": "Lucene index for MIRACL v1.0 (Bengali).", + "filename": "lucene-index.miracl-v1.0-bn.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-bn.20221004.2b2856.tar.gz" + ], + "md5": "7a20210328f0b83f44e041f0c94d30e2", + "size compressed (bytes)": 236113202, + "total_terms": 14963235, + "documents": 297265, + "unique_terms": 506812, + "downloaded": False + }, + 
"miracl-v1.0-en": { + "description": "Lucene index for MIRACL v1.0 (English).", + "filename": "lucene-index.miracl-v1.0-en.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-en.20221004.2b2856.tar.gz" + ], + "md5": "4fbd652deb76bcc05daa35392d4aa9f3", + "size compressed (bytes)": 17823436054, + "total_terms": 1505029955, + "documents": 32893221, + "unique_terms": 6152316, + "downloaded": False + }, + "miracl-v1.0-es": { + "description": "Lucene index for MIRACL v1.0 (Spanish).", + "filename": "lucene-index.miracl-v1.0-es.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-es.20221004.2b2856.tar.gz" + ], + "md5": "b4c9993ee3a131871d4f07dd96e80531", + "size compressed (bytes)": 5474245249, + "total_terms": 389319806, + "documents": 10373953, + "unique_terms": 2907509, + "downloaded": False + }, + "miracl-v1.0-fa": { + "description": "Lucene index for MIRACL v1.0 (Persian).", + "filename": "lucene-index.miracl-v1.0-fa.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-fa.20221004.2b2856.tar.gz" + ], + "md5": "bfc824aa37633e3d45bcfd5c5e0e1701", + "size compressed (bytes)": 1023090577, + "total_terms": 67968038, + "documents": 2207172, + "unique_terms": 1208930, + "downloaded": False + }, + "miracl-v1.0-fi": { + "description": "Lucene index for MIRACL v1.0 (Finnish).", + "filename": "lucene-index.miracl-v1.0-fi.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-fi.20221004.2b2856.tar.gz" + ], + "md5": "4197c90efd781c6153acaf15452c5479", + "size compressed (bytes)": 925422988, + "total_terms": 68295087, + "documents": 1883509, + "unique_terms": 1669817, + "downloaded": False + }, + "miracl-v1.0-fr": { + "description": "Lucene index for MIRACL v1.0 (French).", + "filename": "lucene-index.miracl-v1.0-fr.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-fr.20221004.2b2856.tar.gz" + ], + "md5": "e68b10d90be71b702888a3d00a8aa39c", + "size compressed (bytes)": 6747612709, + "total_terms": 508723988, + "documents": 14636953, + "unique_terms": 2811342, + "downloaded": False + }, + "miracl-v1.0-hi": { + "description": "Lucene index for MIRACL v1.0 (Hindi).", + "filename": "lucene-index.miracl-v1.0-hi.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-hi.20221004.2b2856.tar.gz" + ], + "md5": "d81f4e2b7ec5df8f9741168c23c977e2", + "size compressed (bytes)": 340997734, + "total_terms": 21080143, + "documents": 506264, + "unique_terms": 597558, + "downloaded": False + }, + "miracl-v1.0-id": { + "description": "Lucene index for MIRACL v1.0 (Indonesian).", + "filename": "lucene-index.miracl-v1.0-id.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-id.20221004.2b2856.tar.gz" + ], + "md5": "b1092e732991029fae7c542e5e129255", + "size compressed (bytes)": 
577263718, + "total_terms": 51469219, + "documents": 1446315, + "unique_terms": 911944, + "downloaded": False + }, + "miracl-v1.0-ja": { + "description": "Lucene index for MIRACL v1.0 (Japanese).", + "filename": "lucene-index.miracl-v1.0-ja.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-ja.20221004.2b2856.tar.gz" + ], + "md5": "4db9550d0af63736a0fd2b486b3b7273", + "size compressed (bytes)": 3745158372, + "total_terms": 296659169, + "documents": 6953614, + "unique_terms": 1558643, + "downloaded": False + }, + "miracl-v1.0-ko": { + "description": "Lucene index for MIRACL v1.0 (Korean).", + "filename": "lucene-index.miracl-v1.0-ko.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-ko.20221004.2b2856.tar.gz" + ], + "md5": "c82f5c7641fd78b8dadfcb279a1c0340", + "size compressed (bytes)": 1150899287, + "total_terms": 121464424, + "documents": 1486752, + "unique_terms": 1504782, + "downloaded": False + }, + "miracl-v1.0-ru": { + "description": "Lucene index for MIRACL v1.0 (Russian).", + "filename": "lucene-index.miracl-v1.0-ru.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-ru.20221004.2b2856.tar.gz" + ], + "md5": "c1b974e298d9e1deeccae8b84a5bcd64", + "size compressed (bytes)": 6003987738, + "total_terms": 343106870, + "documents": 9543918, + "unique_terms": 2955627, + "downloaded": False + }, + "miracl-v1.0-sw": { + "description": "Lucene index for MIRACL v1.0 (Swahili).", + "filename": "lucene-index.miracl-v1.0-sw.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-sw.20221004.2b2856.tar.gz" + ], + "md5": "64b77bcc11e04575d0723ad81ac7c135", + "size compressed (bytes)": 45410264, + "total_terms": 4752278, + "documents": 131924, + "unique_terms": 361306, + "downloaded": False + }, + "miracl-v1.0-te": { + "description": "Lucene index for MIRACL v1.0 (Telugu).", + "filename": "lucene-index.miracl-v1.0-te.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-te.20221004.2b2856.tar.gz" + ], + "md5": "1f78c68678f439a3143a6fb0d25bfe27", + "size compressed (bytes)": 402045711, + "total_terms": 26105595, + "documents": 518079, + "unique_terms": 1120047, + "downloaded": False + }, + "miracl-v1.0-th": { + "description": "Lucene index for MIRACL v1.0 (Thai).", + "filename": "lucene-index.miracl-v1.0-th.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-th.20221004.2b2856.tar.gz" + ], + "md5": "eeef93c23b76fdc66b9e1ee01576765e", + "size compressed (bytes)": 431498349, + "total_terms": 29922100, + "documents": 542166, + "unique_terms": 626084, + "downloaded": False + }, + "miracl-v1.0-zh": { + "description": "Lucene index for MIRACL v1.0 (Chinese).", + "filename": "lucene-index.miracl-v1.0-zh.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-zh.20221004.2b2856.tar.gz" + ], + "md5": "dc7880da333b7c56d3a4ff0bf018febd", + "size compressed (bytes)": 4212198217, + "total_terms": 423635495, + "documents": 4934368, + "unique_terms": 6517412, + "downloaded": False + }, + "miracl-v1.0-de": { + "description": "Lucene index for MIRACL v1.0 (German).", + "filename": "lucene-index.miracl-v1.0-de.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-de.20221004.2b2856.tar.gz" + ], + "md5": "a40d1b9429c450b2e476d1e4ba22784d", + "size compressed (bytes)": 8708219012, + "total_terms": 581583743, + "documents": 15866222, + "unique_terms": 6288858, + "downloaded": False + }, + "miracl-v1.0-yo": { + "description": "Lucene index for MIRACL v1.0 (Yoruba).", + "filename": "lucene-index.miracl-v1.0-yo.20221004.2b2856.tar.gz", + "readme": "lucene-index.miracl-v1.0.20221004.2b2856.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.miracl-v1.0-yo.20221004.2b2856.tar.gz" + ], + "md5": "7fa283d1af4a7c4ea8791eab9e386807", + "size compressed (bytes)": 13211664, + "total_terms": 1387088, + "documents": 49043, + "unique_terms": 174539, + "downloaded": False + } +} + +TF_INDEX_INFO_CIRAL = { + "ciral-v1.0-ha": { + "description": "Lucene index for CIRAL v1.0 (Hausa).", + "filename": "lucene-index.ciral-v1.0-ha.20230721.e850ea.tar.gz", + "readme": "lucene-index.ciral-v1.0.20230721.e850ea.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.ciral-v1.0-ha.20230721.e850ea.tar.gz" + ], + "md5": "9bef13f2b528d3a5712ce412c3c264f7", + "size compressed (bytes)": 671653035, + 'total_terms': 93696543, + 'documents': 715355, + 'unique_terms': 817967, + "downloaded": False + }, + + "ciral-v1.0-so": { + "description": "Lucene index for CIRAL v1.0 (Somali).", + "filename": "lucene-index.ciral-v1.0-so.20230721.e850ea.tar.gz", + "readme": "lucene-index.ciral-v1.0.20230721.e850ea.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.ciral-v1.0-so.20230721.e850ea.tar.gz" + ], + "md5": "4bb9d3ae1a6d65fbb2a4e7e57a71397d", + "size compressed (bytes)": 916229181, + "total_terms": 103736362, + "documents": 827552, + "unique_terms": 1636109, + "downloaded": False + }, + + "ciral-v1.0-sw": { + "description": "Lucene index for CIRAL v1.0 (Swahili).", + "filename": "lucene-index.ciral-v1.0-sw.20230721.e850ea.tar.gz", + "readme": "lucene-index.ciral-v1.0.20230721.e850ea.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.ciral-v1.0-sw.20230721.e850ea.tar.gz" + ], + "md5": "1236a1a4c87268d98ec6534cd99aaada", + "size compressed (bytes)": 896921754, + "total_terms": 115140711, + "documents": 949013, + "unique_terms": 1655554, + "downloaded": False + }, + + "ciral-v1.0-yo": { + "description": "Lucene index for CIRAL v1.0 (Yoruba).", + "filename": "lucene-index.ciral-v1.0-yo.20230721.e850ea.tar.gz", + "readme": "lucene-index.ciral-v1.0.20230721.e850ea.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.ciral-v1.0-yo.20230721.e850ea.tar.gz" + ], + "md5": "655e571314ed85cbfe637246c3d18110", + "size compressed (bytes)": 94610259, + "total_terms": 13693080, + "documents": 82095, + "unique_terms": 236638, + "downloaded": False + } + +} + + +TF_INDEX_INFO_OTHER = { + "cacm": { + "description": "Lucene index of the CACM corpus. 
(Lucene 9)", + "filename": "lucene-index.cacm.tar.gz", + "urls": [ + "https://github.com/castorini/anserini-data/raw/master/CACM/lucene-index.cacm.20221005.252b5e.tar.gz", + ], + "md5": "cfe14d543c6a27f4d742fb2d0099b8e0", + "size compressed (bytes)": 2347197, + "total_terms": 320968, + "documents": 3204, + "unique_terms": 14363, + }, + "robust04": { + "description": "Lucene index of TREC Disks 4 & 5 (minus Congressional Records), used in the TREC 2004 Robust Track. (Lucene 9)", + "filename": "lucene-index.robust04.20221005.252b5e.tar.gz", + "readme": "lucene-index.robust04.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.robust04.20221005.252b5e.tar.gz", + ], + "md5": "a1abd5437394956b7ec8bea4699b5e46", + "size compressed (bytes)": 1806776535, + "total_terms": 174540872, + "documents": 528030, + "unique_terms": 923436, + }, + + "enwiki-paragraphs": { + "description": "Lucene index of English Wikipedia for BERTserini", + "filename": "lucene-index.enwiki-20180701-paragraphs.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.enwiki-20180701-paragraphs.tar.gz", + "https://vault.cs.uwaterloo.ca/s/WHKMSCbwQfDXyHt/download" + ], + "md5": "77d1cd530579905dad2ee3c2bda1b73d", + "size compressed (bytes)": 17725958785, + "total_terms": 1498980668, + "documents": 39880064, + "unique_terms": -1, + "downloaded": False + }, + "zhwiki-paragraphs": { + "description": "Lucene index of Chinese Wikipedia for BERTserini", + "filename": "lucene-index.zhwiki-20181201-paragraphs.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.zhwiki-20181201-paragraphs.tar.gz", + "https://vault.cs.uwaterloo.ca/s/6kEjQZaRYtnb8A6/download" + ], + "md5": "c005af4036296972831288c894918a92", + "size compressed (bytes)": 3284531213, + "total_terms": 320776789, + "documents": 4170312, + "unique_terms": -1, + "downloaded": False + }, + + "trec-covid-r5-abstract": { + "description": "Lucene index for TREC-COVID Round 5: abstract index", + "filename": "lucene-index-cord19-abstract-2020-07-16.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-07-16/lucene-index-cord19-abstract-2020-07-16.tar.gz", + "https://vault.cs.uwaterloo.ca/s/c37JxKYQ7Hogs72/download" + ], + "md5": "c883571ccc78b4c2ce05b41eb07f5405", + "size compressed (bytes)": 2796524, + "total_terms": 22100404, + "documents": 192459, + "unique_terms": 195875, + "downloaded": False + }, + "trec-covid-r5-full-text": { + "description": "Lucene index for TREC-COVID Round 5: full-text index", + "filename": "lucene-index-cord19-full-text-2020-07-16.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-07-16/lucene-index-cord19-full-text-2020-07-16.tar.gz", + "https://vault.cs.uwaterloo.ca/s/c7CcxRbFWfiFnFq/download" + ], + "md5": "23cfad89b4c206d66125f5736f60248f", + "size compressed (bytes)": 5351744, + "total_terms": 275238847, + "documents": 192460, + "unique_terms": 1843368, + "downloaded": False + }, + "trec-covid-r5-paragraph": { + "description": "Lucene index for TREC-COVID Round 5: paragraph index", + "filename": "lucene-index-cord19-paragraph-2020-07-16.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-07-16/lucene-index-cord19-paragraph-2020-07-16.tar.gz", + "https://vault.cs.uwaterloo.ca/s/HXigraF5KJL3xS8/download" + ], + "md5": "c2c6ac832f8a1fcb767d2356d2b1e1df", + "size compressed (bytes)": 11352968, + "total_terms": 627083574, + "documents": 3010497, 
+ "unique_terms": 1843368, + "downloaded": False + }, + "trec-covid-r4-abstract": { + "description": "Lucene index for TREC-COVID Round 4: abstract index", + "filename": "lucene-index-cord19-abstract-2020-06-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-06-19/lucene-index-cord19-abstract-2020-06-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/fBta6sAt4MdaHQX/download" + ], + "md5": "029bd55daba8800fbae2be9e5fcd7b33", + "size compressed (bytes)": 2584264, + "total_terms": 18724353, + "documents": 158226, + "unique_terms": 179937, + "downloaded": False + }, + "trec-covid-r4-full-text": { + "description": "Lucene index for TREC-COVID Round 4: full-text index", + "filename": "lucene-index-cord19-full-text-2020-06-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-06-19/lucene-index-cord19-full-text-2020-06-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/yErSHZHD38jcDSY/download" + ], + "md5": "3d0eb12094a24cff9bcacd1f17c3ea1c", + "size compressed (bytes)": 4983900, + "total_terms": 254810123, + "documents": 158227, + "unique_terms": 1783089, + "downloaded": False + }, + "trec-covid-r4-paragraph": { + "description": "Lucene index for TREC-COVID Round 4: paragraph index", + "filename": "lucene-index-cord19-paragraph-2020-06-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-06-19/lucene-index-cord19-paragraph-2020-06-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/7md4kwNNgy3oxiH/download" + ], + "md5": "5cd8cd6998177bed7a3e0057ef8b3595", + "size compressed (bytes)": 10382704, + "total_terms": 567579834, + "documents": 2781172, + "unique_terms": 1783089, + "downloaded": False + }, + "trec-covid-r3-abstract": { + "description": "Lucene index for TREC-COVID Round 3: abstract index", + "filename": "lucene-index-cord19-abstract-2020-05-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-19/lucene-index-cord19-abstract-2020-05-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/Zg9p2D5tJgiTGx2/download" + ], + "md5": "37bb97d0c41d650ba8e135fd75ae8fd8", + "size compressed (bytes)": 2190328, + "total_terms": 16278419, + "documents": 128465, + "unique_terms": 168291, + "downloaded": False + }, + "trec-covid-r3-full-text": { + "description": "Lucene index for TREC-COVID Round 3: full-text index", + "filename": "lucene-index-cord19-full-text-2020-05-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-19/lucene-index-cord19-full-text-2020-05-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/BTzaQgZ55898dXM/download" + ], + "md5": "f5711915a66cd2b511e0fb8d03e4c325", + "size compressed (bytes)": 4233300, + "total_terms": 215806519, + "documents": 128465, + "unique_terms": 1620335, + "downloaded": False + }, + "trec-covid-r3-paragraph": { + "description": "Lucene index for TREC-COVID Round 3: paragraph index", + "filename": "lucene-index-cord19-paragraph-2020-05-19.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-19/lucene-index-cord19-paragraph-2020-05-19.tar.gz", + "https://vault.cs.uwaterloo.ca/s/nPyMYTys6NkmEdN/download" + ], + "md5": "012ab1f804382b2275c433a74d7d31f2", + "size compressed (bytes)": 9053524, + "total_terms": 485309568, + "documents": 2297201, + "unique_terms": 1620335, + "downloaded": False + }, + "trec-covid-r2-abstract": { + "description": "Lucene index for TREC-COVID Round 2: abstract index", + "filename": 
"lucene-index-cord19-abstract-2020-05-01.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-01/lucene-index-cord19-abstract-2020-05-01.tar.gz", + "https://vault.cs.uwaterloo.ca/s/3YZE65FSypwfnQQ/download" + ], + "md5": "a06e71a98a68d31148cb0e97e70a2ee1", + "size compressed (bytes)": 1575804, + "total_terms": 7651125, + "documents": 59873, + "unique_terms": 109750, + "downloaded": False + }, + "trec-covid-r2-full-text": { + "description": "Lucene index for TREC-COVID Round 2: full-text index", + "filename": "lucene-index-cord19-full-text-2020-05-01.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-01/lucene-index-cord19-full-text-2020-05-01.tar.gz", + "https://vault.cs.uwaterloo.ca/s/NdPEB7swXeZnq3o/download" + ], + "md5": "e7eca1b976cdf2cd80e908c9ac2263cb", + "size compressed (bytes)": 3088540, + "total_terms": 154736295, + "documents": 59876, + "unique_terms": 1214374, + "downloaded": False + }, + "trec-covid-r2-paragraph": { + "description": "Lucene index for TREC-COVID Round 2: paragraph index", + "filename": "lucene-index-cord19-paragraph-2020-05-01.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-05-01/lucene-index-cord19-paragraph-2020-05-01.tar.gz", + "https://vault.cs.uwaterloo.ca/s/Mz7n5FAt7rmnYCY/download" + ], + "md5": "8f9321757a03985ac1c1952b2fff2c7d", + "size compressed (bytes)": 6881696, + "total_terms": 360119048, + "documents": 1758168, + "unique_terms": 1214374, + "downloaded": False + }, + "trec-covid-r1-abstract": { + "description": "Lucene index for TREC-COVID Round 1: abstract index", + "filename": "lucene-index-covid-2020-04-10.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-04-10/lucene-index-covid-2020-04-10.tar.gz", + "https://vault.cs.uwaterloo.ca/s/Rz8AEmsFo9NWGP6/download" + ], + "md5": "ec239d56498c0e7b74e3b41e1ce5d42a", + "size compressed (bytes)": 1621440, + "total_terms": 6672525, + "documents": 51069, + "unique_terms": 104595, + "downloaded": False + }, + "trec-covid-r1-full-text": { + "description": "Lucene index for TREC-COVID Round 1: full-text index", + "filename": "lucene-index-covid-full-text-2020-04-10.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-04-10/lucene-index-covid-full-text-2020-04-10.tar.gz", + "https://vault.cs.uwaterloo.ca/s/oQzSoxrT3grGmBe/download" + ], + "md5": "401a6f5583b0f05340c73fbbeb3279c8", + "size compressed (bytes)": 4471820, + "total_terms": 315624154, + "documents": 51071, + "unique_terms": 1812522, + "downloaded": False + }, + "trec-covid-r1-paragraph": { + "description": "Lucene index for TREC-COVID Round 1: paragraph index", + "filename": "lucene-index-covid-paragraph-2020-04-10.tar.gz", + "urls": [ + "https://git.uwaterloo.ca/jimmylin/cord19-indexes/raw/master/2020-04-10/lucene-index-covid-paragraph-2020-04-10.tar.gz", + "https://vault.cs.uwaterloo.ca/s/HDtb5Ys7MYBkePC/download" + ], + "md5": "8b87a2c55bc0a15b87f11e796860216a", + "size compressed (bytes)": 5994192, + "total_terms": 330715243, + "documents": 1412648, + "unique_terms": 944574, + "downloaded": False + }, + + "cast2019": { + "description": "Lucene index for TREC 2019 CaST", + "filename": "index-cast2019.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/index-cast2019.tar.gz", + "https://vault.cs.uwaterloo.ca/s/56LcDcRPopdQc4d/download" + ], + "md5": "36e604d7f5a4e08ade54e446be2f6345", + "size compressed (bytes)": 21266884884, + 
"total_terms": 1593628213, + "documents": 38429835, + "unique_terms": -1, + "downloaded": False + }, + + "wikipedia-dpr-100w": { + "description": "Lucene index of Wikipedia with DPR 100-word splits", + "filename": "lucene-index.wikipedia-dpr-100w.20210120.d1b9e6.tar.gz", + "readme": "index-wikipedia-dpr-20210120-d1b9e6-readme.txt", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.wikipedia-dpr-100w.20210120.d1b9e6.tar.gz" + ], + "md5": "7b58c08da992b2ea7e96667f0b176651", + "size compressed (bytes)": 9177917732, + "total_terms": 1512973270, + "documents": 21015324, + "unique_terms": 5345463, + "downloaded": False + }, + "wikipedia-dpr-100w-slim": { + "description": "Lucene index of Wikipedia with DPR 100-word splits (slim version, document text not stored)", + "filename": "lucene-index.wikipedia-dpr-100w-slim.20210120.d1b9e6.tar.gz", + "readme": "index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.wikipedia-dpr-100w-slim.20210120.d1b9e6.tar.gz" + ], + "md5": "5d24352f0de6ae75b60e11a9cf622251", + "size compressed (bytes)": 1810337190, + "total_terms": 1512973270, + "documents": 21015324, + "unique_terms": 5345463, + "downloaded": False + }, + "wikipedia-kilt-doc": { + "description": "Lucene index of Wikipedia snapshot used as KILT's knowledge source.", + "filename": "lucene-index.wikipedia-kilt-doc.20210421.f29307.tar.gz", + "readme": "index-wikipedia-kilt-doc-20210421-f29307-readme.txt", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.wikipedia-kilt-doc.20210421.f29307.tar.gz" + ], + "md5": "d4a1e7628f6f68c51dd2d764e62b7f8d", + "size compressed (bytes)": 10901145611, + "total_terms": 1915061164, + "documents": 5903530, + "unique_terms": 8722502, + "downloaded": False + }, + "wiki-all-6-3-tamber": { + "description": "Lucene index of wiki-all-6-3-tamber from castorini/odqa-wiki-corpora", + "filename": "lucene-index.wiki-all-6-3-tamber.20230111.40277a.tar.gz", + "readme": "lucene-index-wiki-all-6-3-tamber-20230111-40277a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.wiki-all-6-3-tamber.20230111.40277a.tar.gz", + ], + "md5": "018b45ee8c6278a879caa3145b2dc05d", + "size compressed (bytes)": 26240661946, + "total_terms": 5064706668, + "documents": 76680040, + "unique_terms": 14604922, + "downloaded": False + }, + + "hc4-v1.0-fa": { + "description": "Lucene index for HC4 v1.0 (Persian). (Lucene 9)", + "filename": "lucene-index.hc4-v1.0-fa.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.hc4-v1.0.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.hc4-v1.0-fa.20221025.c4a8d0.tar.gz" + ], + "md5": "80735c01b2f2cf82288381370adf1d66", + "size compressed (bytes)": 1652960750, + "total_terms": 112225896, + "documents": 486486, + "unique_terms": 617109, + "downloaded": False + }, + "hc4-v1.0-ru": { + "description": "Lucene index for HC4 v1.0 (Russian). (Lucene 9)", + "filename": "lucene-index.hc4-v1.0-ru.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.hc4-v1.0.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.hc4-v1.0-ru.20221025.c4a8d0.tar.gz" + ], + "md5": "40259ba9ca993f850c960a172debe33e", + "size compressed (bytes)": 13292705599, + "total_terms": 764996714, + "documents": 4721064, + "unique_terms": 2625222, + "downloaded": False + }, + "hc4-v1.0-zh": { + "description": "Lucene index for HC4 v1.0 (Chinese). 
(Lucene 9)", + "filename": "lucene-index.hc4-v1.0-zh.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.hc4-v1.0.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.hc4-v1.0-zh.20221025.c4a8d0.tar.gz" + ], + "md5": "2ea8885b8ec6c637971c8df0706b623e", + "size compressed (bytes)": 2899033342, + "total_terms": 304468580, + "documents": 646302, + "unique_terms": 4380932, + "downloaded": False + }, + "neuclir22-fa": { + "description": "Lucene index for NeuCLIR 2022 corpus (Persian). (Lucene 9)", + "filename": "lucene-index.neuclir22-fa.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-fa.20221025.c4a8d0.tar.gz" + ], + "md5": "d423fb72bcd5bf2dea6e4a19743dcb95", + "size compressed (bytes)": 7565790180, + "total_terms": 514262091, + "documents": 2232016, + "unique_terms": 1479443, + "downloaded": False + }, + "neuclir22-ru": { + "description": "Lucene index for NeuCLIR 2022 corpus (Russian). (Lucene 9)", + "filename": "lucene-index.neuclir22-ru.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-ru.20221025.c4a8d0.tar.gz" + ], + "md5": "2d04bbc880d535c1c4ab172c2c2d8ffe", + "size compressed (bytes)": 14202967387, + "total_terms": 830006658, + "documents": 4627541, + "unique_terms": 3396095, + "downloaded": False + }, + "neuclir22-zh": { + "description": "Lucene index for NeuCLIR 2022 corpus (Chinese). (Lucene 9)", + "filename": "lucene-index.neuclir22-zh.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-zh.20221025.c4a8d0.tar.gz" + ], + "md5": "46fe989676ff510b997af24f6398199f", + "size compressed (bytes)": 15733809682, + "total_terms": 1654090507, + "documents": 3179206, + "unique_terms": 8213058, + "downloaded": False + }, + "neuclir22-fa-en": { + "description": "Lucene index for NeuCLIR 2022 corpus (official English translation from Persian). (Lucene 9)", + "filename": "lucene-index.neuclir22-fa-en.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22-en.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-fa-en.20221025.c4a8d0.tar.gz" + ], + "md5": "35363339b7f0527f27403b848fe01b04", + "size compressed (bytes)": 6172239242, + "total_terms": 554848215, + "documents": 2232016, + "unique_terms": 1033260, + "downloaded": False + }, + "neuclir22-ru-en": { + "description": "Lucene index for NeuCLIR 2022 corpus (official English translation from Russian). (Lucene 9)", + "filename": "lucene-index.neuclir22-ru-en.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22-en.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-ru-en.20221025.c4a8d0.tar.gz" + ], + "md5": "b0b98803260665eeae97163d2361838e", + "size compressed (bytes)": 10513242212, + "total_terms": 911886830, + "documents": 4627541, + "unique_terms": 2794257, + "downloaded": False + }, + "neuclir22-zh-en": { + "description": "Lucene index for NeuCLIR 2022 corpus (official English translation from Chinese). 
(Lucene 9)", + "filename": "lucene-index.neuclir22-zh-en.20221025.c4a8d0.tar.gz", + "readme": "lucene-index.neuclir22-en.20221025.c4a8d0.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.neuclir22-zh-en.20221025.c4a8d0.tar.gz" + ], + "md5": "d44ca9c7b634cf56e8cfd5892a3d3427", + "size compressed (bytes)": 8470981318, + "total_terms": 803227160, + "documents": 3179206, + "unique_terms": 1616532, + "downloaded": False + }, # TODO: need to update links to these files to rgw.cs.uwaterloo.ca/... + "atomic_text_v0.2.1_small_validation": { + "description": "Lucene index for AToMiC Text v0.2.1 small setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.small.validation.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.text.flat.small.validation.tar.gz" + ], + "md5": "377f3e4ae48e1afbe05650e339322050", + "size compressed (bytes)": 32900945, + "total_terms": 2999824, + "documents": 17173, + "unique_terms": 118071, + "downloaded": False + }, + "atomic_text_v0.2.1_base": { + "description": "Lucene index for AToMiC Text v0.2.1 base setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.base.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.text.flat.base.tar.gz" + ], + "md5": "41ca80241e77ed3515dd48bfc047a923", + "size compressed (bytes)": 5532178004, + "total_terms": 520954965, + "documents": 3029504, + "unique_terms": -1, + "downloaded": False + }, + "atomic_text_v0.2.1_large": { + "description": "Lucene index for AToMiC Text v0.2.1 large setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.large.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.text.flat.large.tar.gz" + ], + "md5": "0dd1975d82fa7c57a471e4e6b1882177", + "size compressed (bytes)": 18224101285, + "total_terms": 1727597393, + "documents": 10134744, + "unique_terms": -1, + "downloaded": False + }, + "atomic_image_v0.2_small_validation": { + "description": "Lucene index for AToMiC Images v0.2 small setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.small.validation.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.image.flat.small.validation.tar.gz" + ], + "md5": "b5363a9a7ecd0f071fb8e0319168ccf8", + "size compressed (bytes)": 4902534, + "total_terms": 308646, + "documents": 16126, + "unique_terms": 48666, + "downloaded": False + }, + "atomic_image_v0.2_base": { + "description": "Lucene index for AToMiC Images v0.2 base setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.base.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.image.flat.base.tar.gz" + ], + "md5": "55e88e334165b7147092ee67dfa74955", + "size compressed (bytes)": 1218292466, + "total_terms": 100743397, + "documents": 3410779, + "unique_terms": -1, + "downloaded": False + }, 
+ "atomic_image_v0.2_large": { + "description": "Lucene index for AToMiC Images v0.2 large setting on validation set (Lucene 9)", + "filename": "lucene-index.atomic.image.flat.large.tar.gz", + "readme": "lucene-index.atomic.20230525.a7df7f.README.md", + "urls": [ + "https://huggingface.co/spaces/dlrudwo1269/AToMiC_bm25_files/resolve/main/prebuilt_indexes/lucene-index.atomic.image.flat.large.tar.gz" + ], + "md5": "919c3f870968ffbe24f30407ad1385f8", + "size compressed (bytes)": 1341866370, + "total_terms": 108550562, + "documents": 3803656, + "unique_terms": -1, + "downloaded": False + }, +} + +TF_INDEX_INFO = {**TF_INDEX_INFO_MSMARCO, + **TF_INDEX_INFO_BEIR, + **TF_INDEX_INFO_MRTYDI, + **TF_INDEX_INFO_MIRACL, + **TF_INDEX_INFO_CIRAL, + **TF_INDEX_INFO_OTHER} + +IMPACT_INDEX_INFO_MSMARCO = { + "msmarco-v1-passage-slimr": { + "description": "Lucene impact index of the MS MARCO V1 passage corpus enoded by SLIM trained with BM25 negatives. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-slimr.20230220.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-slimr.20230220.md", + "urls": [ + "https://vault.cs.uwaterloo.ca/s/EptAojzmCxz7mYM/download", + ], + "md5": "79e566fee4f376096e12a33cf67c8012", + "size compressed (bytes)": 1942207690, + "total_terms": 100694232684, + "documents": 8841823, + "unique_terms": 28121, + "downloaded": False + }, + "msmarco-v1-passage-slimr-pp": { + "description": "Lucene impact index of the MS MARCO V1 passage corpus enoded by SLIM trained with cross-encoder distillation and hardnegative mining. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-slimr-pp.20230220.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-slimr-pp.20230220.md", + "urls": [ + "https://vault.cs.uwaterloo.ca/s/22Gjmnp5EP2HpqR/download", + ], + "md5": "17b2edd909bcda4980a93fb0ab87e72b", + "size compressed (bytes)": 2164253966, + "total_terms": 104421954301, + "documents": 8841823, + "unique_terms": 27766, + "downloaded": False + }, + "msmarco-v1-passage-unicoil": { + "description": "Lucene impact index of the MS MARCO V1 passage corpus for uniCOIL. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-unicoil.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-unicoil.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-unicoil.20221005.252b5e.tar.gz", + ], + "md5": "29521fa94165e87caaaddcb5b0d37b13", + "size compressed (bytes)": 1161034003, + "total_terms": 44495093768, + "documents": 8841823, + "unique_terms": 27678, + "downloaded": False + }, + "msmarco-v1-passage-unicoil-noexp": { + "description": "Lucene impact index of the MS MARCO V1 passage corpus for uniCOIL (noexp). (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-unicoil-noexp.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-unicoil-noexp.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-unicoil-noexp.20221005.252b5e.tar.gz", + ], + "md5": "dcb6506e0b8bb1d41863ea9cbaa057cf", + "size compressed (bytes)": 873512626, + "total_terms": 26468530021, + "documents": 8841823, + "unique_terms": 27647, + "downloaded": False + }, + "msmarco-v1-passage-deepimpact": { + "description": "Lucene impact index of the MS MARCO passage corpus encoded by DeepImpact. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-deepimpact.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-deepimpact.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-deepimpact.20221005.252b5e.tar.gz", + ], + "md5": "e1cd5bd86ae5b35912991a6c8c448bb0", + "size compressed (bytes)": 1242661484, + "total_terms": 35455908214, + "documents": 8841823, + "unique_terms": 3514102, + "downloaded": False + }, + "msmarco-v1-passage-unicoil-tilde": { + "description": "Lucene impact index of the MS MARCO passage corpus encoded by uniCOIL-TILDE. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-unicoil-tilde.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-unicoil-tilde.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-unicoil-tilde.20221005.252b5e.tar.gz", + ], + "md5": "b732c58113ec39b197083dee3e702932", + "size compressed (bytes)": 1871922326, + "total_terms": 73040108576, + "documents": 8841823, + "unique_terms": 27646, + "downloaded": False + }, + "msmarco-v1-passage-distill-splade-max": { + "description": "Lucene impact index of the MS MARCO passage corpus encoded by distill-splade-max. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-distill-splade-max.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-distill-splade-max.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-distill-splade-max.20221005.252b5e.tar.gz" + ], + "md5": "7d8b56b348685b9c3e29e306803c61eb", + "size compressed (bytes)": 3822892457, + "total_terms": 95445422483, + "documents": 8841823, + "unique_terms": 28131, + "downloaded": False + }, + + "msmarco-v1-passage-splade-pp-ed": { + "description": "Lucene impact index of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-EnsembleDistil. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-ed.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-ed.20230524.a59610.tar.gz" + ], + "md5": "4b3c969033cbd017306df42ce134c395", + "size compressed (bytes)": 2102229906, + "total_terms": 52376261130, + "documents": 8841823, + "unique_terms": 28679, + "downloaded": False + }, + "msmarco-v1-passage-splade-pp-ed-docvectors": { + "description": "Lucene impact index (with docvectors) of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-EnsembleDistil. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-ed-docvectors.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-ed-docvectors.20230524.a59610.tar.gz" + ], + "md5": "270301ea1413c38cc83cf682c7787b49", + "size compressed (bytes)": 13052697908, + "total_terms": 52376261130, + "documents": 8841823, + "unique_terms": 28679, + "downloaded": False + }, + "msmarco-v1-passage-splade-pp-ed-text": { + "description": "Lucene impact index (with text) of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-EnsembleDistil. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-ed-text.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-ed-text.20230524.a59610.tar.gz" + ], + "md5": "151e9b1b345197cd4a0edbf7127f3deb", + "size compressed (bytes)": 9983469862, + "total_terms": 52376261130, + "documents": 8841823, + "unique_terms": 28679, + "downloaded": False + }, + "msmarco-v1-passage-splade-pp-sd": { + "description": "Lucene impact index of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-SelfDistil. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-sd.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-sd.20230524.a59610.tar.gz" + ], + "md5": "4e4a3969c1e9e7262b2783ad192086ae", + "size compressed (bytes)": 2367261002, + "total_terms": 55456660129, + "documents": 8841823, + "unique_terms": 28662, + "downloaded": False + }, + "msmarco-v1-passage-splade-pp-sd-docvectors": { + "description": "Lucene impact index (with docvectors) of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-SelfDistil. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-sd-docvectors.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-sd-docvectors.20230524.a59610.tar.gz" + ], + "md5": "8d75aecc95e63853c832916da62e97f9", + "size compressed (bytes)": 14829233820, + "total_terms": 55456660129, + "documents": 8841823, + "unique_terms": 28662, + "downloaded": False + }, + "msmarco-v1-passage-splade-pp-sd-text": { + "description": "Lucene impact index (with text) of the MS MARCO passage corpus encoded by SPLADE++ CoCondenser-SelfDistil. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-passage-splade-pp-sd-text.20230524.a59610.tar.gz", + "readme": "lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-passage-splade-pp-sd-text.20230524.a59610.tar.gz" + ], + "md5": "1d90dc2803a6fea55a4d16da7623e2ed", + "size compressed (bytes)": 11473065718, + "total_terms": 55456660129, + "documents": 8841823, + "unique_terms": 28662, + "downloaded": False + }, + + "msmarco-v1-doc-segmented-unicoil": { + "description": "Lucene impact index of the MS MARCO V1 segmented document corpus for uniCOIL, with title/segment encoding. (Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-unicoil.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented-unicoil.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-unicoil.20221005.252b5e.tar.gz", + ], + "md5": "06e087b8575f3d49177abfcfaf4bba1c", + "size compressed (bytes)": 5765257637, + "total_terms": 214505277898, + "documents": 20545677, + "unique_terms": 29142, + "downloaded": False + }, + "msmarco-v1-doc-segmented-unicoil-noexp": { + "description": "Lucene impact index of the MS MARCO V1 segmented document corpus for uniCOIL (noexp), with title/segment encoding. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20221005.252b5e.tar.gz", + "readme": "lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20221005.252b5e.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20221005.252b5e.tar.gz", + ], + "md5": "f2bb0e6e9e0ea4baa6072f6f842623d8", + "size compressed (bytes)": 5323380960, + "total_terms": 152323732876, + "documents": 20545677, + "unique_terms": 29142, + "downloaded": False + }, + + "msmarco-v2-passage-unicoil-0shot": { + "description": "Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.tar.gz", + ], + "md5": "9da229088995a3abfea57dd8681d16d5", + "size compressed (bytes)": 21736933361, + "total_terms": 775253560148, + "documents": 138364198, + "unique_terms": 29149, + "downloaded": False + }, + "msmarco-v2-passage-unicoil-noexp-0shot": { + "description": "Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL (noexp). (Lucene 9)", + "filename": "lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.tar.gz", + ], + "md5": "dda9de84072d2162e8649a040153942e", + "size compressed (bytes)": 14347302774, + "total_terms": 411330032512, + "documents": 138364198, + "unique_terms": 29148, + "downloaded": False + }, + + "msmarco-v2-doc-segmented-unicoil-0shot": { + "description": "Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL, with title prepended. (Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.tar.gz" + ], + "md5": "cc98b13869c78ad3ef069d3a1c4ebaf4", + "size compressed (bytes)": 33573641204, + "total_terms": 1204542769110, + "documents": 124131414, + "unique_terms": 29168, + "downloaded": False + }, + "msmarco-v2-doc-segmented-unicoil-noexp-0shot": { + "description": "Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL (noexp) with title prepended. 
(Lucene 9)", + "filename": "lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.tar.gz", + "readme": "lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.tar.gz" + ], + "md5": "e70c3bf0016407bf20cfe35fb0d277e0", + "size compressed (bytes)": 29059155839, + "total_terms": 820664704261, + "documents": 124131404, + "unique_terms": 29172, + "downloaded": False + } +} + +IMPACT_INDEX_INFO_BEIR = { + # BEIR (v1.0.0) impact indexes encoded by SPLADE-distill CoCodenser-medium + "beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): TREC-COVID encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "0f91fb01fec4b1c590fe683ad2383339", + "size compressed (bytes)": 55889585, + "total_terms": 1697942549, + "documents": 171332, + "unique_terms": 26611, + "downloaded": False + }, + "beir-v1.0.0-bioasq-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): BioASQ encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "a0317f26b1fab3bca71b46e0a4eff816", + "size compressed (bytes)": 5396189427, + "total_terms": 181960155708, + "documents": 14914603, + "unique_terms": 27703, + "downloaded": False + }, + "beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): NFCorpus encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "9c6f3ecfa6186c3ab5125f5c3d4eb962", + "size compressed (bytes)": 1439110, + "total_terms": 41582222, + "documents": 3633, + "unique_terms": 16295, + "downloaded": False + }, + "beir-v1.0.0-nq-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): NQ encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "9d418f806b8304a075945afa80bfcc22", + "size compressed (bytes)": 833470407, + "total_terms": 21901570532, + "documents": 2681468, + "unique_terms": 28747, + "downloaded": False + }, + 
"beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): HotpotQA encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "e96767f3d97cba5104dfd76eafdb35b7", + "size compressed (bytes)": 1173403732, + "total_terms": 32565190895, + "documents": 5233329, + "unique_terms": 28724, + "downloaded": False + }, + "beir-v1.0.0-fiqa-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): FiQA-2018 encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "937f0112a77a81879d6e42431d7fd522", + "size compressed (bytes)": 19624314, + "total_terms": 487502241, + "documents": 57638, + "unique_terms": 26244, + "downloaded": False + }, + "beir-v1.0.0-signal1m-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): Signal-1M encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "ac79812f60bcd597d351174a58fb085c", + "size compressed (bytes)": 602427178, + "total_terms": 13103073741, + "documents": 2866316, + "unique_terms": 28130, + "downloaded": False + }, + "beir-v1.0.0-trec-news-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): TREC-NEWS encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "d24ca30cb52510d193f9361e7f6996b7", + "size compressed (bytes)": 270800660, + "total_terms": 7519025445, + "documents": 594977, + "unique_terms": 27745, + "downloaded": False + }, + "beir-v1.0.0-robust04-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): Robust04 encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "24e6310cd04a73604a8b467e582d153f", + "size compressed (bytes)": 213476457, + "total_terms": 6718533167, + "documents": 528155, + "unique_terms": 
27623, + "downloaded": False + }, + "beir-v1.0.0-arguana-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): ArguAna encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "d008e420e5be96ab7e9d40bafc3183ce", + "size compressed (bytes)": 3816904, + "total_terms": 96421121, + "documents": 8674, + "unique_terms": 22536, + "downloaded": False + }, + "beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): Webis-Touche2020 encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "e05433f5cd3113b50b5fe166e18975d4", + "size compressed (bytes)": 124322238, + "total_terms": 3229042324, + "documents": 382545, + "unique_terms": 27742, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-android encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "753c02411a6391e5d45ba39fdc30a535", + "size compressed (bytes)": 5995405, + "total_terms": 157949889, + "documents": 22998, + "unique_terms": 18891, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-english encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "f377274f93d9f6426034fdd78457f5ee", + "size compressed (bytes)": 9857825, + "total_terms": 218761119, + "documents": 40221, + "unique_terms": 26613, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-gaming encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "a8549ec6b7af25fe4a60fd7f4827afbd", + "size compressed (bytes)": 12976249, + "total_terms": 296073202, + "documents": 45301, + "unique_terms": 24564, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-gis encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "26341f18a352668986bc8cf82006dc38", + "size compressed (bytes)": 10250646, + "total_terms": 296967034, + "documents": 37637, + "unique_terms": 22034, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-mathematica encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "799a7c044cc774b29e55de4a8c0a813b", + "size compressed (bytes)": 4771584, + "total_terms": 132796971, + "documents": 16705, + "unique_terms": 19765, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-physics encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "75ed5bb0217ba4f1c957bc25109f2823", + "size compressed (bytes)": 10887180, + "total_terms": 284896455, + "documents": 38316, + "unique_terms": 22985, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-programmers encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "42e2da2036a3e1d5780c90cda8c2193e", + "size compressed (bytes)": 10036425, + "total_terms": 258856106, + "documents": 32176, + "unique_terms": 22560, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-stats encoded by 
SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "50043a036497ea6533fd2ce62f151370", + "size compressed (bytes)": 11867711, + "total_terms": 333590386, + "documents": 42269, + "unique_terms": 23322, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-tex encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "83026f984c1007c656f15d7c01cf5da0", + "size compressed (bytes)": 19613041, + "total_terms": 604604076, + "documents": 68184, + "unique_terms": 24669, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-unix encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "5bb2b4999e8769aca00c7dff2baaf297", + "size compressed (bytes)": 12705584, + "total_terms": 369576280, + "documents": 47382, + "unique_terms": 21712, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-webmasters encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "bb2b4227659f8f32e1fcd4d7dee6065c", + "size compressed (bytes)": 4987493, + "total_terms": 127823828, + "documents": 17405, + "unique_terms": 20286, + "downloaded": False + }, + "beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): CQADupStack-wordpress encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "2acbaa7b2a0f8699e54fdee2efb2d376", + "size compressed (bytes)": 12583602, + "total_terms": 362488001, 
+ "documents": 48605, + "unique_terms": 21867, + "downloaded": False + }, + "beir-v1.0.0-quora-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): Quora encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "6358d683527284ecf4c1dbb6ad008a0f", + "size compressed (bytes)": 51880975, + "total_terms": 1322737004, + "documents": 522931, + "unique_terms": 27042, + "downloaded": False + }, + "beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): DBPedia encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "9cb05766611bea863a96818219657c78", + "size compressed (bytes)": 1225612002, + "total_terms": 30490098411, + "documents": 4635922, + "unique_terms": 28709, + "downloaded": False + }, + "beir-v1.0.0-scidocs-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): SCIDOCS encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "38d2a4bbabf9b6b1cd627ce81660e07d", + "size compressed (bytes)": 11252695, + "total_terms": 273175826, + "documents": 25657, + "unique_terms": 24241, + "downloaded": False + }, + "beir-v1.0.0-fever-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): FEVER encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "cc71baa5838edd4e7cd288ca26488532", + "size compressed (bytes)": 1497554696, + "total_terms": 38844967407, + "documents": 5416568, + "unique_terms": 28670, + "downloaded": False + }, + "beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): Climate-FEVER encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "1479d75cd9496a7d57064b86f6ee67ef", + "size compressed 
(bytes)": 1497450545, + "total_terms": 38845226073, + "documents": 5416593, + "unique_terms": 28670, + "downloaded": False + }, + "beir-v1.0.0-scifact-splade_distil_cocodenser_medium": { + "description": "Lucene impact index of BEIR (v1.0.0): SciFact encoded by SPLADE-distill CoCodenser-medium", + "filename": "lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20221116.505594.tar.gz", + "readme": "lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20221116.505594.tar.gz" + ], + "md5": "367db6c4a466d442ba089a38dad9fc6e", + "size compressed (bytes)": 2173167, + "total_terms": 65836037, + "documents": 5183, + "unique_terms": 17486, + "downloaded": False + } +} + +IMPACT_INDEX_INFO = {**IMPACT_INDEX_INFO_MSMARCO, + **IMPACT_INDEX_INFO_BEIR} + +FAISS_INDEX_INFO_MSMARCO = { + # Aggretriever indexes + "msmarco-v1-passage.aggretriever-cocondenser": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by aggretriever-cocondenser encoder.", + "filename": "faiss.msmarco-v1-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz" + ], + "md5": "58da608d5b31b28001b3aa1cf33479f6", + "size compressed (bytes)": 26053474943, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.aggretriever-distilbert": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by aggretriever-distilbert encoder.", + "filename": "faiss.msmarco-v1-passage.aggretriever-distilbert.20230407.f627ef.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.aggretriever-distilbert.20230407.f627ef.tar.gz" + ], + "md5": "ed1492be0ce7539aacd5db5028404989", + "size compressed (bytes)": 25963140666, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + + "msmarco-v1-passage.ance": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the ANCE MS MARCO passage encoder", + "filename": "faiss.msmarco-v1-passage.ance.20210224.060cef.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.ance.20210224.060cef.tar.gz" + ], + "md5": "c4e485efd0802031783d6fe487125446", + "size compressed (bytes)": 25102344836, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.distilbert-dot-margin-mse-t2": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the distilbert-dot-margin_mse-T2-msmarco encoder", + "filename": "faiss.msmarco-v1-passage.distilbert-dot-margin_mse-t2.20210316.d44c3a.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.distilbert-dot-margin_mse-t2.20210316.d44c3a.tar.gz" + ], + "md5": "66dcbf3331f270673e3b9702a6ad3540", + "size compressed (bytes)": 25162771693, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.distilbert-dot-tas_b-b256": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by distilbert-dot-tas_b-b256-msmarco encoder", + "filename": "faiss.msmarco-v1-passage.distilbert-dot-tas_b-b256.20210527.63276f.tar.gz", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.distilbert-dot-tas_b-b256.20210527.63276f.tar.gz" + ], + "md5": "4e64a643fc051bc9506a3a554e9394e7", + "size compressed (bytes)": 25162329414, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.sbert": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the SBERT MS MARCO passage encoder", + "filename": "faiss.msmarco-v1-passage.sbert.20210313.a0fbb3.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.sbert.20210313.a0fbb3.tar.gz" + ], + "md5": "d5d9eb67fa9da8b77a219ac77a5a3d3e", + "size compressed (bytes)": 25214193092, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.tct_colbert": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by TCT-ColBERT", + "filename": "faiss.msmarco-v1-passage.tct_colbert.20210112.be7119.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.tct_colbert.20210112.be7119.tar.gz" + ], + "md5": "2dea6e8697b220719139027c7ee2aff0", + "size compressed (bytes)": 25204501822, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.tct_colbert.hnsw": { + "description": "Faiss HNSW index of the MS MARCO passage corpus encoded by TCT-ColBERT", + "filename": "hnsw-faiss.msmarco-v1-passage.tct_colbert.20210112.be7119.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/hnsw-faiss.msmarco-v1-passage.tct_colbert.20210112.be7119.tar.gz" + ], + "md5": "24acb6e6ba0ac1f5c6b73bd3e6d6477f", + "size compressed (bytes)": 33359120779, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.tct_colbert-v2": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the tct_colbert-v2 passage encoder", + "filename": "faiss.msmarco-v1-passage.tct_colbert-v2.20210608.5f341b.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.tct_colbert-v2.20210608.5f341b.tar.gz" + ], + "md5": "805bb253828a59af1899a8cc42e0f766", + "size compressed (bytes)": 25211079468, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.tct_colbert-v2-hn": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the tct_colbert-v2-hn passage encoder", + "filename": "faiss.msmarco-v1-passage.tct_colbert-v2-hn.20210608.5f341b.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.tct_colbert-v2-hn.20210608.5f341b.tar.gz" + ], + "md5": "569f0ee9d45586b547d84fcd240e5cee", + "size compressed (bytes)": 25205730053, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.tct_colbert-v2-hnp": { + "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by the tct_colbert-v2-hnp passage encoder", + "filename": "faiss.msmarco-v1-passage.tct_colbert-v2-hnp.20210608.5f341b.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.tct_colbert-v2-hnp.20210608.5f341b.tar.gz" + ], + "md5": "53bcaa78ab0ca629f3379b8aa00eb3ae", + "size compressed (bytes)": 25225526436, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + "msmarco-v1-passage.openai-ada2": { + "description": "Faiss FlatIP index of the MS 
MARCO document corpus encoded by TCT-ColBERT-V2-HNP", + "filename": "faiss.msmarco-v1-passage.openai-ada2.20230530.e3a58f.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.openai-ada2.20230530.e3a58f.tar.gz" + ], + "md5": "14725ced21bdcd0c9866aab1cfe8f2e0", + "size compressed (bytes)": 45649935573, + "documents": 8841823, + "downloaded": False, + "texts": "msmarco-v1-passage" + }, + + "msmarco-v1-doc.ance-maxp": { + "description": "Faiss FlatIP index of the MS MARCO document corpus encoded by the ANCE MaxP encoder", + "filename": "faiss.msmarco-v1-doc.ance_maxp.20210304.b2a1b0.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-doc.ance_maxp.20210304.b2a1b0.tar.gz" + ], + "md5": "6b484b0c04872f22ae903ed7d7ba1327", + "size compressed (bytes)": 58312805253, + "documents": 20544550, + "downloaded": False, + "texts": "msmarco-v1-doc" + }, + "msmarco-v1-doc.tct_colbert": { + "description": "Faiss FlatIP index of the MS MARCO document corpus encoded by TCT-ColBERT", + "filename": "faiss.msmarco-v1-doc.tct_colbert.20210112.be7119.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-doc.tct_colbert.20210112.be7119.tar.gz" + ], + "md5": "4e97e1d6990ba5d4b93b7798c3036edc", + "size compressed (bytes)": 58514325945, + "documents": 20544550, + "downloaded": False, + "texts": "smarco-v1-doc" + }, + "msmarco-v1-doc-segmented.tct_colbert-v2-hnp": { + "description": "Faiss FlatIP index of the MS MARCO document corpus encoded by TCT-ColBERT-V2-HNP", + "filename": "faiss.msmarco-v1-doc-segmented.tct_colbert-v2-hnp.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-doc-segmented.tct_colbert-v2-hnp.tar.gz" + ], + "md5": "1dab64255822d2fd4dff8c0807319d0a", + "size compressed (bytes)": 58586765413, + "documents": 20544550, + "downloaded": False, + "texts": "msmarco-v1-doc-segmented" + } +} + +FAISS_INDEX_INFO_BEIR = { + # BEIR (v1.0.0) contriever indexes + "beir-v1.0.0-trec-covid.contriever": { + "description": "Faiss index for BEIR v1.0.0 (TREC-COVID) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-trec-covid.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-covid.contriever.20230124.tar.gz" + ], + "md5": "5b5baf557979e30e943180627fe31340", + "size compressed (bytes)": 488100317, + "documents": 171332, + "downloaded": False, + "texts": "beir-v1.0.0-trec-covid.flat" + }, + "beir-v1.0.0-bioasq.contriever": { + "description": "Faiss index for BEIR v1.0.0 (BioASQ) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-bioasq.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-bioasq.contriever.20230124.tar.gz" + ], + "md5": "c0cbca535d38c1f1f78ff1bd6d91af5d", + "size compressed (bytes)": 42417202460, + "documents": 14914603, + "downloaded": False, + "texts": "beir-v1.0.0-bioasq.flat" + }, + "beir-v1.0.0-nfcorpus.contriever": { + "description": "Faiss index for BEIR v1.0.0 (NFCorpus) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-nfcorpus.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nfcorpus.contriever.20230124.tar.gz" + ], + "md5": 
"5eff0107f7953ebe7658c3a6400e7027", + "size compressed (bytes)": 10322409, + "documents": 3633, + "downloaded": False, + "texts": "beir-v1.0.0-nfcorpus.flat" + }, + "beir-v1.0.0-nq.contriever": { + "description": "Faiss index for BEIR v1.0.0 (NQ) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-nq.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nq.contriever.20230124.tar.gz" + ], + "md5": "e1825fe0ce5c8000b63b1499374adb0e", + "size compressed (bytes)": 7617697503, + "documents": 2681468, + "downloaded": False, + "texts": "beir-v1.0.0-nq.flat" + }, + "beir-v1.0.0-hotpotqa.contriever": { + "description": "Faiss index for BEIR v1.0.0 (HotpotQA) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-hotpotqa.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-hotpotqa.contriever.20230124.tar.gz" + ], + "md5": "51445960e00a18264ae3947b3af2bc80", + "size compressed (bytes)": 14874721901, + "documents": 5233329, + "downloaded": False, + "texts": "beir-v1.0.0-hotpotqa.flat" + }, + "beir-v1.0.0-fiqa.contriever": { + "description": "Faiss index for BEIR v1.0.0 (FiQA-2018) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-fiqa.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fiqa.contriever.20230124.tar.gz" + ], + "md5": "a03cc30459b1a1928b93ad1aa51a7849", + "size compressed (bytes)": 164024764, + "documents": 57638, + "downloaded": False, + "texts": "beir-v1.0.0-fiqa.flat" + }, + "beir-v1.0.0-signal1m.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Signal-1M) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-signal1m.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-signal1m.contriever.20230124.tar.gz" + ], + "md5": "19e3e324b7b87e55fb9f6b6b1e72c464", + "size compressed (bytes)": 8142533760, + "documents": 2866316, + "downloaded": False, + "texts": "beir-v1.0.0-signal1m.flat" + }, + "beir-v1.0.0-trec-news.contriever": { + "description": "Faiss index for BEIR v1.0.0 (TREC-NEWS) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-trec-news.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-news.contriever.20230124.tar.gz" + ], + "md5": "20db6299b57b3e78ea2f8b7a2b649770", + "size compressed (bytes)": 1629958623, + "documents": 594977, + "downloaded": False, + "texts": "beir-v1.0.0-trec-news.flat" + }, + "beir-v1.0.0-robust04.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Robust04) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-robust04.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-robust04.contriever.20230124.tar.gz" + ], + "md5": "81c730b68e066baf18d5b46918b8c830", + "size compressed (bytes)": 1501110333, + "documents": 528155, + "downloaded": False, + "texts": "beir-v1.0.0-robust04.flat" + }, + 
"beir-v1.0.0-arguana.contriever": { + "description": "Faiss index for BEIR v1.0.0 (ArguAna) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-arguana.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-arguana.contriever.20230124.tar.gz" + ], + "md5": "03f701916d49dd86b9c8989796d2dcc4", + "size compressed (bytes)": 24710561, + "documents": 8674, + "downloaded": False, + "texts": "beir-v1.0.0-arguana.flat" + }, + "beir-v1.0.0-webis-touche2020.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Webis-Touche2020) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-webis-touche2020.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-webis-touche2020.contriever.20230124.tar.gz" + ], + "md5": "dfff9bc58521f09542f0affa3069f9a7", + "size compressed (bytes)": 1091320704, + "documents": 382545, + "downloaded": False, + "texts": "beir-v1.0.0-webis-touche2020.flat" + }, + "beir-v1.0.0-cqadupstack-android.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-android) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-android.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-android.contriever.20230124.tar.gz" + ], + "md5": "4f03c0238f0e8f77e6365b61108042ed", + "size compressed (bytes)": 65447231, + "documents": 22998, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-android.flat" + }, + "beir-v1.0.0-cqadupstack-english.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-english) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-english.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-english.contriever.20230124.tar.gz" + ], + "md5": "319e3cba8f5f5d5175aad92c99c4b0fd", + "size compressed (bytes)": 114460495, + "documents": 40221, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-english.flat" + }, + "beir-v1.0.0-cqadupstack-gaming.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gaming) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gaming.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gaming.contriever.20230124.tar.gz" + ], + "md5": "049f2cb22adfb5803a5f7f762f578bce", + "size compressed (bytes)": 128906099, + "documents": 45301, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-gaming.flat" + }, + "beir-v1.0.0-cqadupstack-gis.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gis) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gis.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gis.contriever.20230124.tar.gz" + ], + "md5": "13fdfa5a13634c10c1e7e6179bb4c376", + "size compressed (bytes)": 107128974, + "documents": 37637, + "downloaded": 
False, + "texts": "beir-v1.0.0-cqadupstack-gis.flat" + }, + "beir-v1.0.0-cqadupstack-mathematica.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-mathematica) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-mathematica.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-mathematica.contriever.20230124.tar.gz" + ], + "md5": "e4f756eede3ae5f9228d32096c1bd5b4", + "size compressed (bytes)": 47544559, + "documents": 16705, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-mathematica.flat" + }, + "beir-v1.0.0-cqadupstack-physics.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-physics) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-physics.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-physics.contriever.20230124.tar.gz" + ], + "md5": "b92ec0c233a1112d6f8782fb0f2bc9c1", + "size compressed (bytes)": 109048286, + "documents": 38316, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-physics.flat" + }, + "beir-v1.0.0-cqadupstack-programmers.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-programmers) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-programmers.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-programmers.contriever.20230124.tar.gz" + ], + "md5": "f180240f35e2a3c27d39361a20533205", + "size compressed (bytes)": 91583135, + "documents": 32176, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-programmers.flat" + }, + "beir-v1.0.0-cqadupstack-stats.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-stats) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-stats.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-stats.contriever.20230124.tar.gz" + ], + "md5": "64737df62b4e03b93356ba234cefe0e6", + "size compressed (bytes)": 120288620, + "documents": 42269, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-stats.flat" + }, + "beir-v1.0.0-cqadupstack-tex.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-tex) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-tex.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-tex.contriever.20230124.tar.gz" + ], + "md5": "ef087faff49e5bae0799e8576e387c0d", + "size compressed (bytes)": 194080724, + "documents": 68184, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-tex.flat" + }, + "beir-v1.0.0-cqadupstack-unix.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-unix) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-unix.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-unix.contriever.20230124.tar.gz" + ], + "md5": "9279884bfc3a14c2896276b679a58dbf", + "size compressed (bytes)": 134860159, + "documents": 47382, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-unix.flat" + }, + "beir-v1.0.0-cqadupstack-webmasters.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-webmasters) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-webmasters.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-webmasters.contriever.20230124.tar.gz" + ], + "md5": "f1a46fc6f6586c716d2a6239753c9573", + "size compressed (bytes)": 49531545, + "documents": 17405, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-webmasters.flat" + }, + "beir-v1.0.0-cqadupstack-wordpress.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-wordpress) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-wordpress.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-wordpress.contriever.20230124.tar.gz" + ], + "md5": "27480c7a4c8d437af30618bf98b10969", + "size compressed (bytes)": 138348184, + "documents": 48605, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-wordpress.flat" + }, + "beir-v1.0.0-quora.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Quora) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-quora.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-quora.contriever.20230124.tar.gz" + ], + "md5": "4876145908b7af946593df6dbb8af600", + "size compressed (bytes)": 1485866217, + "documents": 522931, + "downloaded": False, + "texts": "beir-v1.0.0-quora.flat" + }, + "beir-v1.0.0-dbpedia-entity.contriever": { + "description": "Faiss index for BEIR v1.0.0 (DBPedia) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-dbpedia-entity.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-dbpedia-entity.contriever.20230124.tar.gz" + ], + "md5": "ee88a23de31d3faf403673c08ea0c844", + "size compressed (bytes)": 13214316305, + "documents": 4635922, + "downloaded": False, + "texts": "beir-v1.0.0-dbpedia-entity.flat" + }, + "beir-v1.0.0-scidocs.contriever": { + "description": "Faiss index for BEIR v1.0.0 (SCIDOCS) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-scidocs.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scidocs.contriever.20230124.tar.gz" + ], + "md5": "dd1555b714c482a22cbb74d8c72599c9", + "size compressed (bytes)": 73532556, + "documents": 25657, + "downloaded": False, + "texts": "beir-v1.0.0-scidocs.flat" + }, + "beir-v1.0.0-fever.contriever": { + "description": "Faiss index for BEIR v1.0.0 (FEVER) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-fever.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fever.contriever.20230124.tar.gz" + ], + "md5": "d5b738dc38e56857a987bdb1eb4ce5c1", + "size compressed (bytes)": 15437918827, + "documents": 5416568, + "downloaded": False, + "texts": "beir-v1.0.0-fever.flat" + }, + "beir-v1.0.0-climate-fever.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Climate-FEVER) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-climate-fever.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-climate-fever.contriever.20230124.tar.gz" + ], + "md5": "1e169cf6a8baaa4909f6823e3c23a80f", + "size compressed (bytes)": 15437988868, + "documents": 5416593, + "downloaded": False, + "texts": "beir-v1.0.0-climate-fever.flat" + }, + "beir-v1.0.0-scifact.contriever": { + "description": "Faiss index for BEIR v1.0.0 (SciFact) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-scifact.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scifact.contriever.20230124.tar.gz" + ], + "md5": "61eb253aa08c9c97fa2f82ef2a96ca7b", + "size compressed (bytes)": 14753553, + "documents": 5183, + "downloaded": False, + "texts": "beir-v1.0.0-scifact.flat" + }, + + # BEIR (v1.0.0) contriever ft MSMARCO indexes + "beir-v1.0.0-trec-covid.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (TREC-COVID) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-trec-covid.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-covid.contriever-msmarco.20230124.tar.gz" + ], + "md5": "7dd33fbd77deba89174b6d1b2c34866c", + "size compressed (bytes)": 487986935, + "documents": 171332, + "downloaded": False, + "texts": "beir-v1.0.0-trec-covid.flat", + }, + "beir-v1.0.0-bioasq.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (BioASQ) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-bioasq.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-bioasq.contriever-msmarco.20230124.tar.gz" + ], + "md5": "e51924bb78555942f0a9465959a6f6f2", + "size compressed (bytes)": 42438279267, + "documents": 14914603, + "downloaded": False, + "texts": "beir-v1.0.0-bioasq.flat", + }, + "beir-v1.0.0-nfcorpus.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (NFCorpus) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-nfcorpus.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nfcorpus.contriever-msmarco.20230124.tar.gz" + ], + "md5": "657649d19fafd06cb031c6b11868d7f9", + "size compressed (bytes)": 10327231, + "documents": 3633, + "downloaded": False, + "texts": "beir-v1.0.0-nfcorpus.flat", + }, + "beir-v1.0.0-nq.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (NQ) corpus encoded by Contriever encoder that has 
been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-nq.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nq.contriever-msmarco.20230124.tar.gz" + ], + "md5": "8d7ff2e5e285b1549bb8af27a7cf6e30", + "size compressed (bytes)": 7619790303, + "documents": 2681468, + "downloaded": False, + "texts": "beir-v1.0.0-nq.flat", + }, + "beir-v1.0.0-hotpotqa.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (HotpotQA) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-hotpotqa.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-hotpotqa.contriever-msmarco.20230124.tar.gz" + ], + "md5": "bef5b2fba77859c778f121ae2f17c9f1", + "size compressed (bytes)": 14889518902, + "documents": 5233329, + "downloaded": False, + "texts": "beir-v1.0.0-hotpotqa.flat", + }, + "beir-v1.0.0-fiqa.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (FiQA-2018) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-fiqa.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fiqa.contriever-msmarco.20230124.tar.gz" + ], + "md5": "3dd16db861dbef4da545ccbea127198a", + "size compressed (bytes)": 163998627, + "documents": 57638, + "downloaded": False, + "texts": "beir-v1.0.0-fiqa.flat", + }, + "beir-v1.0.0-signal1m.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (Signal-1M) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-signal1m.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-signal1m.contriever-msmarco.20230124.tar.gz" + ], + "md5": "c4e25dc99c27a9d1931ad129d4091da0", + "size compressed (bytes)": 8146484698, + "documents": 2866316, + "downloaded": False, + "texts": "beir-v1.0.0-signal1m.flat", + }, + "beir-v1.0.0-trec-news.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (TREC-NEWS) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-trec-news.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-news.contriever-msmarco.20230124.tar.gz" + ], + "md5": "22272011f0e0dea7f66b624de196b6b3", + "size compressed (bytes)": 1629437319, + "documents": 594977, + "downloaded": False, + "texts": "beir-v1.0.0-trec-news.flat", + }, + "beir-v1.0.0-robust04.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (Robust04) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-robust04.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-robust04.contriever-msmarco.20230124.tar.gz" + ], + "md5": 
"a2a0603fae866e1e92abcdfc46de6fe5", + "size compressed (bytes)": 1501089289, + "documents": 528155, + "downloaded": False, + "texts": "beir-v1.0.0-robust04.flat", + }, + "beir-v1.0.0-arguana.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (ArguAna) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-arguana.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-arguana.contriever-msmarco.20230124.tar.gz" + ], + "md5": "dcc0408ab033433d47363f5902fbde3d", + "size compressed (bytes)": 24705859, + "documents": 8674, + "downloaded": False, + "texts": "beir-v1.0.0-arguana.flat", + }, + "beir-v1.0.0-webis-touche2020.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (Webis-Touche2020) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-webis-touche2020.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-webis-touche2020.contriever-msmarco.20230124.tar.gz" + ], + "md5": "60072a3b32855067fea0f8e21ce0d905", + "size compressed (bytes)": 1090748271, + "documents": 382545, + "downloaded": False, + "texts": "beir-v1.0.0-webis-touche2020.flat", + }, + "beir-v1.0.0-cqadupstack-android.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-android) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-android.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-android.contriever-msmarco.20230124.tar.gz" + ], + "md5": "f9b02c2410fc8ddf63e96ea6ebbd8447", + "size compressed (bytes)": 65438882, + "documents": 22998, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-android.flat", + }, + "beir-v1.0.0-cqadupstack-english.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-english) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-english.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-english.contriever-msmarco.20230124.tar.gz" + ], + "md5": "7c50f04a61a08f16dfb1d28010b4e222", + "size compressed (bytes)": 114462161, + "documents": 40221, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-english.flat", + }, + "beir-v1.0.0-cqadupstack-gaming.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gaming) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gaming.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gaming.contriever-msmarco.20230124.tar.gz" + ], + "md5": "d97fafe933ae40fc12a9df0afc6a8e78", + "size compressed (bytes)": 128896840, + "documents": 45301, + "downloaded": False, + "texts": 
"beir-v1.0.0-cqadupstack-gaming.flat", + }, + "beir-v1.0.0-cqadupstack-gis.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gis) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gis.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gis.contriever-msmarco.20230124.tar.gz" + ], + "md5": "f536d8feda0069a1769ad71010fab0e3", + "size compressed (bytes)": 107086862, + "documents": 37637, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-gis.flat", + }, + "beir-v1.0.0-cqadupstack-mathematica.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-mathematica) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-mathematica.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-mathematica.contriever-msmarco.20230124.tar.gz" + ], + "md5": "987fb7ac275baf344828cdda0013703d", + "size compressed (bytes)": 47526982, + "documents": 16705, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-mathematica.flat", + }, + "beir-v1.0.0-cqadupstack-physics.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-physics) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-physics.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-physics.contriever-msmarco.20230124.tar.gz" + ], + "md5": "e252b1c4dcb06d2183109dc4bc820176", + "size compressed (bytes)": 109024692, + "documents": 38316, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-physics.flat", + }, + "beir-v1.0.0-cqadupstack-programmers.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-programmers) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-programmers.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-programmers.contriever-msmarco.20230124.tar.gz" + ], + "md5": "09bd10b2b06c7b0c7611e7811958f4b3", + "size compressed (bytes)": 91567840, + "documents": 32176, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-programmers.flat", + }, + "beir-v1.0.0-cqadupstack-stats.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-stats) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-stats.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-stats.contriever-msmarco.20230124.tar.gz" + ], + "md5": "c4586c11a2bc90f9ea5a3355fc6e6c53", + "size compressed (bytes)": 120271253, + "documents": 42269, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-stats.flat", + }, + 
"beir-v1.0.0-cqadupstack-tex.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-tex) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-tex.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-tex.contriever-msmarco.20230124.tar.gz" + ], + "md5": "c3c5ec87aeb33a7320c0d61146c03fc0", + "size compressed (bytes)": 194009234, + "documents": 68184, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-tex.flat", + }, + "beir-v1.0.0-cqadupstack-unix.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-unix) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-unix.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-unix.contriever-msmarco.20230124.tar.gz" + ], + "md5": "3220f3eb0e9f0095cf13dcc8eb3ae1e0", + "size compressed (bytes)": 134821535, + "documents": 47382, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-unix.flat", + }, + "beir-v1.0.0-cqadupstack-webmasters.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-webmasters) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-webmasters.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-webmasters.contriever-msmarco.20230124.tar.gz" + ], + "md5": "f696855c02090833a6ca695f8efa3006", + "size compressed (bytes)": 49530869, + "documents": 17405, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-webmasters.flat", + }, + "beir-v1.0.0-cqadupstack-wordpress.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-wordpress) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-cqadupstack-wordpress.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-wordpress.contriever-msmarco.20230124.tar.gz" + ], + "md5": "e92063c046803a76010b57e0ef1ace9e", + "size compressed (bytes)": 138328541, + "documents": 48605, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-wordpress.flat", + }, + "beir-v1.0.0-quora.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (Quora) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-quora.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-quora.contriever-msmarco.20230124.tar.gz" + ], + "md5": "82481f11087ebf63156da1f3dda00d5e", + "size compressed (bytes)": 1487402659, + "documents": 522931, + "downloaded": False, + "texts": "beir-v1.0.0-quora.flat", + }, + "beir-v1.0.0-dbpedia-entity.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (DBPedia) corpus encoded by Contriever 
encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-dbpedia-entity.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-dbpedia-entity.contriever-msmarco.20230124.tar.gz" + ], + "md5": "5b9249745aa548776a8f22269bd55dbe", + "size compressed (bytes)": 13226846024, + "documents": 4635922, + "downloaded": False, + "texts": "beir-v1.0.0-dbpedia-entity.flat", + }, + "beir-v1.0.0-scidocs.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (SCIDOCS) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-scidocs.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scidocs.contriever-msmarco.20230124.tar.gz" + ], + "md5": "091d751629ae22d843ce741f05f00b81", + "size compressed (bytes)": 73530332, + "documents": 25657, + "downloaded": False, + "texts": "beir-v1.0.0-scidocs.flat", + }, + "beir-v1.0.0-fever.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (FEVER) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-fever.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fever.contriever-msmarco.20230124.tar.gz" + ], + "md5": "c1e9851e23c9f46e7210aedd613e4a1b", + "size compressed (bytes)": 15444001312, + "documents": 5416568, + "downloaded": False, + "texts": "beir-v1.0.0-fever.flat", + }, + "beir-v1.0.0-climate-fever.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (Climate-FEVER) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-climate-fever.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-climate-fever.contriever-msmarco.20230124.tar.gz" + ], + "md5": "1ec289569b80edb25d885100feba83aa", + "size compressed (bytes)": 15444073223, + "documents": 5416593, + "downloaded": False, + "texts": "beir-v1.0.0-climate-fever.flat", + }, + "beir-v1.0.0-scifact.contriever-msmarco": { + "description": "Faiss index for BEIR v1.0.0 (SciFact) corpus encoded by Contriever encoder that has been fine-tuned with MS MARCO passage.", + "filename": "faiss.beir-v1.0.0-scifact.contriever-msmarco.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scifact.contriever-msmarco.20230124.tar.gz" + ], + "md5": "e560d5de0ccb65f66853540cb6917369", + "size compressed (bytes)": 14758747, + "documents": 5183, + "downloaded": False, + "texts": "beir-v1.0.0-scifact.flat", + } +} + +FAISS_INDEX_INFO_MRTYDI = { + "mrtydi-v1.1-arabic-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-arabic.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-arabic.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-arabic.20220207.5df364.tar.gz", + 
"https://vault.cs.uwaterloo.ca/s/Jgj3rYjbyRrmJs8/download" # Note, this is Crystina's account. + ], + "md5": "de86c1ce43854bbeea4e3af5d95d6ffb", + "size compressed (bytes)": 5997943791, + "documents": 2106586, + "downloaded": False, + "texts": "mrtydi-v1.1-arabic" + }, + "mrtydi-v1.1-bengali-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-bengali.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-bengali.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-bengali.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/4PpkzXAQtXFFJHR/download" # Note, this is Crystina's account. + ], + "md5": "e60cb6f1f7139cf0551f0ba4e4e83bf6", + "size compressed (bytes)": 865716848, + "documents": 304059, + "downloaded": False, + "texts": "mrtydi-v1.1-bengali" + }, + "mrtydi-v1.1-english-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-english.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-english.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-english.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/A7pjbwYeoT4Krnj/download" # Note, this is Crystina's account. + ], + "md5": "a0a8cc39e8af782ec82188a18c4c97c3", + "size compressed (bytes)": 93585951488, + "documents": 32907100, + "downloaded": False, + "texts": "mrtydi-v1.1-english" + }, + "mrtydi-v1.1-finnish-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-finnish.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-finnish.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-finnish.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/erNYkrYzRZxpecz/download" # Note, this is Crystina's account. + ], + "md5": "3e4e18aacf07ca551b474315f267ead6", + "size compressed (bytes)": 5435516778, + "documents": 1908757, + "downloaded": False, + "texts": "mrtydi-v1.1-finnish" + }, + "mrtydi-v1.1-indonesian-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-indonesian.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-indonesian.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-indonesian.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/BpR3MzT7KJ6edx7/download" # Note, this is Crystina's account. + ], + "md5": "0bf693e4046d9a565ae18b9f5939d193", + "size compressed (bytes)": 865716848, + "documents": 4179177829, + "downloaded": False, + "texts": "mrtydi-v1.1-indonesian" + }, + "mrtydi-v1.1-japanese-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-japanese.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-japanese.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-japanese.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/k7bptHT8GwMJpnF/download" # Note, this is Crystina's account. 
+ ], + "md5": "4ba566e27bc0158108259b18a153e2fc", + "size compressed (bytes)": 19920816424, + "documents": 7000027, + "downloaded": False, + "texts": "mrtydi-v1.1-japanese" + }, + "mrtydi-v1.1-korean-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-korean.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-korean.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-korean.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/TigfYMde94YWAoE/download" # Note, this is Crystina's account. + ], + "md5": "44212e5722632d5bcb14f0680741638c", + "size compressed (bytes)": 4257414237, + "documents": 1496126, + "downloaded": False, + "texts": "mrtydi-v1.1-korean" + }, + "mrtydi-v1.1-russian-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-russian.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-russian.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-russian.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/eN7demnmnspqxjk/download" # Note, this is Crystina's account. + ], + "md5": "e7634093f2a3362928e9699441ce8a3b", + "size compressed (bytes)": 27317759143, + "documents": 9597504, + "downloaded": False, + "texts": "mrtydi-v1.1-russian" + }, + "mrtydi-v1.1-swahili-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-swahili.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-swahili.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-swahili.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/JgiX8PRftnqcPwy/download" # Note, this is Crystina's account. + ], + "md5": "5061bdd1d81bc32490bbb3682096acdd", + "size compressed (bytes)": 389658394, + "documents": 136689, + "downloaded": False, + "texts": "mrtydi-v1.1-swahili" + }, + "mrtydi-v1.1-telugu-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-telugu.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-telugu.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-telugu.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/dkm6RGdgRbnwiX2/download" # Note, this is Crystina's account. + ], + "md5": "4952dacaeae89185d3757f9f26af4e88", + "size compressed (bytes)": 1561173721, + "documents": 548224, + "downloaded": False, + "texts": "mrtydi-v1.1-telugu" + }, + "mrtydi-v1.1-thai-mdpr-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-thai.20220207.5df364.tar.gz", + "readme": "faiss.mrtydi-v1.1-thai.20220207.5df364.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-thai.20220207.5df364.tar.gz", + "https://vault.cs.uwaterloo.ca/s/fFrRYefd3nWFR3J/download" # Note, this is Crystina's account. 
+ ], + "md5": "2458f704b277fa8ffe2509b6296892a0", + "size compressed (bytes)": 1616059846, + "documents": 568855, + "downloaded": False, + "texts": "mrtydi-v1.1-thai" + }, + + "mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-arabic.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-arabic.20220413.aa1c0e9.tar.gz", + ], + "md5": "bafb6fb2c530567dec26aa4597c6ee25", + "size compressed (bytes)": 5997943791, + "documents": 2106586, + "downloaded": False, + "texts": "mrtydi-v1.1-arabic", + }, + "mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-bengali.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-bengali.20220413.aa1c0e9.tar.gz", + ], + "md5": "d04bb6e634fb4f7df23dbff7481a8f9b", + "size compressed (bytes)": 865733058, + "documents": 304059, + "downloaded": False, + "texts": "mrtydi-v1.1-bengali", + }, + "mrtydi-v1.1-english-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-english.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-english.20220413.aa1c0e9.tar.gz", + ], + "md5": "4a93a2211199f7359cc99486a9f93d02", + "size compressed (bytes)": 93594561391, + "documents": 32907100, + "downloaded": False, + "texts": "mrtydi-v1.1-english" + }, + "mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-finnish.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-finnish.20220413.aa1c0e9.tar.gz", + ], + "md5": "6cbe2d52225fb15a494857b9df593113", + "size compressed (bytes)": 5436419399, + "documents": 1908757, + "downloaded": False, + "texts": "mrtydi-v1.1-finnish" + }, + "mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-indonesian.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-indonesian.20220413.aa1c0e9.tar.gz", + ], + "md5": "26108a7ee1fc5ac15e0b7fcecf4d39ad", + "size compressed (bytes)": 4178791300, + "documents": 1469399, + "downloaded": False, + "texts": "mrtydi-v1.1-indonesian" + }, + "mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-japanese.20220413.aa1c0e9.tar.gz", + "readme": 
"faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-japanese.20220413.aa1c0e9.tar.gz", + ], + "md5": "2ef2b5e3f5778d99e65aafc48450508a", + "size compressed (bytes)": 19918319452, + "documents": 7000027, + "downloaded": False, + "texts": "mrtydi-v1.1-japanese" + }, + "mrtydi-v1.1-korean-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-korean.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-korean.20220413.aa1c0e9.tar.gz", + ], + "md5": "26ed9be031603019304b66f985ce154c", + "size compressed (bytes)": 4256863335, + "documents": 1496126, + "downloaded": False, + "texts": "mrtydi-v1.1-korean" + }, + "mrtydi-v1.1-russian-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-russian.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-russian.20220413.aa1c0e9.tar.gz", + ], + "md5": "b1be7a45a702be4021f38425c0001f39", + "size compressed (bytes)": 27318555548, + "documents": 9597504, + "downloaded": False, + "texts": "mrtydi-v1.1-russian" + }, + "mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-swahili.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-swahili.20220413.aa1c0e9.tar.gz", + ], + "md5": "14edb5f677820b5a5a3858555e900591", + "size compressed (bytes)": 389600527, + "documents": 136689, + "downloaded": False, + "texts": "mrtydi-v1.1-swahili" + }, + "mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-telugu.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-telugu.20220413.aa1c0e9.tar.gz", + ], + "md5": "25b37f5d7a035a17b447f1732e241b85", + "size compressed (bytes)": 1561419958, + "documents": 548224, + "downloaded": False, + "texts": "mrtydi-v1.1-telugu" + }, + "mrtydi-v1.1-thai-mdpr-tied-pft-msmarco": { + "description": "Faiss index for Mr.TyDi v1.1 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.mrtydi-v1.1-thai.20220413.aa1c0e9.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-thai.20220413.aa1c0e9.tar.gz", + ], + "md5": "0544ce677fa31b633a29a079c0cdfc82", + "size compressed (bytes)": 1616716166, + "documents": 568855, + "downloaded": False, + "texts": "mrtydi-v1.1-thai" + }, + "mrtydi-v1.1-arabic-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", 
+ "filename": "faiss.mrtydi-v1.1-arabic.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-arabic.20220523.7b099d5.tar.gz", + ], + "md5": "3d764e7936bb6beb5308ccfd6717b38e", + "size compressed (bytes)": 5988743258, + "documents": 2106586, + "downloaded": False, + "texts": "mrtydi-v1.1-arabic" + }, + "mrtydi-v1.1-bengali-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-bengali.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-bengali.20220523.7b099d5.tar.gz", + ], + "md5": "2ee8e550245f7eb5184c27fe3369d818", + "size compressed (bytes)": 864358280, + "documents": 304059, + "downloaded": False, + "texts": "mrtydi-v1.1-bengali" + }, + "mrtydi-v1.1-english-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-english.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-english.20220523.7b099d5.tar.gz", + ], + "md5": "a1be61486c209bf2545d63f950274a99", + "size compressed (bytes)": 93435965796, + "documents": 32907100, + "downloaded": False, + "texts": "mrtydi-v1.1-english" + }, + "mrtydi-v1.1-finnish-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-finnish.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-finnish.20220523.7b099d5.tar.gz", + ], + "md5": "0dbd873fa8bf8c87052940bdf4097ba2", + "size compressed (bytes)": 5427976705, + "documents": 1908757, + "downloaded": False, + "texts": "mrtydi-v1.1-finnish" + }, + "mrtydi-v1.1-indonesian-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-indonesian.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-indonesian.20220523.7b099d5.tar.gz", + ], + "md5": "937f7c03e2386166e34ef81b25d7959f", + "size compressed (bytes)": 4172976570, + "documents": 4179177829, + "downloaded": False, + "texts": "mrtydi-v1.1-indonesian" + }, + "mrtydi-v1.1-japanese-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-japanese.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-japanese.20220523.7b099d5.tar.gz", + ], + "md5": "21a64d1a012a854d4bf42fa24c8712fd", + "size compressed (bytes)": 19890571158, + "documents": 7000027, + "downloaded": False, + "texts": "mrtydi-v1.1-japanese" + }, + "mrtydi-v1.1-korean-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Korean) corpus encoded by mDPR 
passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-korean.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-korean.20220523.7b099d5.tar.gz", + ], + "md5": "ed3216fb5bc431ac52931b58cc4c4d0f", + "size compressed (bytes)": 4250320804, + "documents": 1496126, + "downloaded": False, + "texts": "mrtydi-v1.1-korean" + }, + "mrtydi-v1.1-russian-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-russian.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-russian.20220523.7b099d5.tar.gz", + ], + "md5": "c3c4db1397c7125f8e411cf637054148", + "size compressed (bytes)": 27278520787, + "documents": 9597504, + "downloaded": False, + "texts": "mrtydi-v1.1-russian" + }, + "mrtydi-v1.1-swahili-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-swahili.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-swahili.20220523.7b099d5.tar.gz", + ], + "md5": "20235115c0a877e11c91cb662d5a6fdb", + "size compressed (bytes)": 389244265, + "documents": 136689, + "downloaded": False, + "texts": "mrtydi-v1.1-swahili" + }, + "mrtydi-v1.1-telugu-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-telugu.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-telugu.20220523.7b099d5.tar.gz", + ], + "md5": "86cae6fe8f8c08489e49b6e6c28a09b0", + "size compressed (bytes)": 1558691592, + "documents": 548224, + "downloaded": False, + "texts": "mrtydi-v1.1-telugu" + }, + "mrtydi-v1.1-thai-mdpr-tied-pft-nq": { + "description": "Faiss index for Mr.TyDi v1.1 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-thai.20220523.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-thai.20220523.7b099d5.tar.gz", + ], + "md5": "3ba9c64a9f7479bd2e3a84a816ee0f6f", + "size compressed (bytes)": 1613563144, + "documents": 568855, + "downloaded": False, + "texts": "mrtydi-v1.1-thai" + }, + + "mrtydi-v1.1-arabic-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-arabic.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-arabic.20220524.7b099d5.tar.gz", + ], + "md5": "9ea47ae7425fd3376f015ca7c6ba5134", + "size compressed (bytes)": 5988743258, + "documents": 2106586, + "downloaded": False, + "texts": "mrtydi-v1.1-arabic" + }, + "mrtydi-v1.1-bengali-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 
(Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-bengali.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-bengali.20220524.7b099d5.tar.gz", + ], + "md5": "d1e75f4960a723b068bb778a972ffb54", + "size compressed (bytes)": 864358280, + "documents": 304059, + "downloaded": False, + "texts": "mrtydi-v1.1-bengali" + }, + "mrtydi-v1.1-english-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-english.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-english.20220524.7b099d5.tar.gz", + ], + "md5": "1fce43e549ff57bbac432a579961f34b", + "size compressed (bytes)": 93435965796, + "documents": 32907100, + "downloaded": False, + "texts": "mrtydi-v1.1-english" + }, + "mrtydi-v1.1-finnish-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-finnish.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-finnish.20220524.7b099d5.tar.gz", + ], + "md5": "6faa7b2fe8ad4b9ca284bd7e8f69b727", + "size compressed (bytes)": 5427976705, + "documents": 1908757, + "downloaded": False, + "texts": "mrtydi-v1.1-finnish" + }, + "mrtydi-v1.1-indonesian-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-indonesian.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-indonesian.20220524.7b099d5.tar.gz", + ], + "md5": "659b1e0a1bea46f62a842b55385085b7", + "size compressed (bytes)": 4172976570, + "documents": 1469399, + "downloaded": False, + "texts": "mrtydi-v1.1-indonesian" + }, + "mrtydi-v1.1-japanese-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-japanese.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-japanese.20220524.7b099d5.tar.gz", + ], + "md5": "126c82da9e0e0e1fd290cf62d7fe4dfa", + "size compressed (bytes)": 19890571158, + "documents": 7000027, + "downloaded": False, + "texts": "mrtydi-v1.1-japanese" + }, + "mrtydi-v1.1-korean-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned on all Mr. TyDi data.", + "filename": "faiss.mrtydi-v1.1-korean.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-korean.20220524.7b099d5.tar.gz", + ], + "md5": "cf07b71aaefba58bbe150265f6696503", + "size compressed (bytes)": 4250320804, + 
"documents": 1496126, + "downloaded": False, + "texts": "mrtydi-v1.1-korean" + }, + "mrtydi-v1.1-russian-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-russian.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-russian.20220524.7b099d5.tar.gz", + ], + "md5": "c0a53fa6428cb9b1399a90e3a9a805d5", + "size compressed (bytes)": 27278520787, + "documents": 9597504, + "downloaded": False, + "texts": "mrtydi-v1.1-russian" + }, + "mrtydi-v1.1-swahili-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-swahili.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-swahili.20220524.7b099d5.tar.gz", + ], + "md5": "93dc3f3453815c92f3bccf4f41c5f2d4", + "size compressed (bytes)": 389244265, + "documents": 136689, + "downloaded": False, + "texts": "mrtydi-v1.1-swahili" + }, + "mrtydi-v1.1-telugu-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-telugu.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-telugu.20220524.7b099d5.tar.gz", + ], + "md5": "7aba1b7ee36e572bd982b3f62f41c380", + "size compressed (bytes)": 1558691592, + "documents": 548224, + "downloaded": False, + "texts": "mrtydi-v1.1-telugu" + }, + "mrtydi-v1.1-thai-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for Mr.TyDi v1.1 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on NQ.", + "filename": "faiss.mrtydi-v1.1-thai.20220524.7b099d5.tar.gz", + "readme": "faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.mrtydi-v1.1-thai.20220524.7b099d5.tar.gz", + ], + "md5": "57151073a4c0d90b64242e4536a3af75", + "size compressed (bytes)": 1613563144, + "documents": 568855, + "downloaded": False, + "texts": "mrtydi-v1.1-thai" + } +} + +FAISS_INDEX_INFO_MIRACL = { + "miracl-v1.0-ar-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "177d47e9a802c87abca52380ad1ce83b", + "size compressed (bytes)": 5997943791, + "documents": 2061414, + "downloaded": False, + "texts": "miracl-v1.0-ar", + }, + "miracl-v1.0-bn-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "156e8ba8cd369b1c4a606e28ea025b2e", + "size compressed (bytes)": 846825710, + "documents": 297265, + "downloaded": False, + "texts": "miracl-v1.0-bn", + }, + "miracl-v1.0-en-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "ce00518f54b130a157112c2a1b2d0980", + "size compressed (bytes)": 93554329467, + "documents": 32893221, + "downloaded": False, + "texts": "miracl-v1.0-en" + }, + "miracl-v1.0-es-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Spanish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "d7a9627bb60e901386f455ba6c9063ac", + "size compressed (bytes)": 29553300598, + "documents": 10373953, + "downloaded": False, + "texts": "miracl-v1.0-es" + }, + "miracl-v1.0-fa-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Persian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "e8b59e3eb2e08f61f81569c6d4c85350", + "size compressed (bytes)": 6286832343, + "documents": 2207172, + "downloaded": False, + "texts": "miracl-v1.0-fa" + }, + "miracl-v1.0-fi-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "a82d6e6cf964d2e4cfac99cf14cbcc35", + "size compressed (bytes)": 5366190875, + "documents": 1883509, + "downloaded": False, + "texts": "miracl-v1.0-fi" + }, + "miracl-v1.0-fr-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (French) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "a952d944aa63dcee604c8357f1be18db", + "size compressed (bytes)": 41648462587, + "documents": 14636953, + "downloaded": False, + "texts": "miracl-v1.0-fr" + }, + "miracl-v1.0-hi-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Hindi) corpus 
encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "9d1dc4b948edf3df263977d82c9fcc3f", + "size compressed (bytes)": 1440625097, + "documents": 506264, + "downloaded": False, + "texts": "miracl-v1.0-hi" + }, + "miracl-v1.0-id-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "19815233f5cc3a198b88cdb990459637", + "size compressed (bytes)": 4115281873, + "documents": 1446315, + "downloaded": False, + "texts": "miracl-v1.0-id" + }, + "miracl-v1.0-ja-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "6e9b6e304b2b1a192a3d81e55880f971", + "size compressed (bytes)": 19791965448, + "documents": 6953614, + "downloaded": False, + "texts": "miracl-v1.0-ja" + }, + "miracl-v1.0-ko-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "ea1fa34341fc5d5ea88e5b633025d2d5", + "size compressed (bytes)": 4231563116, + "documents": 1486752, + "downloaded": False, + "texts": "miracl-v1.0-korean" + }, + "miracl-v1.0-ru-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "4325e716ee6af5ea2b73d4b25f1ad76c", + "size compressed (bytes)": 27173379698, + "documents": 9543918, + "downloaded": False, + "texts": "miracl-v1.0-ru" + }, + "miracl-v1.0-sw-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": 
"2b879dac6823077ae497ba8ebfce523b", + "size compressed (bytes)": 376181791, + "documents": 131924, + "downloaded": False, + "texts": "miracl-v1.0-sw" + }, + "miracl-v1.0-te-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "a3dfb8ba31f316c93d1fd147f88fbbfd", + "size compressed (bytes)": 1476021181, + "documents": 518079, + "downloaded": False, + "texts": "miracl-v1.0-te" + }, + "miracl-v1.0-th-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "cb0c9b84a80ff338372b32857c58368d", + "size compressed (bytes)": 1541590044, + "documents": 542166, + "downloaded": False, + "texts": "miracl-v1.0-th" + }, + "miracl-v1.0-zh-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "2743dfaa794b7abbef1d3c912c5cc4b5", + "size compressed (bytes)": 14046912361, + "documents": 4934368, + "downloaded": False, + "texts": "miracl-v1.0-zh", + }, + "miracl-v1.0-de-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (German) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-de.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-de.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "1abcf3aac78e30ebe7a75163412f1c84", + "size compressed (bytes)": 45154018897, + "documents": 15866222, + "downloaded": False, + "texts": "miracl-v1.0-de", + }, + "miracl-v1.0-yo-mdpr-tied-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Yoruba) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-yo.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-yo.mdpr-tied-pft-msmarco.20221004.2b2856.tar.gz" + ], + "md5": "2ad15ea0576ae3284082ae661e001faa", + "size compressed (bytes)": 139412730, + "documents": 49043, + "downloaded": False, + "texts": "miracl-v1.0-yo", + }, + + "miracl-v1.0-ar-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": 
"faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "428fbde84d2c18e48f0821298947a9d1", + "size compressed (bytes)": 5866199790, + "documents": 2061414, + "downloaded": False, + "texts": "miracl-v1.0-ar", + }, + "miracl-v1.0-bn-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "4394a09e043be9be5b820814a82fc8ac", + "size compressed (bytes)": 846476050, + "documents": 297265, + "downloaded": False, + "texts": "miracl-v1.0-bn", + }, + "miracl-v1.0-en-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "5bd57f5e4daf93294fd2cbd969c05bb3", + "size compressed (bytes)": 93527497283, + "documents": 32893221, + "downloaded": False, + "texts": "miracl-v1.0-en" + }, + "miracl-v1.0-es-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Spanish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "b6db16c1ab0ae95fec0465299c660d2a", + "size compressed (bytes)": 29544413180, + "documents": 10373953, + "downloaded": False, + "texts": "miracl-v1.0-es" + }, + "miracl-v1.0-fa-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Persian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "2a2825706211eb96bd3dbb616463c661", + "size compressed (bytes)": 6283957262, + "documents": 2207172, + "downloaded": False, + "texts": "miracl-v1.0-fa" + }, + "miracl-v1.0-fi-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "65719de730cda3fa5f6a8a75611db6eb", + "size compressed (bytes)": 5363289277, + "documents": 1883509, + "downloaded": False, + "texts": "miracl-v1.0-fi" + }, + "miracl-v1.0-fr-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (French) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "24eb2f63f78aa1e39b1ea61e20661424", + "size compressed (bytes)": 41635104326, + "documents": 14636953, + "downloaded": False, + "texts": "miracl-v1.0-fr" + }, + "miracl-v1.0-hi-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Hindi) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "d08aad08a8592aa40355fb7d50afd170", + "size compressed (bytes)": 1439798033, + "documents": 506264, + "downloaded": False, + "texts": "miracl-v1.0-hi" + }, + "miracl-v1.0-id-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "b02c20d4fc27e390ec5b1e9ca732dc5a", + "size compressed (bytes)": 4113737773, + "documents": 1446315, + "downloaded": False, + "texts": "miracl-v1.0-id" + }, + "miracl-v1.0-ja-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "a5f219c7f46a36c5c7a2555fbdaa0479", + "size compressed (bytes)": 19790154560, + "documents": 6953614, + "downloaded": False, + "texts": "miracl-v1.0-ja" + }, + "miracl-v1.0-ko-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "67b2a803eab3491a057d4ac6b81974f1", + "size compressed (bytes)": 4230830690, + "documents": 1486752, + "downloaded": False, 
+ "texts": "miracl-v1.0-korean" + }, + "miracl-v1.0-ru-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "edad6d5cb508de61ba84173d0ad2aa31", + "size compressed (bytes)": 27169921407, + "documents": 9543918, + "downloaded": False, + "texts": "miracl-v1.0-ru" + }, + "miracl-v1.0-sw-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "0b039d766b55f678102a59a6e050d0bc", + "size compressed (bytes)": 375865677, + "documents": 131924, + "downloaded": False, + "texts": "miracl-v1.0-sw" + }, + "miracl-v1.0-te-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Telugu) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "ea21915c69f70f41acadee4b6b83d129", + "size compressed (bytes)": 1474866678, + "documents": 518079, + "downloaded": False, + "texts": "miracl-v1.0-te" + }, + "miracl-v1.0-th-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "a5875b473109310789710e2f3df91b0f", + "size compressed (bytes)": 1540180247, + "documents": 542166, + "downloaded": False, + "texts": "miracl-v1.0-th" + }, + "miracl-v1.0-zh-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "a2d233e792d46c20c912d10afff033f5", + "size compressed (bytes)": 14043150097, + "documents": 4934368, + "downloaded": False, + "texts": "miracl-v1.0-zh", + }, + "miracl-v1.0-de-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": 
"faiss.miracl-v1.0-de.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-de.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "d53da12ae6119ed54ef968e968f8520a", + "size compressed (bytes)": 45139752128, + "documents": 15866222, + "downloaded": False, + "texts": "miracl-v1.0-de", + }, + "miracl-v1.0-yo-mdpr-tied-pft-msmarco-ft-all": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-yo.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz", + "readme": "faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-yo.mdpr-tied-pft-msmarco-ft-all.20221004.2b2856.tar.gz" + ], + "md5": "0a1b0f48108508724a3892dfc04eb756", + "size compressed (bytes)": 139286213, + "documents": 49043, + "downloaded": False, + "texts": "miracl-v1.0-yo", + }, + + "miracl-v1.0-ar-mdpr-tied-pft-msmarco-ft-miracl-ar": { + "description": "Faiss index for MIRACL v1.0 (Arabic) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco-ft-miracl-ar.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ar.mdpr-tied-pft-msmarco-ft-miracl-ar.20230329.e40d4a.tar.gz", + ], + "md5": "29cdb7fa7cc52cabc32791d57be3bd42", + "size compressed (bytes)": 5871030506, + "documents": 2061414, + "downloaded": False, + "texts": "miracl-v1.0-ar" + }, + "miracl-v1.0-bn-mdpr-tied-pft-msmarco-ft-miracl-bn": { + "description": "Faiss index for MIRACL v1.0 (Bengali) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco-ft-miracl-bn.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-bn.mdpr-tied-pft-msmarco-ft-miracl-bn.20230329.e40d4a.tar.gz", + ], + "md5": "8972166564a9c13e102ae83ea062c166", + "size compressed (bytes)": 846236944, + "documents": 297265, + "downloaded": False, + "texts": "miracl-v1.0-bn" + }, + "miracl-v1.0-en-mdpr-tied-pft-msmarco-ft-miracl-en": { + "description": "Faiss index for MIRACL v1.0 (English) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco-ft-miracl-en.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-en.mdpr-tied-pft-msmarco-ft-miracl-en.20230329.e40d4a.tar.gz", + ], + "md5": "cd43e6c93879a107b94396a42aa7c987", + "size compressed (bytes)": 93502848095, + "documents": 32893221, + "downloaded": False, + "texts": "miracl-v1.0-en" + }, + "miracl-v1.0-es-mdpr-tied-pft-msmarco-ft-miracl-es": { + "description": "Faiss index for MIRACL v1.0 (Spanish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": 
"faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco-ft-miracl-es.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-es.mdpr-tied-pft-msmarco-ft-miracl-es.20230329.e40d4a.tar.gz", + ], + "md5": "4f45c3171690dd691afcfc9e45b89494", + "size compressed (bytes)": 29552466540, + "documents": 10373953, + "downloaded": False, + "texts": "miracl-v1.0-es" + }, + "miracl-v1.0-fa-mdpr-tied-pft-msmarco-ft-miracl-fa": { + "description": "Faiss index for MIRACL v1.0 (Persian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco-ft-miracl-fa.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fa.mdpr-tied-pft-msmarco-ft-miracl-fa.20230329.e40d4a.tar.gz", + ], + "md5": "ae262fea849f6903c93e1f3269e07804", + "size compressed (bytes)": 6287728719, + "documents": 2207172, + "downloaded": False, + "texts": "miracl-v1.0-fa" + }, + "miracl-v1.0-fi-mdpr-tied-pft-msmarco-ft-miracl-fi": { + "description": "Faiss index for MIRACL v1.0 (Finnish) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco-ft-miracl-fi.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fi.mdpr-tied-pft-msmarco-ft-miracl-fi.20230329.e40d4a.tar.gz", + ], + "md5": "12c5c5c4dd8df37ad8ae90039851fbec", + "size compressed (bytes)": 5367069541, + "documents": 1883509, + "downloaded": False, + "texts": "miracl-v1.0-fi" + }, + "miracl-v1.0-fr-mdpr-tied-pft-msmarco-ft-miracl-fr": { + "description": "Faiss index for MIRACL v1.0 (French) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco-ft-miracl-fr.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fr.mdpr-tied-pft-msmarco-ft-miracl-fr.20230329.e40d4a.tar.gz", + ], + "md5": "8cf28f8df0805a848cb5c54d5f5d8bfb", + "size compressed (bytes)": 41654288474, + "documents": 14636953, + "downloaded": False, + "texts": "miracl-v1.0-fr" + }, + "miracl-v1.0-hi-mdpr-tied-pft-msmarco-ft-miracl-hi": { + "description": "Faiss index for MIRACL v1.0 (Hindi) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco-ft-miracl-hi.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-hi.mdpr-tied-pft-msmarco-ft-miracl-hi.20230329.e40d4a.tar.gz", + ], + "md5": "f579dfa45a5f14c48f97ba9980f7dec8", + "size compressed (bytes)": 1440859085, + "documents": 506264, + "downloaded": False, + "texts": "miracl-v1.0-hi" + }, + "miracl-v1.0-id-mdpr-tied-pft-msmarco-ft-miracl-id": { + "description": "Faiss index for MIRACL v1.0 (Indonesian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS 
MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco-ft-miracl-id.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-id.mdpr-tied-pft-msmarco-ft-miracl-id.20230329.e40d4a.tar.gz", + ], + "md5": "d5b540fb82fe21c1fd2b56e248184af6", + "size compressed (bytes)": 4111428848, + "documents": 1446315, + "downloaded": False, + "texts": "miracl-v1.0-id" + }, + "miracl-v1.0-ja-mdpr-tied-pft-msmarco-ft-miracl-ja": { + "description": "Faiss index for MIRACL v1.0 (Japanese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco-ft-miracl-ja.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ja.mdpr-tied-pft-msmarco-ft-miracl-ja.20230329.e40d4a.tar.gz", + ], + "md5": "e7ad21b12a7d5e937c55d49184d68814", + "size compressed (bytes)": 19790420501, + "documents": 6953614, + "downloaded": False, + "texts": "miracl-v1.0-ja" + }, + "miracl-v1.0-ko-mdpr-tied-pft-msmarco-ft-miracl-ko": { + "description": "Faiss index for MIRACL v1.0 (Korean) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco-ft-miracl-ko.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ko.mdpr-tied-pft-msmarco-ft-miracl-ko.20230329.e40d4a.tar.gz", + ], + "md5": "c31290dfae5429549500759279af3a8d", + "size compressed (bytes)": 4230154713, + "documents": 1486752, + "downloaded": False, + "texts": "miracl-v1.0-ko" + }, + "miracl-v1.0-ru-mdpr-tied-pft-msmarco-ft-miracl-ru": { + "description": "Faiss index for MIRACL v1.0 (Russian) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco-ft-miracl-ru.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ru.mdpr-tied-pft-msmarco-ft-miracl-ru.20230329.e40d4a.tar.gz", + ], + "md5": "b9460efd096292a1012ab1d27082498e", + "size compressed (bytes)": 27177739148, + "documents": 9543918, + "downloaded": False, + "texts": "miracl-v1.0-ru" + }, + "miracl-v1.0-sw-mdpr-tied-pft-msmarco-ft-miracl-sw": { + "description": "Faiss index for MIRACL v1.0 (Swahili) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco-ft-miracl-sw.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-sw.mdpr-tied-pft-msmarco-ft-miracl-sw.20230329.e40d4a.tar.gz", + ], + "md5": "526a930a27353462e11cc7e1b794dcc7", + "size compressed (bytes)": 375865597, + "documents": 131924, + "downloaded": False, + "texts": "miracl-v1.0-sw" + }, + "miracl-v1.0-te-mdpr-tied-pft-msmarco-ft-miracl-te": { + "description": "Faiss index for MIRACL v1.0 (Telugu) 
corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco-ft-miracl-te.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-te.mdpr-tied-pft-msmarco-ft-miracl-te.20230329.e40d4a.tar.gz", + ], + "md5": "f64b28542afdd15b2fe3831972bcd91e", + "size compressed (bytes)": 1475895517, + "documents": 518079, + "downloaded": False, + "texts": "miracl-v1.0-te" + }, + "miracl-v1.0-th-mdpr-tied-pft-msmarco-ft-miracl-th": { + "description": "Faiss index for MIRACL v1.0 (Thai) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco-ft-miracl-th.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-th.mdpr-tied-pft-msmarco-ft-miracl-th.20230329.e40d4a.tar.gz", + ], + "md5": "b6ba6d5363bf07a5dc8e1cd35fe11e93", + "size compressed (bytes)": 1540581013, + "documents": 542166, + "downloaded": False, + "texts": "miracl-v1.0-th" + }, + "miracl-v1.0-zh-mdpr-tied-pft-msmarco-ft-miracl-zh": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mDPR passage encoder pre-fine-tuned on MS MARCO, then fine-tuned in-language with MIRACL.", + "filename": "faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco-ft-miracl-zh.20230329.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-zh.mdpr-tied-pft-msmarco-ft-miracl-zh.20230329.e40d4a.tar.gz", + ], + "md5": "feba34e41cb8234988f7fb99bd8998f3", + "size compressed (bytes)": 14049243202, + "documents": 4934368, + "downloaded": False, + "texts": "miracl-v1.0-zh" + }, + + "miracl-v1.0-ar-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Arabic) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ar.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ar.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "80c18ac84922ae27bfbee881485816c6", + "size compressed (bytes)": 5861079368, + "documents": 2061414, + "downloaded": False, + "texts": "miracl-v1.0-ar", + }, + "miracl-v1.0-bn-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Bengali) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-bn.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-bn.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "08191b7749151a7bc70e54b92988dd25", + "size compressed (bytes)": 845828394, + "documents": 297265, + "downloaded": False, + "texts": "miracl-v1.0-bn", + }, + "miracl-v1.0-en-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (English) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + 
"filename": "faiss.miracl-v1.0-en.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-en.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "a460d0eb95cf8a278686531e13141d00", + "size compressed (bytes)": 93426889457, + "documents": 32893221, + "downloaded": False, + "texts": "miracl-v1.0-en" + }, + "miracl-v1.0-es-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Spanish) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-es.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-es.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "936e9188c4dcf57f8f116b9e25790372", + "size compressed (bytes)": 29499200527, + "documents": 10373953, + "downloaded": False, + "texts": "miracl-v1.0-es" + }, + "miracl-v1.0-fa-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Persian) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fa.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fa.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "58f83135ecafae6993e49f5f08e471ff", + "size compressed (bytes)": 6278766617, + "documents": 2207172, + "downloaded": False, + "texts": "miracl-v1.0-fa" + }, + "miracl-v1.0-fi-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Finnish) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fi.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fi.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "b10bc504213199fe0c0972678ab4fdd6", + "size compressed (bytes)": 5358004166, + "documents": 1883509, + "downloaded": False, + "texts": "miracl-v1.0-fi" + }, + "miracl-v1.0-fr-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (French) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-fr.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-fr.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "b0d5543824b456d9008d05d7dcef5272", + "size compressed (bytes)": 41578767020, + "documents": 14636953, + "downloaded": False, + "texts": "miracl-v1.0-fr" + }, + "miracl-v1.0-hi-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Hindi) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-hi.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-hi.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "ba66e98169b22244c7a7a89ae9bfe549", + "size compressed (bytes)": 1439122724, + "documents": 506264, + "downloaded": False, + "texts": "miracl-v1.0-hi" + }, + "miracl-v1.0-id-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Indonesian) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-id.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-id.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "700466ab62bfd4b0ceddff7aa9b7a5f8", + "size compressed (bytes)": 4113610061, + "documents": 1446315, + "downloaded": False, + "texts": "miracl-v1.0-id" + }, + "miracl-v1.0-ja-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Japanese) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ja.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ja.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "f0358ae58b32456c3cef5f71e83a0143", + "size compressed (bytes)": 19772957772, + "documents": 6953614, + "downloaded": False, + "texts": "miracl-v1.0-ja" + }, + "miracl-v1.0-ko-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Korean) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ko.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ko.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "fa00afb61fa4332c408069cb6eb2e8f2", + "size compressed (bytes)": 4229330667, + "documents": 1486752, + "downloaded": False, + "texts": "miracl-v1.0-korean" + }, + "miracl-v1.0-ru-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Russian) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-ru.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-ru.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "118835c214f7b24997ab9f1744b3f5ee", + "size compressed (bytes)": 27155045095, + "documents": 9543918, + "downloaded": False, + "texts": "miracl-v1.0-ru" + }, + "miracl-v1.0-sw-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Swahili) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-sw.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-sw.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "ae45812eadb685c672f7b19c084ae3bc", + "size compressed (bytes)": 375416284, + "documents": 131924, + 
"downloaded": False, + "texts": "miracl-v1.0-sw" + }, + "miracl-v1.0-te-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Telugu) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-te.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-te.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "8cbea3c141002dd477a15b387350ea37", + "size compressed (bytes)": 1474250608, + "documents": 518079, + "downloaded": False, + "texts": "miracl-v1.0-te" + }, + "miracl-v1.0-th-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Thai) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-th.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-th.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "58cd7d862f202ece45dbd4cb6b6d12f4", + "size compressed (bytes)": 1540980581, + "documents": 542166, + "downloaded": False, + "texts": "miracl-v1.0-th" + }, + "miracl-v1.0-zh-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Chinese) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-zh.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-zh.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "d8800abe1ac22b4161704f2b6d4fe575", + "size compressed (bytes)": 14034991692, + "documents": 4934368, + "downloaded": False, + "texts": "miracl-v1.0-zh", + }, + "miracl-v1.0-de-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (German) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-de.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-de.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "218cb42441af355285fbf219e9d2d7c7", + "size compressed (bytes)": 45085913144, + "documents": 15866222, + "downloaded": False, + "texts": "miracl-v1.0-de", + }, + "miracl-v1.0-yo-mcontriever-pft-msmarco": { + "description": "Faiss index for MIRACL v1.0 (Yoruba) corpus encoded by mContriever passage encoder pre-fine-tuned on MS MARCO.", + "filename": "faiss.miracl-v1.0-yo.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz", + "readme": "faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.miracl-v1.0-yo.mcontriever-tied-pft-msmarco.20230313.e40d4a.tar.gz" + ], + "md5": "f8aee10055a31914c4c214819a7c1890", + "size compressed (bytes)": 139276690, + "documents": 49043, + "downloaded": False, + "texts": "miracl-v1.0-yo", + } + +} + +FAISS_INDEX_INFO_WIKIPEDIA = { + "wikipedia-dpr-100w.dpr-multi": { + "description": "Faiss FlatIP index of Wikipedia encoded by the DPR doc encoder trained on multiple QA datasets", + 
"filename": "faiss.wikipedia-dpr-100w.dpr_multi.20200127.f403c3.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.dpr_multi.20200127.f403c3.tar.gz" + ], + "md5": "fe307ef2e60ab6e6f3ad66e24a4144ae", + "size compressed (bytes)": 59836766732, + "documents": 21015320, + "downloaded": False, + "texts": "wikipedia-dpr-100w" + }, + "wikipedia-dpr-100w.dpr-single-nq": { + "description": "Faiss FlatIP index of Wikipedia encoded by the DPR doc encoder trained on NQ", + "filename": "faiss.wikipedia-dpr-100w.dpr_single-nq.20200115.cd5034.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.dpr_single-nq.20200115.cd5034.tar.gz" + ], + "md5": "01fb6bcaa047df254663d0a3d854b7cc", + "size compressed (bytes)": 59836863979, + "documents": 21015320, + "downloaded": False, + "texts": "wikipedia-dpr-100w" + }, + "wikipedia-dpr-100w.bpr-single-nq": { + "description": "Faiss binary index of Wikipedia encoded by the BPR doc encoder trained on NQ", + "filename": "faiss.wikipedia-dpr-100w.bpr_single-nq.20210827.8a8f75.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.bpr_single-nq.20210827.8a8f75.tar.gz" + ], + "md5": "b022580ab2fc66f6eaa54af241dba690", + "size compressed (bytes)": 1886380629, + "documents": 21015320, + "downloaded": False, + "texts": "wikipedia-dpr-100w" + }, + "wikipedia-dpr-100w.ance-multi": { + "description": "Faiss FlatIP index of Wikipedia encoded by the ANCE-multi encoder", + "filename": "faiss.wikipedia-dpr-100w.ance_multi.20210224.060cef.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.ance_multi.20210224.060cef.tar.gz" + ], + "md5": "eb00e096460c8e6296a39732f1676dd7", + "size compressed (bytes)": 59890491335, + "documents": 21015320, + "downloaded": False, + "texts": "wikipedia-dpr-100w" + }, + "wikipedia-dpr-100w.dkrr-nq": { + "description": "Faiss FlatIP index of Wikipedia DPR encoded by the retriever model from 'Distilling Knowledge from Reader to Retriever for Question Answering' trained on NQ", + "filename": "faiss.wikipedia-dpr-100w.dkrr-dpr-nq-retriever.20220217.25ed1f.cc91b2.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.dkrr-dpr-nq-retriever.20220217.25ed1f.cc91b2.tar.gz", + ], + "md5": "36a658e08dafb3e3313b05f88e001557", + "size compressed (bytes)": 37812137732, + "documents": 21015324, + "downloaded": False, + "texts": "wwikipedia-dpr-100w" + }, + "wikipedia-dpr-100w.dkrr-tqa": { + "description": "Faiss FlatIP index of Wikipedia DPR encoded by the retriever model from 'Distilling Knowledge from Reader to Retriever for Question Answering' trained on TriviaQA", + "filename": "faiss.wikipedia-dpr-100w.dkrr-dpr-tqa-retriever.20220217.25ed1f.cc91b2.tar.gz", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wikipedia-dpr-100w.dkrr-dpr-tqa-retriever.20220217.25ed1f.cc91b2.tar.gz", + ], + "md5": "072a514ca3ff7717339038d024019e3d", + "size compressed (bytes)": 37802648577, + "documents": 21015324, + "downloaded": False, + "texts": "wikipedia-dpr-100w" + }, + "wiki-all-6-3.dpr2-multi-retriever": { + "description": "Faiss FlatIP index of wiki-all-6-3-tamber encoded by a 2nd iteration DPR model trained on multiple QA datasets", + "filename": "faiss.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.tar.gz", + "readme": "faiss-flat.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.README.md", + "urls": [ + 
"https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.tar.gz", + ], + "md5": "823b6297d6fd8011598e7618742ac7f8", + "size compressed (bytes)": 218257913366, + "documents": 76680040, + "downloaded": False, + "texts": "wiki-all-6-3-tamber" + } +} + +FAISS_INDEX_INFO_OTHER = { + "cast2019-tct_colbert-v2.hnsw": { + "description": "Faiss HNSW index of the CAsT2019 passage corpus encoded by the tct_colbert-v2 passage encoder", + "filename": "faiss-hnsw.cast2019.tct_colbert-v2.tar.gz", + "readme": "faiss-hnsw.cast2019.tct_colbert-v2-readme.txt", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/hnsw-faiss.cast2019.tct_colbert-v2.tar.gz" + ], + "md5": "2ce7ce8064ed235a9b6aad08571340d4", + "size compressed (bytes)": 112121368296, + "documents": 38429835, + "downloaded": False, + "texts": "cast2019" + } +} + +FAISS_INDEX_INFO = {**FAISS_INDEX_INFO_MSMARCO, + **FAISS_INDEX_INFO_BEIR, + **FAISS_INDEX_INFO_MRTYDI, + **FAISS_INDEX_INFO_MIRACL, + **FAISS_INDEX_INFO_WIKIPEDIA, + **FAISS_INDEX_INFO_OTHER} diff --git a/pyserini/pyclass.py b/pyserini/pyclass.py new file mode 100644 index 0000000000000000000000000000000000000000..599f3cfc0ac9da42464caa4b9cb9b1cc148525e6 --- /dev/null +++ b/pyserini/pyclass.py @@ -0,0 +1,36 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Module for hiding Python-Java calls via Pyjnius +""" + +from .setup import configure_classpath, os + +# If the environment variable isn't defined, look in the current directory. +configure_classpath(os.environ['ANSERINI_CLASSPATH'] if 'ANSERINI_CLASSPATH' in os.environ else + os.path.join(os.path.split(__file__)[0], 'resources/jars/')) + +from jnius import autoclass, cast + +# Base Java classes +JString = autoclass('java.lang.String') +JFloat = autoclass('java.lang.Float') +JPath = autoclass('java.nio.file.Path') +JPaths = autoclass('java.nio.file.Paths') +JList = autoclass('java.util.List') +JArrayList = autoclass('java.util.ArrayList') +JHashMap = autoclass('java.util.HashMap') diff --git a/pyserini/query_iterator.py b/pyserini/query_iterator.py new file mode 100644 index 0000000000000000000000000000000000000000..f48a1d7d1a851a9df1f31485abf3f379665061fa --- /dev/null +++ b/pyserini/query_iterator.py @@ -0,0 +1,161 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import json +from abc import ABC, abstractmethod +from enum import Enum, unique +from pathlib import Path + +from pyserini.search import get_topics, get_topics_with_reader +from pyserini.util import download_url, get_cache_home +from pyserini.external_query_info import KILT_QUERY_INFO +from urllib.error import HTTPError, URLError + + +@unique +class TopicsFormat(Enum): + DEFAULT = 'default' + KILT = 'kilt' + + +class QueryIterator(ABC): + + PREDEFINED_ORDER = {'msmarco-doc-dev', + 'msmarco-doc-test', + 'msmarco-passage-dev-subset', + 'msmarco-passage-test-subset'} + + def __init__(self, topics: dict, order: list = None): + self.order = order if order else sorted(topics.keys()) + self.topics = topics + + @abstractmethod + def get_query(self, id_): + raise NotImplementedError() + + @classmethod + @abstractmethod + def from_topics(cls, topics_path: str): + raise NotImplementedError() + + def __iter__(self): + for id_ in self.order: + yield id_, self.get_query(id_) + + def __len__(self): + return len(self.topics.keys()) + + @staticmethod + def get_predefined_order(topics_path: str): + order = None + normalized_path = Path(topics_path).stem # get filename w/o extension + normalized_path = normalized_path.replace('_', '-') + + if normalized_path in QueryIterator.PREDEFINED_ORDER: + print(f'Using pre-defined topic order for {normalized_path}') + # Lazy import; look up the order by the normalized name, which is what PREDEFINED_ORDER and QUERY_IDS are keyed on: + from pyserini.query_iterator_order_info import QUERY_IDS + order = QUERY_IDS[normalized_path] + return order + + +class DefaultQueryIterator(QueryIterator): + + def get_query(self, id_): + return self.topics[id_].get('title') + + @classmethod + def from_topics(cls, topics_path: str): + if os.path.exists(topics_path): + if topics_path.endswith('.json'): + with open(topics_path, 'r') as f: + topics = json.load(f) + elif 'beir' in topics_path: + topics = get_topics_with_reader('io.anserini.search.topicreader.TsvStringTopicReader', topics_path) + elif topics_path.endswith('.tsv') or topics_path.endswith('.tsv.gz'): + try: + topics = get_topics_with_reader('io.anserini.search.topicreader.TsvIntTopicReader', topics_path) + except ValueError as e: + topics = get_topics_with_reader('io.anserini.search.topicreader.TsvStringTopicReader', topics_path) + elif topics_path.endswith('.trec'): + topics = get_topics_with_reader('io.anserini.search.topicreader.TrecTopicReader', topics_path) + elif 'cacm' in topics_path: + topics = get_topics_with_reader('io.anserini.search.topicreader.CacmTopicReader', topics_path) + elif topics_path.endswith('.jsonl'): + topics = get_topics_with_reader('io.anserini.search.topicreader.JsonStringTopicReader', topics_path) + else: + raise NotImplementedError(f"Not sure how to parse {topics_path}. 
Please specify the file extension.") + else: + topics = get_topics(topics_path) + if not topics: + raise FileNotFoundError(f'Topic {topics_path} Not Found') + order = QueryIterator.get_predefined_order(topics_path) + return cls(topics, order) + + +class KiltQueryIterator(QueryIterator): + + ENT_START_TOKEN = "[START_ENT]" + ENT_END_TOKEN = "[END_ENT]" + + def get_query(self, id_): + datapoint = self.topics[id_] + query = ( + datapoint["input"] + .replace(KiltQueryIterator.ENT_START_TOKEN, "") + .replace(KiltQueryIterator.ENT_END_TOKEN, "") + .strip() + ) + return query + + @classmethod + def from_topics(cls, topics_path: str): + topics = {} + order = [] + if not os.path.exists(topics_path): + # Download if necessary: + topics_path = cls.download_kilt_topics(topics_path) + with open(topics_path, 'r') as f: + for line in f: + datapoint = json.loads(line) + topics[datapoint["id"]] = datapoint + order.append(datapoint["id"]) + return cls(topics, order) + + @classmethod + def download_kilt_topics(cls, task: str, force=False): + if task not in KILT_QUERY_INFO: + raise ValueError(f'Unrecognized query name {task}') + task = KILT_QUERY_INFO[task] + md5 = task['md5'] + save_dir = os.path.join(get_cache_home(), 'queries') + if not os.path.exists(save_dir): + os.makedirs(save_dir) + for url in task['urls']: + try: + return download_url(url, save_dir, force=force, md5=md5) + except (HTTPError, URLError) as e: + print(f'Unable to download encoded query at {url}, trying next URL...') + raise ValueError(f'Unable to download encoded query at any known URLs.') + + +def get_query_iterator(topics_path: str, topics_format: TopicsFormat): + mapping = { + TopicsFormat.DEFAULT: DefaultQueryIterator, + TopicsFormat.KILT: KiltQueryIterator, + } + return mapping[topics_format].from_topics(topics_path) diff --git a/pyserini/query_iterator_order_info.py b/pyserini/query_iterator_order_info.py new file mode 100644 index 0000000000000000000000000000000000000000..8ddffdc8235b952e084e270a13da297765e7f497 --- /dev/null +++ b/pyserini/query_iterator_order_info.py @@ -0,0 +1,22 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +QUERY_IDS = { + 'msmarco-doc-dev': [174249, 320792, 1090270, 1101279, 201376, 54544, 118457, 178627, 1101278, 68095, 87892, 257309, 1090242, 211691, 165002, 1101276, 264827, 342285, 372586, 89786, 118448, 92542, 206117, 141472, 196232, 352818, 208145, 79891, 208494, 319564, 155234, 14151, 67802, 1090184, 323382, 323998, 289812, 333486, 1090171, 73257, 1090170, 127876, 1090165, 259417, 1101271, 205107, 307118, 335710, 127984, 1090151, 1090146, 1090132, 1090115, 1090110, 1090107, 1090086, 1090077, 1090072, 1090054, 1101259, 1089983, 1089966, 1089964, 1089940, 1089925, 1089896, 1101236, 1089868, 1089846, 1089832, 1089810, 1101228, 1089804, 1089787, 1089776, 1089763, 1089760, 1089719, 1089706, 1089693, 1089691, 1089688, 1089683, 1089674, 1101214, 1089645, 1101211, 1089619, 1089597, 1089576, 1089560, 1089541, 1089511, 1089501, 1089469, 1089443, 1089438, 1089434, 1089414, 1089408, 1089401, 1089355, 1089325, 1089312, 1089293, 1089286, 1089273, 1101173, 1101172, 1101171, 1089177, 1089167, 1089158, 1089156, 1089143, 1089121, 1089093, 1089085, 1089071, 1089051, 1089044, 1089043, 1089036, 1089027, 1089026, 1089022, 1089021, 1089002, 1089001, 1088993, 1088987, 1088973, 1088960, 1088958, 1088947, 1088938, 1088928, 1088903, 1088889, 1088884, 1088869, 1088856, 1088845, 1088832, 1088800, 1101131, 1102300, 1088758, 1088734, 1088718, 1088693, 1101121, 1088685, 1088628, 1088606, 1088541, 1088539, 1088475, 1088453, 1101090, 1088437, 1101088, 1088379, 1088358, 1088349, 1088302, 1088164, 1088153, 1088138, 1088043, 1087999, 1087959, 1101048, 1087915, 1087911, 1087869, 1087858, 1087848, 1087803, 1087795, 1087774, 1087766, 1087736, 1087729, 1087727, 1087722, 1087687, 1087680, 1087675, 1087634, 1101018, 1087589, 1087581, 1087566, 1087556, 1087532, 1087514, 1087492, 1087487, 1087486, 1087484, 1087425, 1087375, 1087361, 1087351, 1087327, 1087317, 1087309, 1087238, 1087226, 1087215, 1100986, 1087204, 1087185, 1087173, 1087171, 1100980, 1087129, 1087122, 1087114, 1087105, 1087077, 1087076, 1087074, 1087066, 1087061, 1087047, 1087046, 1087042, 1087018, 1087014, 1087001, 1086974, 1086933, 1086928, 1086927, 1086917, 1086915, 1086893, 1086886, 1086883, 1086874, 1086860, 1086855, 1086836, 1086834, 1086765, 1086760, 1086715, 1086713, 1100937, 1086708, 1086701, 1086681, 1086679, 1100933, 1086628, 1100930, 1086595, 1086581, 1086565, 1086555, 1086532, 1086477, 1100919, 1086468, 1086439, 1086430, 1086424, 1086391, 1086385, 1086384, 1086326, 1086309, 1086288, 1086281, 1086271, 1086266, 1086248, 1086241, 1086224, 1086200, 1086174, 1086120, 1086075, 1086046, 1086022, 1086014, 1085980, 1085967, 1100875, 1085943, 1085918, 1085889, 1085845, 1085842, 1085812, 1085804, 1085796, 1085780, 1085779, 1085764, 1085762, 1085760, 1085733, 1085697, 1085674, 1100852, 1085658, 1085630, 1085613, 1085586, 1085584, 1085572, 1085550, 1085533, 1085532, 1085521, 1085517, 1100839, 1085510, 1085457, 1085456, 1085454, 1085441, 1085422, 1085421, 1085393, 1085356, 1085348, 1085341, 1085339, 1085327, 1085319, 1085288, 1085279, 1085245, 1085229, 1100816, 1085197, 1085141, 1085048, 1085035, 1085013, 1085008, 1084986, 1084982, 1084971, 1084910, 1084906, 1084898, 1084889, 1084887, 1100783, 1084848, 1084838, 1084814, 1084769, 1084755, 1084722, 1084713, 1084712, 1084686, 1084603, 1084582, 1084518, 1084516, 1084512, 1084478, 1084475, 1084469, 1084408, 1084403, 1084389, 1084383, 1084354, 1084336, 1084330, 1084326, 1084324, 1084308, 1084301, 1100732, 1084276, 1084273, 1084233, 1084230, 1084197, 1100724, 1084192, 1084086, 1084076, 1084075, 1084038, 1083997, 
1083948, 1083945, 1083933, 1083926, 1083909, 1083832, 1083831, 1083822, 1083800, 1083797, 1100687, 1083783, 1083727, 1083721, 1083704, 1083690, 1083686, 1083641, 1083627, 1083597, 1083584, 1083535, 1083517, 1083502, 1083500, 1083499, 1083472, 1083443, 1083430, 1083428, 1083410, 1083362, 1083345, 1083341, 1083340, 1083332, 1083307, 1083293, 1100639, 1083285, 1083278, 1083268, 1083267, 1100634, 1083161, 1083158, 1083152, 1083127, 1083125, 1083108, 1083095, 1083092, 1083085, 1083017, 1083010, 1083000, 1082948, 1082947, 1082924, 1082893, 1082870, 1082840, 1082835, 1082807, 1082792, 1082779, 1082759, 1082751, 1082750, 1082730, 1082668, 1082653, 1082622, 1082607, 1100581, 1082536, 1082531, 1082502, 1082455, 1082445, 1082427, 1082384, 1082377, 1082351, 1082341, 1082339, 1082332, 1082281, 1082265, 1082263, 1082242, 1082117, 1100544, 1081946, 1100541, 1081730, 1081609, 1081595, 1081338, 1100537, 1081091, 1081086, 1080970, 1080950, 1100533, 1080555, 1080406, 1080253, 1080031, 1080010, 1102240, 1079535, 1079340, 1079086, 1079050, 1078906, 1078765, 1077844, 1077019, 1077006, 1077002, 1100499, 1076269, 1075980, 1100492, 1075919, 1075741, 1075656, 1100488, 1075608, 1075591, 1075348, 1075313, 1075262, 1074997, 1074995, 1074989, 1074949, 1074883, 1074807, 1074804, 1074001, 1073640, 1073569, 1073365, 1073358, 1072874, 1072750, 1072603, 1072513, 1100458, 1072500, 1100457, 1100455, 1072188, 1100454, 1071992, 1071598, 1071545, 1071534, 1071389, 1071198, 1070546, 1070452, 1070324, 1070131, 1100438, 1069981, 1069717, 1069521, 1069405, 1069344, 1069222, 1069128, 1068952, 1068715, 1068584, 1068290, 1068276, 1067826, 1067772, 1067764, 1067724, 1067659, 1100415, 1067640, 1067587, 1067284, 1067276, 1066971, 1066966, 1066958, 1066916, 1066709, 1100403, 1066161, 1065971, 1065712, 1065650, 1065558, 1065551, 1065494, 1065160, 1064961, 1064808, 1064687, 1064206, 1064195, 1063974, 1063892, 1063777, 1063758, 1063702, 1063644, 1063607, 1063478, 1063349, 1100370, 1062961, 1062928, 1062784, 1062744, 1062687, 1062589, 1062511, 1062350, 1062332, 1062223, 1061762, 1061472, 1061324, 1061237, 1061210, 1061167, 1060881, 1060868, 1060795, 1060623, 1060566, 1060496, 1060462, 1060391, 1060040, 1060039, 1059698, 1059601, 1059421, 1059420, 1059287, 1059077, 1059045, 1100319, 1058952, 1058885, 1058604, 1100308, 1058601, 1058515, 1058470, 1058442, 1058325, 1058271, 1058182, 1058141, 1100299, 1058036, 1057996, 1057937, 1057708, 1057656, 1057539, 1057334, 1057139, 1057112, 1057015, 1056758, 1056742, 1056726, 1056644, 1056580, 1056437, 1056211, 1056163, 1056159, 1056060, 1056057, 1055940, 1055889, 1055717, 1055505, 1055197, 1055125, 1054999, 1054969, 1054958, 1054923, 1054707, 1054468, 1054451, 1054450, 1054440, 1054438, 1054189, 1054023, 1053931, 1053901, 1053896, 1053716, 1100229, 1102206, 1100224, 1053253, 1053219, 1052985, 1052965, 1100218, 1052640, 1052115, 1052089, 1051942, 1051902, 1051808, 1051571, 1051530, 1051520, 1051475, 1051422, 1051372, 1051352, 1051339, 1051307, 1100190, 1051285, 1051229, 1051223, 1051214, 1051211, 1100188, 1051112, 1100187, 1051108, 1050923, 1050778, 1050695, 1050231, 1049955, 1100173, 1049791, 1049774, 1049456, 1100168, 1049329, 1100167, 1049085, 1048917, 1048642, 1048381, 1048359, 1048303, 1048282, 1048281, 1100151, 1048185, 1047917, 1047913, 1047854, 1047843, 1047833, 1047794, 1047738, 1047708, 1047702, 1047700, 1047662, 1047642, 1047629, 1047599, 1100137, 1047548, 1047386, 1047365, 1047160, 1047138, 1047010, 1046969, 1046931, 1046736, 1100119, 1046569, 1046520, 1046475, 1046463, 1046161, 1100106, 1046047, 
1046042, 1100105, 1045855, 1045826, 1045717, 1045709, 1045567, 1100094, 1045554, 1045540, 1045527, 1045494, 1045374, 1045347, 1045229, 1045227, 1045208, 1045135, 1045071, 1100077, 1100070, 1043914, 1043815, 1043658, 1043568, 1043545, 1043337, 1043064, 1042978, 1042800, 1042752, 1042626, 1042426, 1042364, 1042158, 1042099, 1041951, 1041948, 1041924, 1100035, 1041520, 1041226, 1041146, 1041043, 1040959, 1040848, 1040703, 1040532, 1040409, 1040312, 1040099, 1040088, 1040082, 1040064, 1100010, 1040038, 1040030, 1039728, 1039521, 1039298, 1099998, 1039002, 1038879, 1038859, 1038830, 1038724, 1038527, 1099985, 1038184, 1099981, 1099980, 1037872, 1037817, 1037689, 1037250, 1036784, 1036782, 1036627, 1102177, 1036385, 1036380, 1036244, 1036214, 1036005, 1035931, 1035874, 1035805, 1035719, 1035535, 1035383, 1035379, 1035367, 1035321, 1035278, 1035247, 1035006, 1034845, 1034761, 1034703, 1034680, 1034666, 1034595, 1034587, 1034446, 1034409, 1034204, 1034136, 1099914, 1033962, 1033927, 1099911, 1033725, 1033718, 1033652, 1033534, 1033398, 1033250, 1033249, 1033205, 1033092, 1032822, 1032758, 1032341, 1032281, 1099888, 1032198, 1032182, 1032019, 1031976, 1031861, 1031684, 1031682, 1031054, 1031033, 1031032, 1030924, 1030823, 1030722, 1030623, 1099859, 1030381, 1099855, 1030378, 1030324, 1030176, 1029909, 1029908, 1029772, 1029617, 1029552, 1029544, 1029492, 1099836, 1099834, 1029124, 1029058, 1029031, 1029030, 1029003, 1028796, 1028755, 1028753, 1099823, 1028711, 1028608, 1028598, 1028538, 1099816, 1028179, 1028098, 1027919, 1027817, 1027812, 1102163, 1027373, 1099806, 1099805, 1027178, 1026991, 1026799, 1026768, 1026148, 1025991, 1025801, 1025624, 1025483, 1025290, 1025270, 1025259, 1024904, 1024893, 1024727, 1024672, 1024667, 1024592, 1024591, 1024528, 1024288, 1024221, 1024166, 1024069, 1099756, 1023025, 1022907, 1022832, 1022782, 1022712, 1022621, 1022442, 1022410, 1022370, 1022359, 1022198, 1022178, 1022124, 1021971, 1099729, 1021931, 1021900, 1099726, 1021797, 1021695, 1021605, 1021327, 1021324, 1021170, 1021065, 1021053, 1020907, 1020724, 1020500, 1099706, 1020244, 1099700, 1019783, 1019724, 1019649, 1019414, 1019262, 1018918, 1018807, 1018658, 1018359, 1099670, 1018056, 1018032, 1017971, 1017952, 1017773, 1017706, 1017692, 1017687, 1017605, 1017529, 1017524, 1017476, 1017276, 1017204, 1016915, 1016879, 1099653, 1016676, 1016611, 1016583, 1016281, 1016154, 1016015, 1016013, 1015641, 1099636, 1015347, 1015307, 1099632, 1014885, 1014264, 1099626, 1014210, 1014132, 1013965, 1013797, 1013615, 1013592, 1013579, 1013570, 1013492, 1013424, 1013304, 1013267, 1013229, 1013114, 1012866, 1012865, 1099595, 1012547, 1012431, 1012026, 1011925, 1011713, 1011618, 1011512, 1011381, 1011328, 1011248, 1011166, 1011140, 1011120, 1011044, 1011021, 1011018, 1010615, 1010527, 1010524, 1010287, 1010057, 1010048, 1009994, 1009961, 1009749, 1009742, 1009724, 1009695, 1009610, 1009527, 1009388, 1009237, 1009183, 1009023, 1008979, 1008977, 1008968, 1008951, 1008947, 1008911, 1099495, 1008515, 1099482, 1007972, 1007959, 1007934, 1007696, 1007691, 1007673, 1007628, 1007606, 1007550, 1007473, 1007242, 1006911, 1006791, 1006751, 1006578, 1006459, 1006199, 1005798, 1099452, 1099451, 1005586, 1005131, 1005113, 1004949, 1004921, 1004258, 1004254, 1004243, 1004240, 1004233, 1004228, 1004167, 1003849, 1003831, 1003590, 1003482, 1003351, 1003319, 1003277, 1003239, 1003003, 1002940, 1002938, 1002889, 1002716, 1099391, 1002585, 1002584, 1002554, 1002482, 1002426, 1002330, 1002274, 1002252, 1002238, 1002197, 1002148, 1002058, 1001999, 
1099368, 1001926, 1001903, 1001810, 1001454, 1000864, 1000798, 1000681, 1000574, 1000519, 1000459, 1000319, 1000272, 1102121, 1000083, 1000030, 1000017, 1000006, 1000004, 1000000, 999942, 999836, 999791, 999691, 999685, 999567, 999552, 999550, 999517, 999469, 999439, 999416, 999385, 999356, 1099321, 999192, 999110, 999089, 999086, 998965, 998905, 998891, 998802, 998735, 998681, 998680, 998675, 998658, 998646, 998641, 998591, 998493, 998417, 998309, 998247, 998246, 998174, 998101, 1099290, 998013, 997935, 997932, 1099288, 997878, 997860, 1099284, 997744, 997713, 997649, 997648, 997542, 997481, 997449, 997351, 997086, 997044, 996922, 996835, 996825, 996805, 996623, 996414, 996328, 996272, 996181, 996119, 995806, 995805, 995787, 995756, 995380, 995280, 995221, 995141, 994867, 994830, 1099226, 994792, 994688, 994533, 994479, 994397, 1099219, 994338, 1099217, 994228, 994133, 994085, 994005, 993996, 993987, 993834, 993821, 993795, 993748, 993627, 993320, 993255, 993178, 993153, 993041, 992950, 992949, 992946, 992840, 992839, 992677, 992660, 992659, 992605, 992559, 992535, 992531, 1099178, 992433, 992407, 992367, 992365, 992363, 992257, 992224, 992193, 992191, 992132, 992120, 991854, 991832, 991782, 991762, 991685, 991471, 991342, 991324, 991210, 991207, 991171, 991111, 991044, 991032, 990995, 990938, 990852, 990841, 990763, 990649, 990414, 990375, 990345, 990026, 989894, 989870, 989831, 989573, 989530, 1099105, 989296, 989213, 989042, 988954, 988915, 1102400, 988787, 988745, 988743, 988710, 988653, 988636, 988540, 988504, 1099072, 988253, 988124, 988122, 988121, 988119, 1099065, 987845, 987823, 987822, 987809, 987671, 987573, 987567, 987502, 987309, 987237, 987230, 1099050, 987192, 987183, 987066, 986936, 986852, 986793, 986791, 986733, 986494, 986484, 986427, 986411, 986316, 986210, 986162, 986068, 985461, 985433, 985431, 985372, 985360, 985275, 985259, 985173, 985167, 985165, 985158, 984992, 984948, 984856, 984774, 129837, 241405, 61452, 173001, 197024, 81993, 186446, 86624, 98817, 246626, 373121, 240504, 112035, 141353, 11006, 235832, 96379, 1098967, 298565, 86094, 141694, 320117, 281002, 353623, 1098953, 60357, 58583, 262974, 334754, 36214, 96749, 181394, 296993, 75608, 83448, 270603, 1098927, 183046, 362845, 164528, 244821, 95409, 293401, 1098909, 176015, 323798, 10157, 137919, 8854, 1098905, 89777, 97895, 149447, 239516, 299350, 323535, 1098895, 1098874, 253678, 160562, 282530, 166043, 357162, 328629, 1098860, 122440, 53813, 10312, 88577, 1098846, 109276, 15382, 29097, 185009, 98682, 230082, 194531, 168069, 1098809, 1098806, 1098804, 168238, 242219, 127315, 203688, 176994, 160255, 47864, 292676, 222954, 36965, 272500, 2962, 125545, 1098765, 11133, 1098763, 118365, 172981, 96310, 276338, 80590, 131665, 125996, 27618, 210690, 334916, 136209, 92437, 24979, 277785, 227591, 249321, 136098, 307521, 1098698, 264594, 169778, 158887, 135516, 15607, 237945, 164912, 125627, 20597, 339888, 276298, 234651, 129565, 12903, 145821, 180592, 1098646, 176677, 9926, 1098641, 265960, 172787, 94865, 135386, 234998, 100616, 1098608, 305361, 61882, 338713, 1098600, 102506, 128113, 44072, 128200, 334433, 329901, 153027, 90941, 197964, 49802, 184452, 229325, 231292, 273481, 30188, 244808, 101451, 191971, 108622, 150087, 182393, 181222, 144491, 258485, 292094, 1098523, 277632, 1098520, 97295, 188908, 1098510, 107812, 310853, 208339, 1098497, 149790, 132263, 106508, 314907, 371695, 1098481, 12741, 305333, 28216, 20671, 320320, 86264, 220151, 316803, 70340, 223468, 59217, 276329, 236580, 130932, 139239, 206549, 
234821, 93308, 174273, 278863, 199572, 285656, 31432, 347491, 207251, 54531, 56033, 300312, 107077, 160885, 209651, 1098355, 143464, 1098354, 183201, 1098338, 222158, 159667, 61180, 1098322, 99183, 85954, 153794, 239189, 195693, 209764, 190307, 343976, 29169, 86701, 24115, 123975, 167436, 160339, 267644, 119534, 10276, 21765, 119975, 165807, 195582, 114037, 282397, 1102028, 165480, 279718, 1098222, 103125, 181144, 55691, 212236, 359499, 119168, 19457, 162351, 371204, 190212, 183874, 357664, 259128, 1098180, 249792, 232703, 1098169, 133037, 226461, 318073, 277701, 183723, 228474, 62648, 72613, 53814, 17848, 139897, 328611, 154633, 259239, 137411, 22882, 309402, 114638, 324645, 280927, 311067, 1098111, 118702, 1098110, 76770, 201366, 195440, 1098102, 142411, 234165, 1098090, 337073, 167156, 18101, 75342, 267012, 193742, 36473, 15039, 264410, 161224, 286160, 132359, 191632, 1098057, 170788, 1098048, 136700, 242713, 186390, 1102390, 324159, 196111, 78730, 40056, 9454, 295406, 184436, 78076, 265729, 212195, 152519, 126491, 57402, 139929, 131873, 185276, 27932, 160787, 323154, 377805, 1098013, 57882, 1098010, 334904, 20356, 303777, 367290, 284072, 1097999, 236949, 288884, 58801, 200296, 142039, 260762, 253965, 46579, 85904, 92260, 167566, 146244, 338040, 123710, 306105, 299094, 178468, 337209, 1097939, 74759, 202797, 1097937, 47741, 277799, 308687, 58571, 1102001, 323555, 259885, 1097909, 1097906, 255027, 1097905, 132104, 126525, 1097894, 142382, 1097885, 185299, 264150, 39577, 289556, 290499, 107283, 57614, 74356, 310948, 211621, 1101995, 294518, 329958, 157149, 149853, 204924, 144857, 227637, 207595, 337190, 113664, 181301, 237561, 62439, 362076, 1097796, 88284, 139767, 13397, 239511, 214040, 226509, 1097786, 202073, 250367, 313940, 326509, 236427, 32642, 267187, 51276, 145877, 256052, 10205, 112718, 342115, 244902, 212634, 323096, 46095, 1097723, 65583, 1097721, 64179, 139090, 299939, 184235, 196596, 154301, 82293, 267341, 1097674, 17635, 262232, 93649, 285537, 51090, 34039, 1215, 335711, 66154, 276208, 144694, 21861, 250636, 299110, 302337, 163602, 31595, 146598, 70852, 271038, 30956, 193866, 249802, 116939, 69506, 328474, 189466, 1097602, 1101977, 140804, 325292, 203390, 298550, 30860, 143293, 192894, 283154, 551309, 65038, 448630, 459280, 757275, 116820, 128772, 432653, 550565, 573954, 37952, 706950, 492853, 451406, 571103, 699510, 418353, 441409, 438286, 533105, 689851, 694561, 405660, 163860, 559507, 464860, 583234, 392393, 431602, 298940, 560673, 709342, 409854, 398447, 633399, 193581, 559198, 613852, 390484, 656250, 407131, 742822, 606944, 1097508, 433691, 701335, 217246, 455776, 723144, 409143, 444790, 408765, 515335, 758074, 483795, 591898, 468762, 462301, 580411, 1097469, 450921, 632394, 1097461, 557157, 406576, 543849, 60677, 1097448, 663006, 129229, 539957, 615383, 152598, 1097438, 448976, 756949, 719411, 562827, 70709, 575616, 387848, 539601, 463373, 724872, 483521, 257885, 566335, 523621, 466640, 704236, 619087, 1097386, 184105, 1097373, 180902, 675320, 643572, 709560, 1097359, 593732, 453220, 696404, 463443, 591940, 577813, 558263, 442525, 610940, 167229, 433220, 704072, 1097317, 732631, 340712, 755040, 454018, 1097314, 569674, 172062, 587524, 372070, 419692, 1097304, 1097298, 471705, 503381, 400696, 731759, 666694, 456016, 701390, 559959, 699873, 704398, 743046, 717845, 499904, 463133, 455659, 610056, 398258, 669979, 487279, 736347, 653041, 459948, 417040, 400692, 740762, 738165, 556489, 571954, 167994, 648877, 592495, 735387, 1097242, 703268, 480932, 1097236, 6217, 741392, 
602957, 528760, 1097223, 412352, 709936, 616045, 437914, 732618, 715189, 189115, 1097213, 446834, 392936, 515317, 710297, 426442, 718444, 565696, 1097195, 587674, 495680, 510867, 679390, 588829, 432874, 470611, 424092, 753479, 412597, 406181, 632825, 417404, 674702, 15063, 496276, 1097154, 687632, 147542, 511101, 523952, 489513, 685091, 565868, 594930, 1097135, 735895, 548254, 748997, 627085, 695238, 1097119, 422624, 1097118, 519145, 395038, 592192, 1097100, 608557, 1097093, 611152, 742667, 1097087, 423608, 448975, 514767, 727551, 1097066, 678176, 432602, 606117, 27743, 694063, 1097040, 485287, 610898, 637208, 517245, 648119, 1097027, 39360, 537526, 1097023, 707670, 1097014, 261098, 517117, 604673, 19940, 1096998, 479284, 646354, 691141, 73788, 601624, 330560, 568841, 594831, 1096964, 442673, 703211, 138640, 457622, 1096958, 750487, 459481, 558448, 189312, 755465, 733422, 404713, 663820, 1096947, 1096945, 418063, 507381, 1096944, 688739, 436844, 605169, 428113, 48417, 745559, 716641, 469873, 509907, 263889, 727837, 54843, 407102, 420304, 634583, 1096911, 416846, 655057, 512405, 568649, 478359, 653187, 445494, 559018, 91345, 690010, 754191, 476807, 112318, 708904, 1096887, 1096886, 467597, 592601, 585344, 679360, 757644, 415500, 635058, 474873, 622100, 685177, 299023, 333700, 427086, 1096870, 580313, 256783, 733186, 747345, 1096866, 684977, 387864, 488676, 575268, 613318, 570068, 147166, 1096850, 747720, 711682, 643561, 1096840, 497132, 1096830, 290779, 1096827, 256192, 635237, 691507, 1096823, 402417, 636853, 469535, 639163, 581521, 751778, 386653, 424509, 454258, 554511, 453270, 586916, 478827, 1096787, 656371, 477648, 649640, 1096776, 365044, 585680, 524722, 685591, 497360, 489374, 568895, 682105, 476483, 634126, 413040, 735343, 433549, 1288, 494730, 659929, 1096742, 696217, 417362, 739599, 710755, 669444, 117683, 633350, 131768, 337864, 495082, 1096712, 686290, 590861, 477380, 415165, 521402, 541135, 389258, 711803, 589564, 497596, 759503, 480064, 461491, 596088, 466774, 406974, 714672, 1096667, 680514, 635125, 1096658, 1096656, 589777, 424449, 645024, 456305, 399364, 1096644, 724947, 1096641, 494086, 626232, 277977, 372378, 1096628, 550331, 303045, 288702, 645604, 539648, 1096620, 503674, 1096610, 1096607, 641156, 505107, 505810, 626462, 397090, 464484, 484551, 617611, 587326, 670142, 243244, 208265, 455862, 666792, 665972, 670022, 660534, 753168, 517386, 470982, 745944, 590945, 422501, 1096557, 642352, 477639, 1096551, 669046, 412982, 399617, 534941, 497470, 1096543, 576822, 745746, 635647, 1096533, 1096532, 647949, 417080, 582146, 1096527, 613233, 524332, 615457, 637234, 567159, 576851, 547301, 456551, 156215, 148761, 168787, 693152, 142782, 1096509, 245921, 75717, 573157, 560245, 718112, 2, 649763, 555850, 18840, 627513, 1101871, 607374, 753517, 54040, 1096479, 1101870, 1096476, 1096475, 84778, 440098, 42361, 567714, 435794, 453869, 482808, 473492, 1101868, 1096457, 464440, 1096454, 453705, 65584, 620992, 556976, 694560, 407869, 484454, 1096431, 1096429, 667373, 527568, 595577, 461601, 693736, 231482, 443081, 282214, 747937, 467683, 551119, 450093, 696918, 540906, 575492, 588888, 687375, 479570, 708781, 578100, 198581, 441128, 748054, 329515, 560059, 1101861, 686469, 731886, 227968, 405867, 1096376, 1096375, 406205, 507086, 97766, 539278, 511367, 1096368, 490505, 458885, 1096360, 273014, 482412, 613694, 660999, 342450, 413079, 420934, 682025, 627323, 82100, 371420, 592672, 493508, 626701, 640232, 638503, 724579, 437165, 352236, 719488, 274981, 547018, 433680, 731902, 412340, 358150, 
510858, 505171, 544060, 453451, 632625, 405090, 409207, 477309, 749955, 504306, 436249, 1096268, 448035, 530572, 1096262, 573899, 97972, 185397, 506985, 1096258, 1096257, 1096256, 610716, 19552, 436475, 1096252, 648049, 662436, 652556, 536480, 414714, 1101845, 687615, 505541, 651187, 759062, 754786, 443489, 249821, 506438, 1096207, 499568, 663679, 428773, 421145, 600350, 690606, 657204, 697780, 556248, 427323, 574944, 220761, 408563, 526331, 555558, 440362, 497757, 391481, 1096180, 601684, 585165, 586268, 368229, 734979, 466335, 667535, 504335, 677460, 509730, 115365, 418926, 693469, 427730, 375291, 649200, 614598, 478295, 674914, 659230, 88160, 722413, 261521, 1096126, 738525, 142579, 393881, 574317, 408739, 481961, 397592, 614409, 525660, 463635, 171776, 632536, 630905, 573452, 473319, 605363, 746055, 697983, 403361, 406525, 510444, 755459, 1096087, 669800, 739671, 534305, 695993, 496175, 722981, 288200, 1096065, 744764, 680951, 548099, 588122, 409887, 433685, 593541, 464663, 569939, 434369, 1096049, 758901, 669288, 59204, 1096045, 650076, 1096044, 513061, 66161, 116517, 663950, 384985, 658498, 1096025, 596716, 231109, 561448, 649110, 745402, 743696, 76283, 570979, 544123, 646179, 434462, 160735, 735384, 442593, 225499, 722515, 656859, 566946, 413905, 724121, 1101822, 682365, 1095994, 77424, 714678, 221664, 93234, 1095988, 625022, 652912, 97612, 609104, 1095982, 478220, 575096, 417902, 458110, 1095971, 692955, 422600, 583766, 639560, 624876, 425688, 523062, 1095955, 718782, 456734, 597384, 578783, 460162, 750946, 308032, 671692, 1095942, 556476, 409557, 273449, 358240, 418423, 485558, 392501, 486370, 75698, 738162, 586740, 1095928, 676454, 641284, 123859, 601629, 646623, 1095921, 758720, 548673, 641618, 29416, 528841, 277556, 467612, 437671, 739913, 530611, 503401, 290830, 749267, 1095881, 151547, 547089, 192502, 1095876, 1095874, 540432, 607338, 739743, 1095868, 699872, 599550, 687245, 438455, 1095864, 472448, 543813, 570905, 619159, 282411, 595568, 1095857, 445094, 582557, 595236, 148777, 681264, 1095845, 1101806, 242107, 289276, 732448, 423178, 508316, 483178, 481297, 156889, 525534, 40337, 159078, 153981, 425505, 481387, 266920, 1095807, 1095806, 173391, 451609, 574547, 1095798, 392195, 544319, 742988, 552868, 435412, 163038, 447340, 585378, 272815, 172608, 740624, 129205, 436602, 733510, 507087, 508855, 1095749, 299732, 108037, 662524, 730278, 405036, 59392, 717563, 117113, 1095725, 649893, 522076, 420400, 1095716, 393268, 1095711, 426214, 1095704, 418165, 662016, 1095699, 510229, 575146, 758909, 249618, 645252, 1095687, 249866, 408427, 470001, 77323, 624176, 472359, 75266, 633916, 556166, 551819, 754509, 679658, 625205, 574051, 586790, 1095654, 505152, 537410, 636949, 1095650, 524447, 546956, 444350, 593135, 630318, 688644, 1095641, 581975, 681791, 682626, 1095631, 436924, 683045, 639412, 164282, 149801, 70720, 510158, 108507, 744109, 754166, 389385, 458771, 192579, 583916, 665009, 440802, 422955, 608197, 461078, 393462, 47716, 506579, 576312, 431481, 748771, 711710, 1095571, 1095566, 449244, 426504, 420980, 641293, 1095560, 94953, 1095558, 436100, 727707, 1095555, 148424, 321239, 664138, 647876, 280223, 1095542, 658372, 1095537, 717751, 4947, 399527, 605467, 555590, 581801, 237936, 1095495, 535142, 448123, 634113, 593275, 119761, 177221, 547820, 420867, 1095478, 404202, 415962, 1095469, 384845, 458774, 663131, 459707, 569689, 412532, 437752, 744092, 660479, 8714, 663890, 736713, 153037, 414799, 1095437, 451070, 760367, 556307, 275997, 453175, 22670, 415815, 632106, 138793, 443027, 
660803, 124787, 407662, 536995, 670476, 32176, 755907, 474234, 706342, 438058, 511330, 583369, 508104, 682190, 638928, 199776, 403793, 663388, 423878, 413404, 662282, 652961, 451484, 414155, 275528, 734198, 263670, 635079, 604153, 667136, 1095377, 743868, 567759, 1095371, 470385, 728060, 577511, 476977, 568585, 1095360, 1095357, 1095354, 625458, 138127, 507434, 637459, 607599, 758519, 698719, 635626, 450854, 1095335, 704223, 1095332, 428819, 436847, 685717, 682205, 644023, 496717, 686260, 754113, 750421, 647687, 630391, 14963, 436091, 275137, 594793, 1101761, 673984, 1095306, 83621, 659182, 401287, 466738, 1095278, 115930, 455782, 562821, 741977, 660957, 701345, 450851, 635044, 611199, 588627, 302435, 488825, 278542, 476947, 741274, 530602, 737940, 584500, 1095233, 591993, 520627, 43781, 437324, 540306, 700835, 653054, 584727, 146812, 619675, 670829, 604628, 701663, 478054, 490883, 327750, 203458, 700641, 661398, 571237, 592220, 680490, 497536, 2235, 703270, 741970, 37685, 1095165, 194750, 521801, 392488, 749752, 1095155, 584905, 388950, 274175, 730229, 334558, 752473, 746065, 532142, 578607, 522953, 422609, 130825, 471007, 469819, 711811, 449235, 607855, 733591, 302878, 1095126, 1095121, 510018, 570725, 129517, 1095108, 624644, 563652, 677212, 729697, 155086, 466162, 691055, 127098, 77878, 425330, 710914, 690801, 562594, 1101739, 1095066, 418552, 478981, 728460, 430142, 1095055, 604229, 117036, 450681, 731723, 572517, 677672, 321363, 432680, 733692, 667932, 418977, 672429, 726076, 1095012, 278429, 438324, 188134, 387662, 570070, 136157, 1094999, 461281, 1094996, 402427, 391125, 589903, 703383, 633986, 708739, 61623, 1094982, 549738, 587853, 703765, 516029, 759038, 686541, 705681, 1094962, 392350, 329369, 450788, 640103, 430229, 245120, 563347, 597395, 617795, 727224, 748672, 752700, 707513, 672109, 624503, 734426, 612471, 657264, 526984, 543251, 459291, 563943, 728823, 753299, 433579, 583798, 449442, 647503, 622725, 503580, 743675, 580450, 745469, 656376, 589586, 388588, 525868, 174592, 526671, 614047, 393203, 1101723, 1094869, 672433, 276979, 693101, 738484, 535599, 507934, 592235, 466252, 420365, 514851, 742022, 691004, 760512, 212796, 1094840, 748321, 577131, 689885, 617968, 490802, 1094825, 455743, 632726, 603031, 541425, 453856, 661076, 604113, 702792, 722352, 547139, 704080, 694678, 603773, 138266, 582641, 1101714, 584569, 455456, 559009, 711840, 565231, 528117, 729672, 129792, 684459, 626005, 689700, 632923, 1094759, 560419, 574569, 112477, 549135, 1094755, 208610, 421813, 688218, 466202, 714709, 345350, 458235, 724571, 608323, 1094724, 633635, 430985, 559709, 398335, 574002, 712832, 750821, 681514, 607292, 467274, 1101706, 639545, 1094691, 521018, 659247, 1094689, 632055, 516413, 338696, 334867, 518940, 495018, 472024, 261683, 559607, 422893, 622658, 178859, 320051, 743708, 496244, 740876, 421437, 211468, 503390, 612846, 153048, 124128, 1094634, 455273, 92509, 612670, 478691, 576195, 93823, 737512, 243712, 453851, 439375, 728150, 1094612, 73853, 611271, 1094605, 624143, 726098, 567452, 541948, 698445, 671219, 740416, 94782, 250228, 1094578, 210442, 601128, 462979, 411953, 6791, 471983, 1094566, 475402, 231717, 642032, 657091, 679167, 102695, 645343, 1094536, 175251, 498478, 677936, 537825, 549235, 396391, 1094519, 711759, 488345, 549219, 1094501, 721885, 555750, 456443, 568526, 1094477, 708517, 25603, 709559, 191792, 1094469, 391101, 1576, 170982, 753214, 1094460, 341317, 1094453, 693636, 481341, 504044, 489858, 556217, 406923, 430989, 413858, 402318, 633153, 231298, 538333, 
618408, 249118, 567630, 393954, 628056, 645892, 1102351, 647260, 1094395, 1094394, 1094389, 537761, 331352, 419326, 425375, 598802, 506181, 248086, 559771, 638849, 744891, 560357, 1094370, 445714, 1094369, 571696, 425072, 473935, 1094364, 602352, 1094361, 635497, 563995, 21741, 419333, 693162, 730626, 593792, 482666, 1101674, 96250, 733892, 409071, 1094316, 223165, 387603, 26207, 680373, 452200, 644658, 384406, 468907, 137440, 540983, 494346, 412319, 171527, 635150, 438316, 623857, 402075, 614069, 285729, 634412, 1101670, 669427, 616447, 65000, 609799, 266760, 146212, 161418, 1101668, 1094271, 454872, 714636, 99556, 631724, 21948, 515813, 409694, 88375, 563359, 602652, 574730, 281704, 543951, 242019, 743693, 445908, 584592, 621419, 24441, 403388, 551860, 124534, 486274, 138223, 524166, 673143, 1094220, 715508, 1094215, 522151, 404051, 394021, 416228, 393420, 563771, 670600, 1094197, 1101661, 1094191, 281702, 649451, 583611, 473394, 408945, 470459, 1094175, 414276, 705279, 623281, 489931, 545450, 694845, 531142, 564707, 753480, 628085, 1094141, 448183, 568709, 614186, 230891, 725047, 700224, 521851, 707721, 715588, 524699, 517516, 543644, 1094110, 702790, 447551, 647872, 760070, 525467, 473886, 1094085, 1094081, 674595, 649294, 591026, 537301, 261650, 509111, 405985, 1094062, 1094056, 427340, 457809, 34015, 639084, 455853, 680250, 544308, 424898, 572286, 426347, 199442, 144254, 82161, 565915, 1094027, 449750, 683193, 728110, 576452, 525779, 610425, 605651, 720013, 582848, 503607, 690508, 1093971, 155056, 483241, 64528, 541969, 1093966, 536791, 86203, 1093962, 527769, 1093959, 609956, 549342, 577167, 731736, 700618, 1093941, 284313, 22479, 507221, 405310, 1093927, 1093926, 571474, 100013, 609628, 690705, 713134, 712545, 604954, 499413, 541274, 479525, 106125, 480504, 400311, 50891, 610190, 1093881, 599524, 457714, 605648, 535421, 487569, 591310, 555458, 4696, 525047, 1093855, 744261, 54199, 536654, 653092, 558548, 961921, 978802, 982481, 775343, 138629, 841165, 841020, 846513, 786021, 1066043, 1093791, 939473, 1058978, 858421, 1093786, 345453, 330450, 821372, 855968, 160671, 1093781, 1036002, 783963, 1093773, 1003445, 315131, 841961, 912898, 771694, 961950, 772928, 776465, 991240, 772055, 917334, 1093750, 853057, 787784, 1041473, 1093732, 783277, 792900, 1093723, 65488, 148564, 792742, 29612, 1093717, 212977, 989855, 936273, 987486, 1019470, 884870, 1005191, 998941, 952378, 95286, 258337, 783433, 1020915, 931905, 808716, 1053111, 1093698, 900924, 1057251, 875787, 907997, 976829, 234114, 888100, 865616, 58130, 789439, 1093682, 763878, 790059, 338637, 810242, 808362, 925571, 944700, 170770, 1011860, 815015, 25025, 930326, 1028131, 1093650, 778890, 978057, 1005520, 900731, 1093637, 960397, 862742, 25534, 1093621, 831601, 900076, 981400, 995176, 852037, 1036542, 971233, 914321, 921173, 942221, 944181, 983708, 804197, 988269, 791629, 804905, 831560, 849561, 913509, 1093570, 230725, 831315, 1037407, 837181, 1011811, 1076078, 303934, 959228, 1093556, 869827, 1093552, 783602, 849337, 1011721, 976941, 950799, 863623, 1031240, 1024034, 812734, 1093540, 1078198, 1093534, 773155, 898714, 1023850, 227317, 825954, 1093507, 956993, 202081, 49435, 884878, 1078731, 780336, 893681, 868598, 1038755, 357340, 1093487, 1093481, 968560, 934235, 1051886, 376537, 906901, 860266, 778139, 831474, 853344, 1093443, 241246, 1093438, 822859, 53897, 884722, 140161, 66908, 992618, 762111, 991138, 64960, 981006, 830040, 944231, 948397, 925951, 835478, 1073972, 147337, 1093419, 800792, 790536, 1093410, 800318, 899869, 
1093406, 1081569, 244092, 1093399, 831030, 877810, 798284, 837375, 166748, 1016406, 270140, 893271, 148851, 171370, 897476, 1027669, 848478, 329114, 1093359, 959083, 865426, 947678, 1003114, 856171, 779553, 773998, 1093349, 988988, 939104, 927553, 881695, 1035228, 954455, 778948, 881582, 1093322, 1003329, 904295, 373209, 971633, 132639, 1093312, 952388, 1093305, 767745, 845888, 869035, 1004940, 1017734, 931147, 989994, 792463, 789332, 850919, 950139, 1101576, 942651, 779475, 1004199, 934223, 1093255, 1011529, 1033703, 804103, 827791, 903811, 813675, 1009109, 1093238, 776122, 1093235, 176744, 1101566, 1093231, 894610, 873250, 1026098, 948452, 224314, 1039195, 891082, 917489, 321918, 859274, 972699, 944245, 983438, 109647, 1093202, 1046952, 1093200, 1093196, 793475, 844390, 1093181, 1093179, 1057476, 129228, 1093172, 960265, 254652, 80712, 809933, 971213, 803306, 1048565, 839878, 946428, 766769, 910818, 339934, 1093142, 1022577, 1093128, 831962, 788851, 872347, 796056, 130034, 1093112, 971653, 788702, 803599, 1093104, 827801, 786857, 777297, 206806, 989108, 1093096, 74328, 1101552, 1093094, 42568, 866101, 820267, 362016, 262280, 1046648, 155041, 841521, 1093064, 840061, 965578, 1093042, 1061994, 983499, 810680, 1093038, 879869, 1093031, 839528, 861169, 818819, 914637, 975775, 1101535, 958311, 957607, 1093006, 28352, 1070412, 870348, 338917, 800987, 895263, 1092996, 977770, 820973, 61531, 134239, 855546, 892454, 57270, 890532, 875796, 979133, 1010537, 994112, 1092984, 778857, 1101531, 941865, 1029016, 1092978, 824000, 22231, 1037104, 924895, 368728, 1092952, 908154, 762652, 862345, 920717, 943190, 862856, 10264, 978605, 242863, 1092930, 872081, 853882, 1010277, 1092919, 778095, 1092911, 822937, 91722, 837681, 161828, 987660, 837202, 810324, 283344, 889104, 25294, 839128, 782549, 1056548, 794469, 779674, 903479, 934964, 1092865, 1005163, 919913, 1092863, 761096, 1018525, 991894, 1021907, 944194, 1092858, 1037826, 109819, 993234, 1059820, 819618, 1024312, 1092832, 1092822, 2798, 166403, 893275, 766272, 809556, 359040, 801907, 348136, 1092796, 776080, 878959, 1092792, 991590, 1092791, 874914, 993107, 839137, 809798, 904007, 948797, 795991, 999555, 817349, 96602, 979054, 70504, 980633, 960437, 783687, 782696, 809909, 1092759, 1092757, 786009, 1092756, 341736, 930483, 783098, 1040684, 855050, 956403, 881723, 856568, 1033912, 994918, 1092738, 823203, 1072506, 1075156, 929046, 953351, 1092724, 1003006, 69871, 111377, 968608, 791223, 865518, 1092715, 789037, 866251, 1079868, 897789, 922398, 844211, 846082, 952452, 922335, 869348, 992652, 967106, 926019, 985644, 902657, 880527, 1092665, 766804, 950355, 1063177, 1101503, 190601, 924398, 1078920, 1092643, 300246, 762558, 999028, 885153, 924567, 837467, 850957, 913579, 272605, 891565, 935358, 63548, 884533, 1092605, 144285, 278606, 1078187, 260853, 1060616, 1038592, 780613, 1026372, 941219, 969750, 913568, 1052717, 887395, 1023363, 1092557, 1092551, 354222, 320025, 807880, 869759, 1092543, 960302, 1043702, 1092528, 943170, 72485, 888988, 1092522, 1092517, 944451, 1043413, 908069, 1040353, 320970, 823549, 1003875, 885081, 796812, 1092484, 937427, 984499, 196720, 915769, 1092482, 974808, 998381, 1050007, 792688, 1102335, 333579, 1092474, 885308, 842223, 1092470, 322345, 999637, 865476, 135464, 1031456, 896931, 914707, 915762, 1044041, 1092450, 167371, 820161, 1028742, 1078491, 1092441, 766238, 894161, 885986, 859669, 1069556, 862640, 962731, 370316, 897981, 1050747, 1092422, 914406, 1092417, 860655, 1092416, 970152, 1005678, 339501, 882002, 1028652, 
1092394, 891498, 909048, 1092391, 255633, 1017348, 782426, 782079, 865971, 160808, 767248, 910150, 875986, 999261, 945535, 990197, 1092348, 1039361, 952658, 1050253, 78418, 991064, 914368, 1038685, 900450, 990481, 1025348, 914771, 814282, 850820, 905707, 1092327, 840532, 212303, 823596, 762761, 1058165, 999610, 1092311, 1040507, 877453, 998093, 979787, 872632, 975997, 789292, 1039346, 1079785, 886332, 116431, 303790, 984434, 1071722, 928753, 796383, 860942, 940386, 1064518, 1092263, 170581, 1092258, 267566, 1092257, 73106, 909547, 1000097, 952445, 854085, 995825, 951820, 882141, 1028555, 1092238, 297019, 69789, 1092237, 1092236, 1024599, 1065448, 1057270, 953274, 801059, 814699, 1025188, 1033580, 1080419, 128178, 760817, 1101466, 995443, 863187, 902919, 877845, 1092203, 770167, 1049200, 879747, 912961, 808528, 991241, 935952, 1092176, 916901, 930293, 83458, 1092168, 1073721, 878840, 1092165, 776609, 1092161, 855725, 798945, 1058425, 1092143, 1031999, 1062457, 761388, 846806, 72809, 922389, 948351, 1056950, 1039495, 1003334, 990010, 908316, 889289, 47270, 777519, 1070728, 1092120, 917536, 1057446, 849596, 997533, 805900, 822649, 840445, 996653, 1071270, 931940, 828596, 1092095, 889046, 1092093, 937578, 1014911, 975495, 813953, 828779, 908237, 156723, 887392, 1101448, 926980, 947974, 868487, 911605, 900696, 1007875, 900062, 831784, 259763, 801478, 54307, 783781, 1040461, 1092042, 760908, 838453, 977952, 1052948, 849245, 820899, 1019830, 1101443, 887398, 1042399, 1092029, 788431, 992340, 839488, 1092023, 26079, 845790, 73119, 187818, 224548, 1102330, 1092007, 786937, 907127, 930621, 776517, 847726, 17586, 1021554, 792847, 776700, 907538, 806688, 798883, 786375, 1091983, 1001108, 899212, 1091973, 143849, 942354, 842272, 935707, 1001381, 813899, 874876, 257018, 1043587, 989647, 157580, 155700, 1091941, 848432, 893789, 890890, 119089, 775355, 114573, 880766, 924047, 860573, 102627, 825147, 989099, 1011003, 1075713, 782381, 824542, 960566, 990784, 1076030, 860078, 874299, 810660, 1057488, 1059646, 1091850, 1014115, 794625, 780993, 1059619, 854862, 1091833, 931772, 1066116, 200062, 922024, 974670, 885433, 825583, 962443, 870544, 769630, 800652, 900599, 803237, 846291, 815243, 244011, 1079141, 829425, 936182, 98847, 927989, 189174, 785176, 1065118, 841919, 906126, 775457, 1091765, 1006922, 991419, 1046750, 1048876, 351820, 947466, 287912, 963788, 830551, 766301, 986325, 1091749, 1006987, 771314, 842070, 1052421, 994941, 842596, 837740, 115833, 963564, 982348, 830531, 1062603, 1056482, 1005500, 773858, 1021241, 59084, 1091719, 822585, 765147, 802634, 987657, 1042676, 1091692, 868410, 1091690, 214771, 1091688, 810210, 849142, 1091681, 803861, 874691, 888777, 1048995, 372674, 842221, 1091667, 909273, 1091665, 913286, 761032, 1056850, 1091661, 1091659, 1091654, 898686, 1065032, 938066, 985653, 896383, 1091643, 928567, 837372, 1091633, 824920, 1091630, 824938, 1031118, 873914, 987720, 935364, 1037341, 1044755, 1004493, 800243, 1091595, 859229, 771170, 1091576, 770604, 1091569, 897240, 58234, 891719, 955117, 918324, 1024950, 905479, 196963, 864507, 933946, 964577, 852179, 128633, 842333, 989644, 1014697, 161117, 834848, 1091529, 7968, 1015766, 257335, 1029681, 145569, 1091522, 818842, 1091520, 831302, 1091513, 900164, 1010700, 900077, 245416, 1032011, 908489, 850555, 810270, 948532, 832508, 868184, 813605, 939020, 964152, 911056, 251172, 920885, 995029, 991598, 988149, 1051095, 1049368, 1074499, 1091471, 764139, 970242, 831871, 1091467, 770233, 1091461, 873886, 1047088, 1063709, 955093, 784549, 902855, 
1091450, 970824, 940916, 912899, 780297, 991383, 988294, 792977, 998482, 860071, 984930, 77391, 75335, 866428, 1091421, 853646, 1015556, 772129, 47588, 1021446, 932878, 859376, 980789, 1102325, 937947, 143025, 872869, 1080968, 968004, 1091384, 765583, 1042488, 806574, 886382, 921348, 836832, 808235, 882982, 1073980, 853471, 180693, 1091360, 1038871, 960003, 1049221, 1033759, 903097, 1010607, 1091340, 785721, 1091337, 957688, 1040694, 194724, 1009959, 1091330, 788035, 46081, 1058100, 301061, 973917, 1038849, 969264, 953355, 860542, 175625, 802794, 194870, 888911, 1057168, 1101374, 995526, 1016869, 1047592, 298444, 1051990, 985905, 1091269, 830649, 850450, 870875, 357519, 1091264, 1023838, 855031, 958993, 1091255, 795951, 939744, 954711, 200600, 851490, 1063659, 932495, 994582, 1091246, 236708, 1091234, 238886, 804687, 149670, 897401, 812190, 842108, 1045203, 868919, 1041714, 1060305, 968310, 1009668, 1036800, 860462, 1091206, 164946, 769085, 904727, 844128, 1059442, 1091194, 155119, 864905, 1091189, 97964, 798469, 761963, 326410, 1016703, 1059801, 1091177, 970830, 786674, 16860, 1091173, 1091163, 888796, 909506, 1091158, 991938, 905766, 794319, 795540, 829025, 952520, 909176, 872655, 783843, 1003997, 1050670, 1016790, 1080495, 956624, 1017892, 815320, 1013367, 1091115, 1091112, 811852, 1091108, 913137, 1057757, 831380, 929473, 921812, 1079434, 1021679, 822642, 248385, 946747, 1091082, 51054, 909886, 996042, 869891, 879150, 1091068, 883861, 925059, 1058822, 790178, 781877, 1091059, 1081321, 1049867, 993876, 91790, 131925, 855029, 907173, 1058717, 828093, 1091048, 898631, 829050, 59190, 760930, 865660, 979713, 36388, 1068408, 1036999, 984075, 1022762, 918424, 969974, 843140, 768133, 941749, 879155, 798253, 980726, 832188, 1091015, 999897, 878367, 1032074, 783822, 845719, 764691, 904389, 926064, 910375, 70787, 1090987, 792187, 863817, 1070867, 980168, 909221, 203274, 885184, 763084, 994311, 25036, 1101341, 1090965, 84520, 952047, 854785, 863738, 808200, 818612, 935362, 980811, 919712, 1027785, 999641, 846438, 772864, 948829, 989543, 1090924, 888559, 804996, 995654, 993544, 969066, 220495, 1090910, 992729, 787255, 1015055, 938773, 286915, 942915, 1067990, 1090887, 1090886, 917022, 826518, 847415, 874455, 193422, 804523, 1090877, 780850, 958142, 953332, 988960, 885505, 969023, 1053997, 1068924, 1090869, 946825, 993419, 788484, 1090861, 972064, 829087, 1080537, 786477, 1065985, 773924, 1090842, 1090841, 1077000, 1090838, 1090833, 1001981, 865384, 810394, 355458, 311540, 867947, 93311, 858391, 1090808, 917015, 1025895, 885301, 791140, 1071061, 1090796, 822218, 820027, 1090791, 833507, 995789, 1090789, 153739, 156052, 1049767, 1090758, 789997, 792789, 857943, 49943, 1027865, 905604, 931726, 278658, 1090742, 784961, 993501, 818421, 851813, 1090730, 135079, 775297, 1090727, 995212, 888934, 815891, 961048, 818798, 97652, 1078752, 878817, 833268, 1090701, 1090700, 867490, 898318, 323592, 938359, 843409, 875417, 1064155, 996317, 816483, 348994, 823421, 798967, 149767, 240489, 28442, 205741, 240584, 62411, 264284, 138492, 186727, 38608, 144028, 275534, 54235, 135633, 236582, 348594, 58409, 1090624, 358455, 42555, 1090613, 26485, 55848, 113826, 126821, 60339, 375891, 1090596, 184916, 247717, 50833, 168175, 340006, 326190, 99461, 84473, 166625, 38098, 128158, 197542, 108287, 153588, 156688, 1090558, 227992, 75801, 1090550, 326637, 194430, 1090542, 1090540, 1090537, 1090530, 59426, 1101300, 165335, 176065, 166784, 53109, 49234, 1090513, 174344, 370979, 1101298, 166111, 72435, 260172, 59030, 243139, 79763, 
156251, 291396, 186265, 169305, 307504, 355484, 137508, 186063, 1101296, 289586, 277737, 249176, 344955, 1090458, 81649, 203039, 56188, 35996, 36703, 44686, 25344, 121017, 72398, 129491, 30039, 148016, 1090413, 57411, 54819, 57258, 199837, 300306, 1090400, 1090399, 149161, 215603, 165135, 59654, 296441, 160312, 149221, 1090377, 100661, 196250, 206762, 283141, 111995, 55682, 1090358, 1090352, 1090350, 81945, 188714, 291248, 205251, 305650, 100250, 1090329, 82842, 288139, 180887, 88831, 171906, 1090311, 290091, 1101282, 326719, 147073, 243761, 162662, 247194, 195199], + 'msmarco-doc-test': [355339, 1035339, 943613, 1051868, 876108, 770613, 84901, 928755, 895787, 920435, 1009016, 923221, 1126106, 988661, 975821, 154441, 802079, 184355, 938754, 1008125, 794144, 51577, 1126090, 1073795, 767703, 937603, 876154, 853437, 275951, 773040, 813841, 1135894, 117487, 927093, 1126069, 804764, 832389, 1126064, 808540, 963267, 909560, 334918, 1010426, 955087, 886048, 767404, 898402, 849376, 970577, 7869, 1126035, 889718, 82412, 809339, 68610, 891083, 346202, 835783, 1024923, 973416, 67937, 1019366, 857232, 989398, 787957, 889757, 907576, 1037969, 957990, 1037871, 1073093, 848496, 147746, 960571, 932412, 993677, 997024, 800980, 927899, 1053885, 1018475, 943566, 929863, 1079280, 943913, 839899, 128604, 1061763, 809683, 1036759, 798642, 57068, 977156, 1125956, 913435, 1125954, 60634, 276099, 913041, 993117, 789981, 941780, 1125930, 904849, 1125924, 1024250, 128322, 1125920, 1125919, 845751, 943412, 1056425, 978017, 796563, 53330, 1054994, 794701, 990049, 305251, 1074350, 773878, 1135875, 855288, 952866, 792115, 848431, 996851, 1056576, 1031850, 1035354, 1125864, 271481, 936261, 789305, 1416, 842609, 900690, 793245, 983196, 816456, 851318, 899008, 811001, 903976, 1125841, 902410, 931576, 959564, 955763, 1125829, 1013640, 65752, 828649, 992308, 105367, 1054386, 1125820, 58374, 763534, 957181, 823034, 858790, 28453, 984702, 8234, 871720, 778062, 1079501, 1008523, 188803, 761705, 1006025, 991361, 803911, 1125782, 982127, 812148, 34925, 965627, 1001465, 168906, 1125763, 1070541, 203578, 788201, 1032694, 903643, 822527, 810947, 323665, 909052, 912272, 902935, 1125745, 980205, 880930, 950222, 1011337, 803862, 1046316, 1056710, 816915, 996237, 968847, 1135859, 912165, 1125706, 1074076, 944345, 301180, 966925, 784039, 1125694, 971998, 76945, 1125690, 1125688, 779295, 905638, 875806, 1125684, 54659, 1125680, 1008285, 1135856, 898753, 20530, 1003074, 875793, 341529, 841979, 1056910, 857280, 792006, 23822, 842753, 1043433, 128291, 1125651, 1026391, 869721, 1074859, 997654, 1125645, 972896, 839707, 318781, 1050274, 1019841, 1125633, 855922, 886682, 1033989, 1125628, 275173, 765070, 1125626, 230103, 796808, 280245, 851807, 790280, 1077110, 1019506, 1048410, 1029806, 1125599, 124943, 824765, 906203, 1125596, 198905, 166606, 1125592, 1125591, 1125590, 1010376, 1125587, 965313, 1125583, 803296, 1000865, 1135848, 1019720, 928128, 991278, 991583, 128365, 788278, 863720, 769008, 1018290, 1125559, 760825, 122724, 261830, 1125556, 1125555, 842923, 775366, 900867, 989196, 1030770, 767991, 882803, 1005440, 841671, 943638, 1060768, 864153, 806779, 819755, 799323, 1003747, 253837, 893530, 859732, 921193, 1066463, 1125519, 902014, 939096, 946406, 1125510, 24480, 798003, 1135841, 1125495, 368106, 853522, 204701, 928453, 924975, 854766, 814340, 886243, 1059906, 1125481, 1078827, 989869, 1073638, 837433, 962187, 1036844, 983787, 861435, 18793, 311494, 1125459, 1125455, 1064450, 947416, 809209, 972396, 306216, 1125443, 931357, 790199, 
132469, 1044869, 836655, 881324, 885159, 949501, 1058138, 340540, 1076490, 909115, 919673, 1125409, 1125406, 1023476, 971528, 1076183, 840845, 994762, 1125395, 980406, 1125394, 204957, 354123, 775487, 762865, 808343, 788151, 964554, 1067743, 836007, 840770, 825151, 871767, 222861, 299781, 153374, 1125352, 219844, 208394, 261661, 355519, 146170, 1125347, 180979, 220352, 153239, 323874, 175228, 61240, 49429, 121109, 165037, 282280, 336011, 121113, 171824, 116659, 9904, 9975, 282050, 233856, 78181, 1135818, 199508, 57774, 224261, 206738, 228769, 1125306, 21227, 141078, 1125292, 190377, 184333, 297682, 130610, 1125273, 1125272, 68896, 96597, 202664, 128757, 307758, 278239, 1125260, 334263, 303070, 1125251, 145104, 208344, 236824, 1125245, 77398, 84713, 24841, 110736, 1125238, 148515, 319235, 50800, 1125227, 300375, 310642, 281106, 307403, 190070, 161474, 136578, 189365, 20440, 14448, 249267, 28862, 1125194, 60902, 204851, 17077, 336236, 161434, 112638, 282352, 21075, 341207, 156479, 1125153, 329704, 261295, 114048, 213365, 374724, 340145, 122049, 92622, 314064, 247025, 168854, 11304, 233178, 76591, 157744, 31548, 1125111, 150926, 92713, 276665, 150029, 178677, 194563, 237689, 378218, 1125086, 324242, 235280, 105183, 1125079, 1135796, 24280, 1125075, 79457, 88200, 174034, 130306, 139285, 111573, 102366, 198015, 158054, 205433, 91055, 343439, 275968, 254923, 23367, 12166, 1125041, 115254, 28653, 300384, 184640, 219898, 50782, 84257, 1136966, 281922, 319757, 313747, 81842, 37122, 23986, 1125015, 231134, 1125013, 179395, 158569, 176276, 275413, 120398, 55454, 253834, 118372, 1124998, 323294, 305205, 85798, 143889, 230878, 1124990, 1124989, 121025, 49984, 118151, 1124982, 276525, 180091, 307344, 186484, 82578, 246327, 53422, 96443, 255889, 1124958, 87592, 1124957, 144952, 1124953, 144498, 1135780, 52199, 285049, 198444, 302038, 122795, 122298, 235309, 360650, 60301, 59722, 25398, 202245, 1124926, 130951, 1124915, 1124882, 1124872, 1124863, 1124803, 1124767, 1124753, 1124703, 1124699, 1124663, 1124621, 1124573, 1124569, 1124549, 1124542, 1124534, 1136837, 1135738, 1124531, 1124522, 1124504, 1124501, 1124480, 1135727, 1124472, 1124469, 1124462, 1124451, 1135722, 1124391, 1124388, 1124373, 1124369, 1124335, 1124324, 1124306, 1124300, 1124276, 1124251, 1124226, 1124221, 1124198, 1124194, 1124171, 1124170, 1124160, 1124159, 1124122, 1124114, 1124093, 1124090, 1124087, 1124067, 1124059, 1123997, 1136830, 1123971, 1123969, 1123968, 1123953, 1123930, 1123917, 1123915, 1123888, 1123840, 1123837, 1123822, 1123776, 1123765, 1123761, 1123721, 1123709, 1123636, 1123626, 1135625, 1123603, 1123584, 1123544, 1123492, 1123488, 1123469, 1123465, 1135606, 1135605, 1123435, 1123397, 1123383, 1123337, 1123298, 1123211, 1123209, 1123191, 1123168, 1123112, 1123103, 1123090, 1123074, 1123057, 1123055, 1123052, 1135570, 1123034, 1123028, 1135568, 1135563, 1122957, 1122936, 1122915, 1122908, 1122892, 1122859, 1135553, 1122853, 1136818, 1122792, 1122785, 1122776, 1122772, 1122760, 1122745, 1122706, 1122695, 1122690, 1122686, 1122662, 1122658, 1122652, 1122648, 1122643, 1135533, 1122610, 1122606, 1122601, 1122594, 1122593, 1122591, 1122586, 1122584, 1122569, 1135525, 1122504, 1135522, 1122501, 1122488, 1122476, 1122471, 1122446, 1122442, 1122409, 1122352, 1122348, 1122343, 1122342, 1122341, 1122336, 1122334, 1135498, 1122316, 1122306, 1122305, 1122283, 1122271, 1122267, 1122255, 1122247, 1122242, 1122237, 1122234, 1122233, 1122222, 1122220, 1122212, 1122168, 1122155, 1122087, 1122082, 1122064, 1136811, 1121993, 1121967, 
1121963, 1121941, 1121931, 1121922, 1135448, 1121892, 1121875, 1121861, 1121860, 1135438, 1121830, 1121817, 1121814, 1121799, 1121794, 1121759, 1121748, 1121673, 1121667, 1121642, 1121641, 1121631, 1121618, 1121576, 1121566, 1121532, 1121523, 1121474, 1121466, 1121459, 1135397, 1135395, 1121426, 1121424, 1121412, 1121380, 1121374, 1121369, 1121333, 1121327, 1121309, 1121268, 1121260, 1121251, 1121249, 1121191, 1121167, 1121162, 1121156, 1121118, 1135362, 1121083, 1121082, 1121068, 1121050, 1121044, 1121022, 1121000, 1120994, 1120986, 1120982, 1120963, 1120945, 1120926, 1120919, 1120904, 1120891, 1120887, 1120867, 1120842, 1120835, 1120834, 1120776, 1120775, 1120773, 1120744, 1120726, 1120706, 1120689, 1120685, 1120678, 1120676, 1120672, 1120668, 1135301, 1120633, 1120621, 1120619, 1120606, 1120599, 1120576, 1120574, 1120564, 1120563, 1120559, 1120541, 1120537, 1120519, 1120515, 1120466, 1120462, 1120453, 1135280, 1120399, 1120395, 1135274, 1120391, 1120375, 1120361, 1120348, 1120328, 1120316, 1120268, 1120261, 1135262, 1120253, 1120248, 1120236, 1120189, 1120187, 1120180, 1120167, 1120089, 1120084, 1120049, 1120041, 1120019, 1120006, 1135238, 1135234, 1119953, 1119943, 1119904, 1119884, 1119872, 1119862, 1119828, 1119764, 1119744, 1119740, 1119695, 1119627, 1119620, 1119603, 1119597, 1119593, 1119531, 1119529, 1119514, 1119501, 1135190, 1119444, 1119384, 1119374, 1119355, 1119347, 1119338, 1119316, 1119307, 1119305, 1119280, 1119271, 1119259, 1119230, 1135165, 1119189, 1119179, 1119169, 1119168, 1119167, 1119132, 1119128, 1119112, 1119110, 1119097, 1119076, 1135150, 1119040, 1119038, 1119021, 1119015, 1119013, 1119006, 1135142, 1118976, 1118974, 1118954, 1118953, 1118941, 1118927, 1118926, 1118921, 1118889, 1118884, 1118879, 1118871, 1118869, 1118868, 1118820, 1118806, 1118799, 1118797, 1118793, 1118792, 1135121, 1118768, 1118759, 1118734, 1118677, 1118676, 1118671, 1118659, 1118651, 1118641, 1118627, 1135106, 1118595, 1118585, 1136771, 1135094, 1118456, 1118455, 1118435, 1118434, 1118429, 1118416, 1118388, 1135081, 1118310, 1118294, 1118293, 1118286, 1118259, 1118230, 1118229, 1118227, 1118226, 1118209, 1118199, 1135052, 1118176, 1118172, 1118169, 1118145, 1118140, 1135042, 1135039, 1118042, 1118014, 1118012, 1135028, 1117935, 1117925, 1117901, 1117875, 1117872, 1117858, 1117826, 1117798, 1117787, 1117771, 1117767, 1117765, 1136763, 1134998, 1117740, 1117725, 1117709, 1117708, 1117700, 1117691, 1117689, 1117672, 1134987, 1117650, 1117623, 1117616, 1117589, 1117584, 1117581, 1117579, 1134978, 1117566, 1117542, 1117505, 1117495, 1134967, 1117451, 1117450, 1117446, 1117405, 1117402, 1117398, 1117394, 1117375, 1117361, 1117357, 1117350, 1117343, 1117337, 1117331, 1117313, 1117307, 1117299, 1117295, 1117294, 1134949, 1117271, 1117263, 1117261, 1134945, 1117235, 1117206, 1117183, 1117182, 1117178, 1117154, 1117150, 1117148, 1134931, 1117066, 1117062, 1117055, 1117033, 1134926, 1116996, 1136756, 1116903, 1116896, 1116877, 1116871, 1116867, 1116864, 1116862, 1116846, 1116845, 1116821, 1116816, 1116776, 1116775, 1116763, 1116728, 1116706, 1116702, 1116695, 1116694, 1116663, 1116657, 1116643, 1116633, 1116612, 1116606, 1116592, 1116554, 1116553, 1116537, 1116531, 1116467, 1116452, 1116433, 1116429, 1116419, 1116402, 1134871, 1116369, 1116368, 1116353, 1116324, 1116304, 1134862, 1116301, 1116273, 1116265, 1116264, 1116260, 1116242, 1116234, 1116228, 1116221, 1134853, 1116211, 1116201, 1134850, 1116180, 1116169, 1116168, 1116164, 1116162, 1116161, 1134846, 1116139, 1116134, 1116121, 1116112, 1116103, 
1116096, 1116092, 1116090, 1134839, 1134838, 1116037, 1116028, 1116025, 1116021, 1116019, 1116016, 1116015, 1134835, 1115983, 1115970, 1115961, 1115949, 1115933, 1115929, 1115881, 1115870, 1115819, 1115784, 1115783, 1115760, 1115748, 1115716, 1134807, 1134806, 1115693, 1115677, 1115660, 1115656, 1115651, 1115650, 1115649, 1115617, 1115599, 1115595, 1115586, 1115585, 1115584, 1115539, 1115526, 1115511, 1115485, 1134784, 1115462, 1115432, 1115425, 1115423, 1115388, 1115372, 1115339, 1115334, 1115332, 1115325, 1134769, 1115281, 1115255, 1115248, 1115206, 1115197, 1115191, 1115172, 1134752, 1115154, 1115118, 1115109, 1115106, 1115100, 1115097, 1115086, 1115072, 1115030, 1115021, 1115004, 1114979, 1114974, 1114962, 1114947, 1114905, 1114901, 1114882, 1114870, 1134723, 1114838, 1114828, 1114805, 1114782, 1114757, 1114753, 1114743, 1114739, 1114700, 1114690, 1114669, 1114660, 1114655, 1114654, 1114650, 1114634, 1114589, 1114588, 1114585, 1114584, 1114542, 1114524, 1114510, 1114502, 1114498, 1114495, 1114488, 1114476, 1114471, 1114460, 1134676, 1114428, 1114423, 1114420, 1114402, 1114383, 1114358, 1134666, 1114290, 1114275, 1134656, 1114236, 1114206, 1114200, 1114198, 1114188, 1114185, 1114164, 1114149, 1114131, 1114125, 1114108, 1114099, 1114093, 1114092, 1134639, 1114078, 1114066, 1136728, 1114055, 1114047, 1114044, 1113970, 1113959, 1113944, 1113877, 1113870, 1113861, 1113847, 1113840, 1136726, 1134614, 1113808, 1113802, 1113796, 1113792, 1113767, 1113756, 1113751, 1113724, 1113709, 1113699, 1113683, 1113654, 1113622, 1113608, 1136724, 1113597, 1113545, 1113528, 1113526, 1113520, 1134583, 1113506, 1113498, 1113496, 1113461, 1113439, 1113429, 1113425, 1113416, 1113398, 1134572, 1113381, 1113380, 1113353, 1113352, 1113347, 1113318, 1113307, 1113304, 1113269, 1113258, 1134560, 1134558, 1113231, 1134557, 1113201, 1113175, 1113170, 1113163, 1113158, 1113148, 1113147, 1134552, 1113125, 1113092, 1113090, 1134539, 1113073, 1113041, 1112954, 1112947, 1112944, 1112939, 1112928, 1112908, 1112897, 1112847, 1112838, 1112827, 1112819, 1134500, 1134499, 1112770, 1112709, 1112705, 1112663, 1112658, 1112656, 1112614, 1112606, 1112596, 1112568, 1112514, 1112506, 1112487, 1112486, 1112442, 1112396, 1112390, 1112384, 1112382, 1112375, 1112366, 1112327, 1112324, 1112313, 1134449, 1112302, 1112297, 1112291, 1134444, 1112250, 1112240, 1112234, 1112210, 1134436, 1112203, 1112154, 1112152, 1112141, 1112107, 1112105, 1112100, 1112089, 1112061, 1134422, 1134420, 1112044, 1112018, 1112014, 1111987, 1111969, 1111957, 1111908, 1111902, 1111898, 1111892, 1111890, 1134405, 1111874, 1111813, 1111802, 1111791, 1111790, 1134394, 1111760, 1111710, 1111705, 1111702, 1111678, 1111668, 1111662, 1111650, 1111605, 1111581, 1111580, 1111576, 1111564, 1111504, 1111502, 1111472, 1111470, 1111460, 1111439, 1111417, 1111400, 1111396, 1111392, 1111377, 1111345, 1111338, 1111316, 1111313, 1111306, 1134343, 1111275, 1111241, 1111214, 1111188, 1111156, 1111132, 1111119, 1111099, 1111071, 1111049, 1111030, 1111024, 1111023, 1110997, 1134309, 1110964, 1134306, 1110929, 1110927, 1110905, 1110903, 1110874, 1110868, 1110849, 1110836, 1110794, 1110776, 1110730, 1134281, 1110698, 1134277, 1110643, 1110605, 1134272, 1110576, 1134266, 1110531, 1110512, 1134263, 1110498, 1110470, 1110468, 1110426, 1110423, 1110410, 1110401, 1134251, 1110392, 1110391, 1110357, 1110353, 1110344, 1110337, 1110326, 1110322, 1110321, 1110314, 1110295, 1110284, 1110275, 1110264, 1110246, 1110234, 1110217, 1110215, 1110213, 1110196, 1110190, 1134221, 1110189, 1110163, 1110129, 
1134212, 1110081, 1134203, 1110001, 1109974, 1109969, 1109917, 1134188, 1109872, 1109853, 1134184, 1109822, 1109805, 1109794, 1109788, 1109784, 1109768, 1109722, 1109701, 1109694, 1109658, 1109657, 1109628, 1109615, 1109599, 1109579, 1109571, 1134157, 1109546, 1109542, 1109540, 1109537, 1109525, 1109496, 1109487, 1109477, 1109474, 1109473, 1109471, 1109464, 1109463, 1109462, 1134140, 1109436, 1109413, 1109408, 1109407, 1109397, 1109396, 1134135, 1109381, 1109379, 1109365, 1109319, 1109311, 1109288, 1109238, 1109215, 1109201, 1109190, 1109171, 1109110, 1134109, 1109050, 1109048, 1109040, 1109022, 1109002, 1108993, 1108985, 1108975, 1108961, 1108959, 1108953, 1108935, 1108922, 1108914, 1108911, 1108875, 1108874, 1108867, 1108847, 1108821, 1108811, 1108809, 1108799, 1108789, 1136676, 1108763, 1108735, 1108658, 1108645, 1108637, 1108636, 1108632, 1108629, 1108607, 1134057, 1108564, 1108526, 1108523, 1108516, 1108510, 1134049, 1108492, 1108487, 1108481, 1108478, 1108472, 1108462, 1108406, 1108400, 1108332, 1134030, 1108268, 1134028, 1108241, 1108227, 1134024, 1108216, 1108203, 1108199, 1108147, 1108131, 1108121, 1108099, 1108075, 1108071, 1134001, 1108011, 1108009, 1107991, 1107982, 1107970, 1107953, 1107919, 1107915, 1107898, 1107885, 1133988, 1107845, 1133986, 1107834, 1133983, 1107749, 1107748, 1107745, 1107702, 1107677, 1107646, 1107618, 1107602, 1107593, 1107568, 1107563, 1107450, 1107401, 1107399, 1107381, 1107364, 1107344, 1107336, 1107308, 1107299, 1133931, 1107245, 1107210, 1107207, 1107193, 1107192, 1107189, 1107171, 1107141, 1107132, 1107123, 1107117, 1107112, 1107108, 1107092, 1107091, 1107085, 1107057, 1133907, 1107033, 1107015, 1133902, 1106978, 1106920, 1106912, 1106873, 1106858, 1106850, 1106840, 1106834, 1106797, 1106764, 1106756, 1106686, 1106680, 1106676, 1106672, 1106658, 1106652, 1106642, 1106607, 1106589, 1133854, 1106543, 1106539, 1106537, 1106535, 1106533, 1106521, 1106516, 1106508, 1106502, 1106450, 1106421, 1106414, 1106408, 1106389, 1106381, 1106377, 1106348, 1106343, 1106335, 1106306, 1106291, 1106290, 1133827, 1106234, 1106230, 1106216, 1106200, 1106196, 1106159, 1106126, 1106125, 1106099, 1106089, 1133812, 1106079, 1133809, 1106027, 1106011, 1133799, 1133798, 1105989, 1105982, 1105978, 1133792, 1105897, 1105882, 1105853, 1105850, 1105831, 1105817, 1105816, 1133780, 1105805, 1105798, 1105797, 1105766, 1105761, 1105753, 1105700, 1105689, 1105666, 1105625, 1105617, 1105614, 1105594, 1105593, 1105582, 1105571, 1105565, 1133757, 1105526, 1105506, 1105498, 1105489, 1105485, 1105441, 1105432, 1105431, 1105427, 1105422, 1133744, 1105381, 1105364, 1105358, 1105337, 1105298, 1105287, 1105276, 1105275, 1105253, 1105248, 1105239, 1105202, 1105190, 1105169, 1105163, 1133721, 1105148, 1105146, 1105144, 1105142, 1105110, 1105108, 1105100, 1105086, 1105073, 1105046, 1105042, 1133710, 1105021, 1105017, 1105013, 1104984, 1136634, 1104957, 1104950, 1104949, 1104942, 1104915, 1104781, 1104773, 1104763, 1104725, 1104720, 1104712, 1104704, 1104699, 1104698, 1104685, 1104640, 1104633, 1104630, 1104557, 1104513, 1104509, 1104506, 1104497, 1104468, 1104458, 1104454, 1133658, 1104406, 1104403, 1104339, 1133644, 1104279, 1104252, 1104250, 1104235, 1104223, 1104221, 1104198, 1104175, 1104124, 1104118, 1104105, 1104099, 1104087, 1133620, 1104071, 1104064, 1104036, 1104022, 1104020, 1104005, 1133611, 1103987, 1103974, 1103969, 1103966, 1103921, 1103911, 1103910, 1103888, 1103879, 1103828, 1103826, 1103816, 1103798, 1103793, 1103787, 1103776, 1103766, 1103759, 1103690, 1103687, 1103684, 1103679, 
1103651, 1103601, 1103579, 1103561, 1103555, 1103553, 1103537, 1103535, 1133558, 1103511, 1133557, 1103468, 1103467, 1103446, 1103416, 1103387, 1103355, 1103322, 1103314, 1103303, 1103290, 1103289, 1133535, 1103260, 1103257, 1133533, 1103250, 1103182, 1103136, 1103121, 1103093, 1103091, 1103089, 1103084, 1103076, 1103019, 1103009, 1102998, 1102989, 1102979, 1102895, 1102892, 1102869, 1102862, 1102854, 1102849, 1102839, 1102827, 1102811, 1102803, 1102768, 1133474, 1102714, 1102704, 1102693, 1102667, 1102617, 1102590, 1102589, 1102579, 1102578, 1102498, 1102488, 1102477, 1102474, 1102456, 1133442, 138157, 2610, 1133431, 149979, 377304, 1133428, 216736, 359286, 62525, 2663, 272951, 306421, 42476, 139000, 201194, 204951, 209797, 1937, 1133376, 285032, 1133374, 236763, 121746, 246076, 176953, 147537, 1133366, 360488, 175123, 160276, 159922, 131617, 318841, 80372, 63246, 146783, 349622, 288566, 1133349, 326787, 65809, 356260, 100154, 132133, 74274, 228670, 134903, 260302, 108391, 103402, 103595, 49439, 242042, 339286, 233529, 40228, 19684, 59230, 262686, 90139, 213353, 32202, 56678, 144051, 1133288, 48170, 137662, 49381, 346023, 180370, 132575, 49810, 24093, 1133258, 131405, 102330, 332859, 1133254, 266752, 252103, 1133252, 92176, 54747, 369873, 78332, 76140, 170498, 236254, 160010, 285797, 1133231, 370985, 201444, 239147, 95448, 48846, 122011, 274306, 209497, 173661, 1133202, 1136591, 132938, 1133194, 295928, 1133190, 212146, 318302, 107701, 1133187, 178610, 21792, 23963, 271748, 291553, 137712, 127150, 152627, 1133171, 187371, 130467, 148898, 103328, 347583, 120219, 345861, 126866, 237441, 162657, 323393, 62845, 54246, 333489, 150595, 180956, 1133122, 119400, 183919, 1136584, 100777, 198536, 142148, 1133113, 134628, 117174, 137468, 125791, 38087, 334251, 1133092, 272071, 57674, 28684, 78497, 129183, 31825, 1425, 284067, 243874, 161766, 278827, 266488, 89633, 94173, 194893, 257783, 354466, 187317, 139175, 67222, 1133057, 236776, 195958, 165977, 347294, 262636, 128874, 1133036, 308617, 331343, 30163, 43707, 30649, 27310, 2045, 330504, 83320, 271835, 315683, 178575, 135634, 200918, 1132996, 34366, 1132991, 33137, 20892, 135821, 131850, 1132977, 207703, 340377, 185119, 83959, 378632, 328464, 32278, 1132965, 1132959, 271881, 1132952, 287159, 275737, 274797, 1132945, 309926, 268574, 289499, 1132925, 248362, 1132921, 100932, 204904, 56894, 91565, 280019, 1132913, 106320, 1132903, 182350, 355540, 132495, 323018, 62577, 1132890, 37185, 1136559, 65052, 205954, 225986, 161346, 159992, 1132847, 239971, 113269, 43476, 134905, 326921, 158752, 68626, 334222, 75286, 1132834, 96857, 225703, 134469, 232008, 140770, 50797, 59725, 172940, 130168, 1132815, 183696, 121488, 245833, 87730, 216731, 224688, 1132796, 68618, 38122, 64384, 1132790, 190054, 191625, 202250, 310488, 222133, 247819, 1136550, 247506, 54958, 229260, 200144, 163053, 95594, 175258, 80718, 219809, 320340, 1132754, 85348, 293041, 72577, 334716, 41184, 186086, 50626, 29725, 1132735, 239245, 1132734, 75799, 124895, 1132717, 261101, 91157, 44340, 67422, 92143, 31192, 276343, 49482, 124291, 227104, 57710, 143012, 74057, 94039, 272269, 299381, 143955, 268235, 261207, 124798, 260080, 242796, 117115, 299709, 165393, 1214, 157565, 1132651, 245620, 55727, 313766, 25026, 57218, 344937, 67147, 313491, 114725, 50189, 70108, 303874, 160801, 1132592, 24041, 88882, 289943, 1132583, 22364, 112541, 226572, 200782, 1132564, 63290, 156302, 63795, 184833, 156548, 268010, 227516, 90708, 1132549, 208493, 82973, 147064, 56808, 1132529, 357336, 160313, 278403, 176124, 
188784, 37547, 30359, 347113, 293516, 233185, 64430, 127812, 364094, 12761, 72904, 271435, 277093, 166683, 167974, 160574, 301352, 207754, 181479, 117965, 169257, 1132444, 259070, 135347, 153663, 296378, 60870, 184223, 23531, 27528, 98675, 125929, 1132409, 326797, 225419, 359463, 112864, 259437, 340815, 71908, 88808, 1132399, 43167, 42055, 187763, 115594, 95449, 40124, 196233, 202384, 81184, 213758, 138933, 1132360, 343640, 293069, 289801, 273773, 176499, 333375, 1132352, 1132347, 309040, 297010, 295730, 320086, 166508, 1132312, 148209, 13101, 25465, 129457, 109587, 277868, 199407, 36299, 13912, 294614, 160309, 65904, 59911, 200228, 267705, 191894, 76102, 174722, 71138, 321703, 11863, 64535, 123648, 1132255, 188166, 92670, 22836, 137674, 62078, 83401, 373795, 76154, 166325, 274555, 36951, 164940, 266150, 153592, 200695, 239250, 207572, 181305, 303706, 97834, 195677, 201381, 153123, 159867, 189529, 174157, 203720, 320700, 39325, 1136491, 134127, 256066, 39660, 335114, 209769, 1132162, 312826, 455425, 538026, 67225, 162696, 1132147, 689120, 508870, 394040, 404889, 490071, 495618, 549190, 49387, 673689, 735360, 208702, 487934, 131247, 563898, 513591, 502104, 752441, 150443, 485594, 586761, 57, 4776, 214625, 1136482, 493900, 609252, 1132087, 732756, 654897, 179955, 461950, 660426, 685004, 734466, 368900, 156776, 628564, 344155, 473182, 386934, 692151, 706780, 482382, 666238, 720868, 146574, 162946, 728829, 466657, 580605, 129695, 1132047, 398127, 473495, 675920, 506768, 415155, 736117, 525069, 466400, 671117, 653909, 643328, 383847, 517763, 713301, 220290, 181644, 389541, 1132006, 699218, 537744, 745784, 369105, 723486, 1131983, 642800, 452336, 756790, 734758, 334219, 255251, 702598, 450543, 613422, 626761, 89634, 596136, 167533, 727181, 429906, 608124, 713357, 573701, 655939, 684502, 538143, 662108, 265494, 120593, 722220, 445502, 597686, 544220, 537188, 718364, 653077, 624210, 710887, 691798, 169584, 705174, 1131909, 209531, 452924, 451826, 647597, 463021, 709056, 490982, 643870, 144842, 466338, 1131892, 91778, 203783, 756681, 433786, 756829, 366342, 728735, 1131884, 538878, 508254, 575461, 504751, 229045, 628808, 108500, 703554, 599137, 671829, 720949, 696201, 554585, 389908, 745278, 1131840, 464930, 674566, 700756, 486173, 1131830, 21838, 353333, 1131821, 731129, 1131818, 680613, 1131813, 730149, 556637, 430755, 727943, 20616, 599673, 455256, 548054, 539566, 501894, 585888, 443964, 644356, 611442, 521254, 713278, 1136443, 547374, 741173, 1131777, 606672, 483253, 588308, 725726, 244472, 238804, 490000, 478255, 708144, 236269, 650476, 1131754, 465990, 647887, 435864, 714849, 660983, 415438, 543638, 1131738, 1131735, 534684, 733956, 643361, 463180, 671027, 1131724, 618695, 396098, 190164, 84797, 477552, 1131713, 399730, 1131703, 1131699, 642647, 192397, 370635, 466536, 549327, 658273, 522054, 707645, 386213, 463137, 562352, 440144, 169166, 595808, 508476, 450426, 588762, 680190, 636417, 618349, 629913, 422890, 277177, 642252, 447697, 515005, 503164, 259924, 584017, 558978, 694851, 672262, 78640, 537267, 581447, 1131613, 636093, 417895, 1131609, 180593, 739636, 625256, 731251, 188053, 449539, 475394, 738829, 419729, 1131593, 121843, 4383, 582098, 489238, 722550, 704182, 431159, 473020, 615000, 65692, 535699, 533613, 589875, 635379, 1131559, 1136425, 1131557, 464864, 746963, 563962, 1131554, 409031, 743777, 582128, 496927, 617223, 1131543, 677292, 540318, 243941, 747597, 578356, 1131533, 576964, 734678, 707689, 501442, 561499, 692815, 100940, 1131512, 473028, 1131510, 497632, 1131507, 383831, 
566937, 290290, 49850, 637080, 614540, 689461, 747285, 614620, 694270, 633073, 391077, 603796, 676426, 1131478, 415661, 402595, 515273, 1131467, 737266, 526352, 460855, 485891, 726929, 96565, 582339, 1131446, 586754, 505992, 251774, 588712, 677133, 455371, 710756, 342150, 604332, 679482, 17199, 451643, 755461, 259312, 1131415, 1131411, 725828, 564310, 608244, 327855, 675569, 706373, 724657, 283924, 702722, 220398, 598348, 150347, 1131396, 413054, 516429, 730062, 710329, 479687, 1131383, 737913, 469566, 591326, 429876, 405298, 21744, 551851, 507424, 630264, 499666, 561538, 316436, 623112, 494786, 515775, 641164, 1131343, 574337, 614338, 562030, 455513, 562904, 753220, 562697, 738248, 230808, 1131320, 484350, 198610, 718133, 685661, 402991, 607582, 500775, 134875, 747566, 429205, 707853, 1131307, 1136401, 1131301, 521329, 392124, 393611, 1131295, 622262, 527745, 227967, 399503, 441204, 1131278, 580227, 592329, 693494, 618024, 664540, 697374, 1131260, 1136397, 43548, 632020, 750167, 410387, 187330, 685998, 743021, 613923, 446160, 528174, 1131240, 707577, 567878, 240102, 619013, 458674, 1131227, 1131222, 412136, 515123, 571215, 429182, 1131216, 674956, 1131209, 438344, 403520, 632935, 112928, 651821, 696173, 1131192, 674691, 633998, 1131182, 565366, 533428, 273443, 1131173, 464548, 681173, 451150, 737112, 389501, 579601, 387851, 593455, 534021, 452761, 643223, 456807, 566216, 83666, 1131155, 454824, 634650, 242583, 488073, 47269, 522358, 430258, 711802, 455957, 538373, 577234, 714881, 535009, 716082, 521367, 486431, 490752, 1131104, 442307, 709522, 694739, 703736, 1131092, 692238, 708094, 405684, 1131075, 736703, 632755, 144050, 463660, 534279, 493020, 692201, 723457, 527398, 557952, 1131049, 1131048, 699817, 241937, 1131046, 390313, 540655, 494111, 457426, 479475, 709726, 701898, 430704, 613827, 754589, 688208, 352420, 508510, 150505, 418883, 595641, 153809, 427372, 1131013, 589844, 1131008, 517135, 1131005, 1131004, 540951, 661717, 671630, 28661, 1130996, 406838, 629420, 1130994, 719438, 1130988, 671720, 702952, 644204, 563542, 437910, 716995, 673666, 604500, 575378, 573622, 484886, 676791, 515531, 708693, 370750, 644678, 1130937, 753089, 724623, 219723, 1130935, 590560, 1130933, 571087, 515785, 555353, 684994, 603617, 503963, 397564, 226012, 262542, 405974, 603714, 177610, 389739, 682567, 1130897, 674513, 567881, 531311, 544191, 455561, 426367, 380561, 245295, 663156, 431674, 493829, 129347, 686392, 718295, 74000, 514264, 678466, 503613, 127437, 739166, 583772, 722441, 188318, 719371, 626218, 258390, 717111, 1130849, 418725, 595910, 513838, 674571, 1130837, 533312, 29811, 466878, 459503, 623603, 1130830, 694106, 593611, 569902, 725715, 668648, 402832, 530079, 534250, 327062, 1136350, 204088, 457951, 416646, 511466, 576357, 72015, 1130806, 568405, 1130805, 742695, 538393, 412073, 637960, 598934, 561834, 747004, 200289, 562002, 524438, 682859, 360822, 394208, 1130772, 78501, 628136, 445026, 689657, 486716, 576292, 682902, 435130, 721273, 563938, 446290, 128543, 699279, 529769, 721661, 717849, 626517, 475408, 493845, 587923, 639288, 1130732, 674504, 599504, 1130728, 543290, 1130726, 444598, 439766, 621550, 417717, 552319, 706900, 468021, 412410, 455359, 161162, 414393, 1130684, 527633, 720395, 576305, 484467, 619408, 515064, 452385, 686422, 567899, 1130672, 715765, 663755, 709494, 55079, 727291, 669130, 1130667, 428479, 706985, 570789, 262878, 616705, 83712, 612831, 319218, 619655, 396122, 522212, 729058, 745317, 438891, 1130640, 1130635, 740366, 452155, 734746, 409853, 624662, 403035, 593489, 
420872, 436325, 557806, 1130608, 488887, 596699, 344368, 416672, 614567, 554792, 575621, 456383, 547426, 547711, 521791, 672626, 148977, 514360, 25802, 456029, 510645, 506278, 642473, 73619, 552459, 1130575, 433683, 391829, 642144, 509654, 408986, 755381, 1130558, 546404, 561461, 665022, 1130548, 493826, 406237, 1130536, 680480, 1130524, 662687, 731691, 748843, 1130511, 414899, 491017, 569409, 560815, 412865, 712140, 388319, 491200, 297146, 695196, 499188, 115952, 524469, 14244, 130858, 593986, 489013, 541229, 29667, 126582, 458638, 497813, 418389, 123525, 675245, 592333, 640857, 1130449, 464240, 502221, 409003, 700345, 695697, 661986, 1130435, 578560, 1130431, 382254, 586049, 554435, 461052, 410169, 502453, 554031, 159535, 695737, 426684, 473062, 646245, 402135, 573471, 557944, 691188, 523197, 481998, 94642, 135936, 397550, 511215, 234583, 643749, 490936, 499385, 1130383, 644600, 1130378, 625691, 675823, 232508, 512536, 642301, 574114, 717873, 235560, 485567, 739828, 416823, 567870, 521342, 271620, 1130345, 636141, 30677, 444485, 621989, 1130340, 558003, 1130335, 594086, 405780, 1130327, 323085, 487275, 747961, 59330, 399595, 510004, 1130312, 696381, 435223, 517928, 1130307, 684324, 729023, 179219, 638898, 1130296, 585585, 706401, 326176, 730832, 546217, 531470, 514029, 599538, 1130277, 663182, 453350, 358771, 413075, 197375, 542015, 179066, 323565, 439146, 749326, 1130240, 50863, 1130232, 556923, 70752, 537610, 571009, 750029, 616483, 639157, 760171, 552670, 392181, 579092, 497356, 744538, 1130193, 498576, 413335, 444318, 558963, 644392, 584499, 474468, 61526, 246118, 46683, 712006, 406640, 494285, 587145, 1130156, 216656, 695643, 647777, 529272, 591718, 151408, 148503, 663916, 429843, 526266, 696148, 408157, 234277, 645349, 402799, 1130123, 703298, 464465, 6113, 727572, 495159, 68164, 596533, 1130115, 648174, 529043, 278691, 354076, 415184, 396312, 493439, 617033, 703240, 427221, 631852, 147550, 144138, 1130095, 479495, 501645, 79438, 412750, 30282, 1130084, 375206, 407457, 582138, 581229, 649335, 498021, 728112, 656101, 403914, 633411, 110375, 159767, 557592, 622238, 630123, 513303, 739171, 631665, 1130023, 749976, 1130015, 1130013, 611400, 600638, 1130006, 486312, 538718, 323959, 699896, 466456, 696519, 489166, 509832, 683410, 443766, 582390, 558809, 242115, 437191, 1129981, 416692, 427311, 520202, 278034, 727154, 670360, 1129973, 620882, 705905, 549722, 709492, 744835, 1129959, 659901, 473029, 668190, 480320, 671862, 396967, 566618, 629337, 444688, 497659, 693353, 662982, 655097, 715661, 430114, 428503, 596837, 394148, 506577, 1129896, 418615, 677304, 547770, 1129888, 6055, 407007, 1129886, 724767, 634856, 581844, 1129878, 596613, 721708, 169442, 749813, 664034, 611049, 547771, 729755, 626823, 397579, 62517, 500646, 721882, 490612, 284910, 594476, 241399, 585220, 394980, 732794, 1129841, 474961, 1129838, 745872, 582183, 410391, 1129835, 414970, 389506, 412503, 559629, 511806, 504057, 601649, 12553, 1129822, 418501, 535288, 407383, 743489, 500680, 393238, 712468, 399701, 736364, 536101, 467932, 738788, 469356, 193676, 668999, 400681, 516185, 532352, 691956, 177167, 424318, 422938, 423807, 572976, 530295, 369311, 448042, 680004, 536822, 748434, 451642, 1129769, 177775, 614001, 477793, 513836, 408908, 482580, 463421, 108813, 466454, 540572, 436582, 601783, 470680, 670004, 497483, 410329, 484646, 1129733, 221987, 1129731, 535067, 516941, 725044, 656422, 1136245, 555674, 700590, 667607, 334662, 703134, 412395, 143721, 565173, 236793, 1129700, 691711, 641617, 567820, 651708, 637695, 
664605, 625633, 658427, 62521, 61519, 636208, 593007, 722260, 89360, 488839, 325057, 684536, 553161, 600524, 411822, 516087, 542582, 1129650, 636814, 495608, 577746, 553571, 1129642, 662028, 431988, 650966, 500574, 230013, 1129630, 515217, 725969, 737654, 714335, 678724, 606791, 427033, 466865, 466968, 500832, 674396, 661623, 693880, 462794, 181329, 751571, 645149, 570875, 439879, 1136233, 460506, 514421, 602653, 544978, 594835, 686436, 111077, 534202, 631789, 583527, 706624, 675316, 404221, 549110, 388265, 549237, 633137, 716717, 661990, 132345, 600959, 726834, 642841, 249364, 645818, 752488, 705551, 368124, 458832, 418832, 1129518, 486839, 61075, 1129514, 463679, 680225, 739996, 1129508, 512264, 595689, 686229, 435526, 1129499, 734836, 237465, 617192, 89149, 596659, 620810, 188273, 547627, 548020, 402132, 426403, 204520, 508162, 536931, 745373, 624304, 1129483, 477474, 249814, 663878, 621190, 383220, 255025, 432162, 564558, 617318, 450640, 547661, 426435, 726971, 569053, 718490, 412407, 659825, 490358, 723295, 1136215, 1129452, 521026, 1129448, 448305, 733585, 1129444, 637187, 661763, 754567, 715756, 706501, 579403, 400491, 1129436, 581075, 416561, 1129433, 479358, 434008, 658865, 1136212, 630092, 524266, 533379, 35150, 552364, 430536, 1129422, 488571, 759514, 754609, 409736, 680230, 688646, 465156, 269425, 571432, 553530, 632671, 469121, 552458, 610414, 724606, 759101, 465920, 1129357, 1129356, 407302, 538850, 425694, 425438, 648925, 439929, 497477, 423254, 129008, 530174, 511841, 625685, 397803, 1129315, 756934, 603085, 645080, 512985, 552563, 628657, 388981, 18075, 321226, 758918, 213726, 453857, 567505, 598973, 684899, 412357, 643181, 617733, 92974, 686139, 657594, 653662, 571431, 510514, 626866, 591784, 595422, 594352, 613576, 1136198, 702508, 731662, 199831, 590445, 679625, 412182, 677485, 395326, 519027, 1129241, 723897, 599724, 442810, 1129232, 701050, 606110, 60900, 721942, 460002, 689019, 518796, 81017, 687671, 628109, 457609, 717538, 590726, 535449, 418157, 497919, 390770, 681047, 456674, 653450, 642760, 1129167, 541708, 738451, 599178, 36033, 452422, 703281, 733738, 687792, 202310, 752388, 432012, 1129145, 149491, 535668, 582756, 612251, 541429, 642328, 588848, 662167, 742808, 558303, 718489, 750114, 499364, 653517, 1129103, 400435, 533677, 648975, 1136890, 415479, 632456, 154904, 659458, 1136183, 580040, 1129085, 1129084, 521869, 516532, 691028, 755688, 726379, 713127, 432503, 593386, 1129067, 437986, 458616, 688815, 410946, 612158, 98415, 630845, 583249, 697861, 1129055, 611027, 1136180, 156707, 722996, 469976, 386091, 1129042, 396974, 747939, 675811, 172986, 749215, 468437, 173834, 264602, 566732, 643303, 556581, 664751, 57292, 461190, 461659, 124607, 89100, 734136, 92742, 635081, 14571, 518785, 196450, 675430, 613222, 1128995, 581535, 412, 598286, 403328, 653560, 474875, 602263, 416161, 1128981, 589171, 1128976, 742238, 404407, 750926, 544890, 511401, 448000, 608870, 637004, 1128958, 1128956, 503949, 208417, 1128954, 333182, 1128949, 419783, 591543, 637698, 402300, 1128939, 668300, 566407, 553988, 486939, 1128927, 537223, 176781, 748579, 1128917, 532494, 472232, 1128914, 647637, 1128903, 1128900, 79203, 271544, 96933, 1128888, 545791, 748018, 68360, 383029, 395660, 493218, 423863, 518578, 669798, 72298, 424280, 655607, 1128872, 561064, 1128860, 378866, 569527, 462111, 702919, 596645, 626166, 252887, 730156, 389192, 584757, 611785, 588563, 696261, 535610, 730541, 1128840, 399884, 389808, 470372, 294538, 411600, 487606, 428836, 609922, 64259, 1128821, 1128818, 427505, 
495243, 576792, 631266, 202726, 582484, 395665, 693097, 714863, 567976, 392882, 450290, 357777, 583824, 573221, 335374, 525433, 604884, 622467, 524369, 609071, 613079, 622187, 569862, 269960, 187574, 460663, 1128760, 418801, 604102, 1128752, 142656, 69841, 572432, 581303, 744230, 637909, 1128738, 450498, 640885, 505647, 1136152, 1128726, 154164, 493723, 477176, 327812, 651679, 453305, 697773, 121431, 415021, 400908, 581377, 236105, 735502, 459153, 1128691, 1128689, 322610, 646814, 449717, 680854, 250927, 541155, 1128681, 197174, 403824, 512569, 526597, 1128676, 738859, 569047, 1128668, 97873, 549663, 596007, 678053, 688140, 653413, 620231, 197875, 734529, 416738, 509003, 164798, 567024, 413801, 366118, 520273, 557046, 424856, 566615, 122690, 572446, 1128624, 543631, 1128620, 634302, 610923, 755062, 608491, 561885, 575606, 555772, 1128567, 480646, 706291, 472957, 564382, 44537, 1128562, 740168, 274605, 653579, 407816, 634055, 651969, 280704, 1128549, 659297, 447912, 631766, 408905, 173181, 522821, 624426, 1128539, 407575, 409057, 409708, 644637, 395821, 21003, 413172, 404156, 1128524, 412817, 440973, 576361, 626536, 345102, 655332, 1128511, 552956, 1128508, 708745, 409157, 1128494, 1128492, 22256, 406582, 1128484, 156181, 700430, 481345, 690869, 759007, 587999, 677421, 514241, 408686, 561991, 572170, 604126, 741514, 1128450, 610645, 725355, 751255, 636603, 1128432, 417905, 1057246, 812967, 1128427, 771994, 337952, 64882, 779025, 1128417, 818265, 771555, 845740, 320499, 820021, 946751, 783583, 1013322, 975809, 70472, 1037917, 968788, 1022698, 373224, 875528, 1128385, 1128380, 964482, 245463, 873986, 1029871, 1032729, 1037279, 149975, 1128346, 881533, 795754, 901007, 970549, 1128337, 959256, 1068313, 289517, 89877, 836062, 777839, 961525, 1078498, 242603, 986960, 1128319, 985840, 1030163, 985900, 857963, 797562, 1012478, 1128297, 1037088, 796223, 977852, 832897, 1128291, 1128287, 936422, 881142, 876885, 918446, 769310, 165237, 1128280, 795757, 947066, 840136, 1128276, 95167, 875518, 868055, 893658, 1136113, 854924, 1128264, 995898, 892353, 1045190, 1053031, 327873, 1011812, 838440, 132007, 1041905, 248407, 878415, 1018792, 874523, 774158, 1008502, 997671, 1076615, 227230, 1020019, 827488, 810830, 165287, 866615, 899741, 1000993, 937626, 1057030, 1136108, 1057241, 1014933, 1008191, 1055448, 22817, 809594, 136476, 899268, 933594, 999612, 1019246, 1128198, 887883, 947119, 917298, 860972, 1050241, 968631, 229107, 812644, 1034759, 857520, 929366, 982144, 843768, 42541, 229677, 1128166, 835940, 1128164, 1128160, 984952, 1032680, 894139, 926094, 992867, 936255, 1074449, 186071, 90368, 72476, 802667, 795872, 859387, 188445, 820387, 1033642, 1128119, 761430, 236359, 800142, 1005146, 998479, 1034491, 800703, 1049419, 167873, 772409, 1046115, 828068, 1054959, 805998, 978459, 1128087, 980023, 1128080, 916768, 1044449, 925041, 962160, 1128071, 191143, 1056313, 826354, 781539, 304380, 203790, 1128064, 797200, 22372, 803115, 957479, 768114, 991332, 897222, 809570, 295077, 786761, 243076, 796050, 1064020, 1128040, 999644, 239143, 1013556, 1128028, 1032719, 956231, 887242, 879329, 907807, 1020489, 967278, 893358, 1078015, 861865, 81693, 784092, 205959, 768939, 65305, 1127990, 1057936, 1040752, 935962, 107205, 825453, 949738, 952722, 924092, 1012829, 943014, 258617, 774027, 912931, 1127969, 893657, 829295, 1064659, 1127959, 772836, 932294, 71557, 1037376, 802776, 1127938, 856861, 1036776, 1016732, 1127932, 815939, 1003514, 1031609, 161027, 254740, 1024432, 903268, 1127914, 1127912, 136726, 1058856, 1043229, 
121484, 1013679, 859955, 987306, 864853, 893756, 1028701, 1127897, 1127896, 845232, 839841, 899014, 916107, 1063739, 1010069, 842886, 767589, 1064344, 905050, 811245, 880110, 1054797, 1070930, 252441, 841870, 763443, 849720, 1026733, 996146, 883176, 937486, 791397, 158817, 999522, 8718, 895721, 1034305, 824384, 877676, 1127822, 322709, 907310, 1127810, 24636, 833302, 811447, 858395, 1127802, 978096, 331648, 1034015, 27705, 56067, 148159, 925169, 834934, 1046166, 1078080, 771730, 1055227, 871301, 1030994, 67359, 958846, 1033381, 885018, 1127754, 892490, 788960, 1127753, 891987, 1127752, 174305, 78352, 1127741, 230824, 919556, 174039, 1127730, 112175, 885095, 71390, 839912, 990480, 901678, 1127718, 838709, 890075, 917100, 314262, 951320, 1034261, 1037159, 941866, 1001968, 673041, 869918, 978121, 1127697, 1127695, 20734, 779540, 830234, 960998, 916453, 931401, 972092, 854884, 233900, 226741, 253406, 1071255, 856393, 1066186, 171431, 809913, 1127674, 870172, 1127668, 859431, 763641, 1003630, 1032978, 767490, 766142, 1072559, 811974, 1036830, 252314, 821619, 168786, 770534, 67379, 844464, 8452, 954096, 815308, 1127621, 856978, 844433, 1010270, 761225, 803948, 867262, 968186, 988542, 1059674, 1063049, 792925, 1064595, 143062, 159842, 987894, 853267, 150207, 1127588, 994761, 975140, 352949, 1070417, 865909, 86290, 804916, 1127570, 1127567, 980996, 1127562, 1127561, 1065678, 1068306, 1127554, 935643, 883929, 1040730, 1127547, 946839, 941232, 968238, 836003, 860643, 1048716, 1073975, 847831, 870184, 797815, 890953, 920289, 925119, 979571, 841302, 8136, 1031152, 976102, 118484, 865206, 765659, 770356, 72956, 1136042, 793699, 864818, 932299, 949516, 1061852, 1068587, 981837, 827381, 859101, 979007, 902790, 871016, 779201, 918750, 1127498, 279176, 838421, 885663, 60764, 1127493, 1061326, 1127489, 1067801, 791513, 20010, 865218, 874196, 939698, 934193, 1127468, 790118, 923070, 1005149, 1021302, 1002690, 952938, 1059177, 1068025, 1127462, 864864, 911889, 1025104, 24462, 828518, 1127448, 1061590, 998480, 1127444, 54818, 903790, 988595, 180298, 894466, 1127434, 993838, 1127425, 1051206, 1015668, 764766, 890643, 919771, 1127411, 797565, 908897, 795460, 880092, 907635, 979086, 914186, 187585, 1040212, 1127398, 1013228, 192284, 991798, 916050, 907301, 872978, 1136028, 1080183, 831024, 789014, 1127378, 150873, 1127364, 925292, 1054749, 766379, 129435, 858696, 798239, 960142, 1127351, 1005502, 919760, 804755, 813370, 1049877, 919707, 1070083, 995599, 772833, 888413, 1127325, 947430, 843139, 761907, 1055761, 264434, 814908, 929714, 761014, 775430, 932859, 824644, 37706, 842042, 115718, 864762, 762072, 101478, 903661, 854655, 72228, 875937, 937168, 1050808, 869292, 783335, 976771, 761313, 979890, 868913, 981240, 1015949, 1037830, 1018202, 982019, 35370, 816794, 1008453, 982696, 129700, 838235, 1127195, 865638, 917436, 881246, 762434, 806450, 1030502, 1127188, 1127184, 65383, 904565, 1136013, 967706, 1053723, 1023767, 31169, 1043151, 976293, 1127177, 1014055, 297058, 1049092, 1127162, 834856, 858461, 1025444, 968552, 902969, 844140, 982810, 77565, 881067, 340541, 319123, 781808, 1136008, 977828, 266611, 899428, 800348, 1043976, 1127110, 316262, 295361, 791170, 995778, 1040530, 357297, 56962, 982229, 1072522, 1025072, 168655, 1127084, 1127081, 884436, 989526, 864694, 187675, 799293, 292227, 892584, 1127044, 1030954, 1011348, 942728, 861724, 966614, 966679, 18164, 1071370, 976827, 1127025, 226190, 782125, 937753, 971564, 955228, 955028, 1052076, 1009291, 960734, 952306, 1126994, 1017930, 896746, 1126981, 145391, 
1049202, 902586, 1126971, 878401, 1049390, 1126963, 855667, 985913, 994564, 913285, 1126958, 888689, 891634, 1013904, 916214, 975875, 1126948, 981828, 1126945, 1014189, 283348, 1126935, 881738, 1046757, 1126932, 996236, 1126931, 1080843, 1126925, 1081591, 1075711, 860145, 830462, 1126914, 240792, 1126910, 266390, 986693, 1001492, 73882, 1126880, 941093, 979044, 810631, 1126875, 1042543, 1007481, 87762, 1065739, 292284, 980185, 796451, 909549, 812006, 304449, 116653, 905706, 788034, 1002287, 1126817, 982967, 1126815, 896446, 870582, 1126807, 1019607, 1060142, 917813, 95381, 1073430, 1041628, 197487, 903975, 1049177, 233881, 1005869, 1052835, 877076, 1068315, 1077039, 999829, 1060669, 931621, 1126761, 974727, 1135966, 869887, 888762, 1126750, 1051205, 1035340, 819279, 1126742, 136473, 1126736, 858242, 776547, 1126711, 800274, 852966, 1051326, 803633, 984476, 12954, 911480, 855901, 981948, 929693, 761941, 1078365, 770648, 767499, 253693, 203321, 836888, 1126691, 278684, 931678, 1078766, 12048, 1126673, 899891, 123919, 904461, 798979, 798354, 1126658, 1065534, 873607, 1126654, 825961, 1126651, 955453, 331284, 92008, 908665, 1030949, 316302, 777578, 1033997, 1027534, 956670, 897892, 999910, 1000893, 1046684, 1126609, 821676, 793432, 325310, 1126587, 1076289, 935870, 980263, 799784, 858159, 892768, 856417, 992595, 795622, 1065206, 276928, 1053061, 833797, 881070, 1126545, 772968, 949129, 41048, 1049736, 1126533, 855243, 773025, 1016486, 1126531, 992162, 830306, 963943, 990969, 761883, 952768, 865754, 826731, 833544, 794347, 1126517, 853533, 811650, 1049180, 158469, 1126507, 303585, 271672, 1126499, 910699, 1135935, 1126491, 1002572, 939521, 1136859, 1135933, 122010, 927216, 868111, 250673, 823415, 1004948, 993055, 814791, 1078745, 966542, 915433, 8356, 1037981, 836498, 803596, 931027, 837140, 1065923, 1126425, 1061433, 852842, 1126416, 1126414, 802817, 910246, 1126403, 772885, 968995, 789140, 905574, 1070533, 25179, 869486, 867346, 930833, 981606, 833432, 367519, 1126380, 51514, 1126377, 1126374, 1081455, 971331, 903073, 921267, 8008, 995529, 879076, 1004774, 1126361, 908077, 845321, 960803, 981207, 883184, 935437, 917606, 65416, 906238, 232919, 963471, 858085, 888024, 1009408, 1056764, 9082, 131573, 1126331, 1081930, 762035, 811758, 975774, 810958, 1126317, 887806, 976678, 849869, 304430, 893117, 831794, 783751, 67545, 88116, 834181, 836636, 1032658, 932447, 767549, 959723, 866276, 881767, 1041743, 1035658, 1043138, 1016027, 232609, 958435, 998270, 1043346, 1126267, 845725, 840782, 807974, 1028772, 11258, 876934, 831217, 870157, 906391, 1052164, 831882, 1068326, 299461, 781689, 1126245, 1126244, 972647, 983987, 968667, 783989, 119263, 847301, 856149, 272863, 939453, 1050794, 954363, 1126223, 996876, 814920, 1126215, 974485, 908101, 922237, 961305, 1126209, 992904, 971415, 786171, 1022554, 896672, 807223, 899876, 964608, 792143, 59381, 1126178, 13823, 904780, 1041217, 842703, 1126146, 952165, 332824, 9129, 1049881, 877556, 1108939, 1112389, 792752, 1119729, 1105095, 1105103, 1128373, 1127622, 1124979, 885490, 1119827, 190044, 500575, 883785, 264403, 1108100, 421756, 1108307, 966413, 1111546, 156493, 1124145, 1110199, 1056204, 199143, 835929, 1063750, 1104031, 398483, 432930, 478605, 1044797, 1124464, 1107988, 130510, 1127893, 1135377, 1126206, 645693, 1133328, 646207, 1012021, 489204, 1119075, 573724, 600573, 1120447, 574575, 1055865, 494835, 1126814, 168216, 100983, 194013, 1119092, 1133167, 1133418, 427578, 324211, 11096, 1134787, 89928, 499920, 527433, 40578, 694342, 1125225, 1136427, 
1128856, 719381, 53175, 131651, 1037798, 915593, 264014, 1121402, 962179, 1117099, 744366, 277780, 1114563, 1014126, 1117346, 148538, 451602, 474735, 359349, 903469, 1115776, 1104492, 315637, 1112341, 588587, 706080, 117831, 1120868, 1111906, 523270, 133358, 67262, 1121166, 805321, 1129828, 131843, 104861, 833860, 207786, 691330, 1103528, 1132213, 335594, 1134138, 138632, 671071, 705609, 1114819, 855434, 1134463, 747511, 502261, 183378, 654723, 1117387, 479871, 541571, 1106007, 60235, 180442, 710347, 1124210, 287683, 490595, 291865, 794725, 1103812, 436600, 1047259, 964223, 564054, 87181, 1116052, 554515, 443396, 1123581, 714453, 972007, 929033, 433234, 1121709, 88073, 87452, 1005165, 1133249, 953067, 101169, 855410, 1121276, 1114646, 19335, 789700, 47923, 301524, 405717, 165633, 952774, 766511, 1106293, 452431, 1109818, 1047902, 306076, 551040, 1059231, 182539, 1115569, 351697, 904965, 292906, 662372, 364142, 20455, 1119058, 203318, 1126813, 240053, 1115392, 1113437, 1122461, 1116341, 1129237, 912070, 278813, 423273, 507445, 25129, 146187, 634428, 1121986, 321441, 532603, 1030303, 1037496, 1043135, 1045109, 1049519, 1051399, 1056416, 1064670, 1065636, 1071750, 1103153, 1103791, 1104501, 1105792, 1105860, 1106928, 1106979, 1107315, 1107440, 1108450, 1108466, 1108473, 1108651, 1108729, 1109699, 1109707, 1109850, 1110678, 1112142, 1113042, 1113256, 1114166, 1114286, 1114993, 1115210, 1116380, 1117817, 1117886, 1118370, 1118426, 1119118, 1119543, 1120588, 1121353, 1121879, 1122138, 1122767, 1122843, 1123657, 1124552, 1125632, 1125755, 1126523, 1126738, 1127004, 1127233, 1127540, 1128456, 1129081, 1130705, 1130734, 1130847, 1131069, 1132044, 1132247, 1132532, 1132842, 1132943, 1132950, 1133485, 1133579, 1134094, 1134207, 1134431, 1134680, 1134939, 1134988, 1135268, 1135283, 1135413, 1135626, 1136043, 1136047, 1136769, 1136962, 118440, 119821, 121171, 125659, 135802, 141630, 144862, 156498, 166046, 169208, 174463, 175920, 177604, 181626, 197312, 206106, 227873, 23849, 240158, 245052, 246883, 253749, 256942, 257119, 258062, 26703, 273695, 302846, 318362, 324585, 330501, 330975, 332593, 336901, 3505, 360721, 384356, 390360, 405163, 42255, 425632, 426175, 42752, 435548, 436707, 444389, 449367, 452915, 463271, 469589, 47210, 482726, 48792, 50122, 514096, 519025, 53233, 537060, 537817, 543273, 545355, 555530, 583468, 586148, 590019, 605127, 610265, 611953, 640502, 64647, 653399, 655526, 655914, 660198, 67316, 673670, 701453, 703782, 708979, 716113, 730539, 735482, 735922, 75198, 768208, 779302, 792635, 794223, 794429, 801118, 804066, 808400, 809525, 814183, 819983, 849550, 85020, 850358, 86606, 877809, 883915, 88495, 911232, 914916, 91576, 918162, 938400, 940547, 945835, 978031, 985594, 99005, 997622, 999466, 132622], + 'msmarco-passage-dev-subset': [1048585, 2, 524332, 1048642, 524447, 786674, 1048876, 1048917, 786786, 524699, 1048995, 786857, 524722, 873886, 524733, 786918, 786937, 1049085, 262232, 524835, 524848, 1049200, 1049221, 1049329, 1049368, 787255, 262974, 1049456, 1049774, 1049791, 525534, 1288, 1049894, 787784, 1049955, 1050007, 525779, 263670, 811852, 1576, 525868, 306105, 1050231, 1050253, 1050275, 526013, 263889, 788431, 264150, 526331, 788484, 1050670, 1050695, 264284, 1050747, 1050778, 2235, 264410, 788702, 1050857, 437291, 1050923, 526671, 788851, 264594, 830531, 1051095, 1051108, 1051112, 789037, 1051211, 1051214, 1051223, 1051229, 1051257, 264827, 526984, 1051279, 1051285, 1051307, 1051339, 1051352, 1051372, 2798, 1051422, 789292, 1051475, 789332, 1051520, 1051530, 2962, 
1051571, 961705, 1038859, 1091234, 1051723, 1051755, 1051808, 527568, 1051886, 1051902, 527625, 1051942, 1051943, 527769, 1052115, 527853, 265729, 790059, 1052274, 790178, 265960, 528117, 1052421, 1052427, 838116, 1052563, 1052585, 1091264, 1052615, 1052640, 830812, 1052717, 1005595, 1052948, 568709, 1052965, 1052985, 528760, 1053111, 528841, 525047, 266760, 1053219, 1053253, 4696, 791140, 266920, 791223, 529090, 267012, 529230, 4947, 1053611, 830973, 1053716, 568841, 267341, 1053896, 1053901, 1053931, 1053992, 1053997, 267566, 791862, 1054023, 791916, 568895, 267644, 918424, 1054186, 1054189, 529918, 1054328, 792187, 1054339, 1093231, 1054438, 1054450, 1054451, 1054468, 5925, 44686, 1054593, 1054595, 1054610, 1091163, 1054707, 792595, 6217, 1091337, 792688, 530572, 792742, 530601, 1054923, 1054958, 1054969, 792847, 1054999, 1055125, 1055176, 1055197, 1055351, 531142, 1091360, 1055505, 793475, 831302, 242713, 1055717, 531490, 1215, 1055889, 1093487, 1055940, 531676, 1056057, 1056060, 831380, 1056163, 1056211, 1056265, 307118, 1056303, 166111, 1056420, 1056437, 1056446, 1056482, 7968, 1056548, 270140, 1056580, 794469, 1056644, 1056726, 1056758, 794625, 525660, 1100134, 1056850, 270422, 1056950, 270520, 270521, 1057015, 270603, 794893, 270642, 1057098, 1057112, 1057139, 1057168, 1057251, 1057270, 88831, 8714, 1057334, 1057367, 533105, 8854, 1057446, 1057476, 1057488, 1057539, 569473, 1057631, 1057656, 9083, 533398, 1057708, 569507, 1057757, 1057937, 1057996, 9454, 1058036, 1058100, 795991, 1058140, 1058141, 1058142, 1058165, 1058182, 794665, 1058271, 1058325, 1058415, 1058442, 1058470, 272075, 1058515, 796383, 831784, 534305, 1058601, 1058604, 569674, 220151, 10205, 1058792, 1058822, 10276, 1058885, 10312, 534617, 272500, 1058952, 1058978, 1059045, 1059077, 1094039, 534941, 272815, 1059253, 1059287, 1059420, 1059421, 535142, 1059442, 273014, 1059496, 1059504, 11006, 1059601, 1059619, 11050, 1059646, 1059698, 11133, 1059801, 273449, 535599, 273481, 535627, 273522, 1059970, 535743, 1060039, 1060040, 176677, 1094191, 812190, 1060305, 1060342, 1060391, 798253, 798284, 1060462, 1094249, 11913, 1060496, 274067, 1060566, 274175, 1060616, 1060623, 536480, 1060795, 1006922, 570023, 262280, 1060868, 1060881, 613727, 536654, 45757, 482666, 798883, 1091545, 536791, 798945, 570068, 1004258, 1061167, 1061210, 1061237, 536995, 1094389, 12741, 1061324, 1061382, 274981, 1061472, 12903, 275049, 275137, 537301, 537410, 308032, 1061762, 275355, 537505, 537526, 275528, 275534, 13397, 537706, 537761, 275629, 537825, 1062190, 1062223, 1062233, 537995, 1062332, 1062334, 1062350, 275997, 1062457, 800318, 1062511, 838453, 1062589, 538309, 1062603, 1094605, 1062609, 538333, 538340, 276208, 46040, 1062687, 14151, 276298, 1062744, 276329, 276338, 276348, 1062784, 832508, 1062928, 1062961, 471850, 576601, 800987, 1063177, 818798, 1063349, 1063371, 276979, 1063461, 1063478, 14947, 14963, 539278, 15039, 801478, 15063, 1063644, 1063659, 1063702, 1063758, 1063765, 1063777, 539601, 277459, 1063892, 539648, 15382, 1063974, 277556, 15441, 801907, 277623, 277632, 1007473, 277701, 1064140, 277737, 1019649, 15607, 1064195, 1064206, 277785, 277799, 539957, 277977, 1064473, 278074, 802372, 1064518, 1064529, 832790, 540306, 253678, 1064687, 540432, 540456, 802634, 1064808, 1064852, 278429, 1094996, 1064961, 278542, 1065032, 278606, 278658, 1065118, 16559, 1065160, 540906, 1065227, 540983, 278863, 1095059, 803237, 541135, 16860, 803306, 1065494, 1065551, 1065558, 541272, 541274, 1065650, 279229, 17110, 1095121, 1065712, 541425, 
1065728, 1095126, 803599, 541557, 46579, 1065971, 1065985, 17430, 1066043, 1066116, 279718, 17635, 541948, 804103, 541969, 804197, 279987, 17848, 17884, 789439, 658372, 280223, 18101, 1066709, 1066716, 1066792, 804687, 1066916, 1066958, 1066966, 1066971, 542806, 323592, 804996, 1067276, 1067284, 18759, 280927, 543251, 134239, 1067587, 1067640, 1067659, 281270, 1067724, 1067764, 1067772, 1067826, 543644, 19457, 805900, 543813, 19552, 281702, 281704, 543849, 1095542, 543951, 1068276, 1068290, 544060, 281930, 1068408, 544123, 19940, 544277, 544308, 544319, 282214, 1068715, 806574, 1068726, 282397, 806688, 282411, 833507, 995787, 1068924, 20356, 1068952, 305650, 282530, 20432, 1069028, 544745, 1051990, 544811, 1069108, 1069128, 20597, 1069222, 20671, 544974, 1069313, 833579, 1069327, 1069344, 1069405, 1069474, 1069521, 1069556, 283141, 545359, 1052089, 1069717, 807585, 545450, 21185, 576822, 283344, 545575, 178325, 283548, 1069981, 1069983, 807880, 1070131, 545847, 808019, 21603, 1100581, 21741, 21765, 1095899, 1070412, 21861, 1070452, 284072, 808362, 21948, 1070546, 463373, 790110, 1095952, 284313, 546459, 22231, 808716, 1070867, 47419, 284565, 22479, 1071061, 546825, 1071198, 546956, 1071270, 547018, 547089, 1071389, 547139, 22882, 1071485, 1071534, 1071545, 547301, 285158, 1071598, 809556, 484454, 1071722, 166625, 285375, 831030, 23285, 809798, 47588, 285537, 1071992, 309745, 809933, 285656, 285729, 1072188, 548036, 810210, 810242, 548099, 1057007, 810270, 178741, 810324, 47674, 1072479, 1072513, 810394, 548254, 286160, 1072603, 1072750, 548475, 1052610, 810680, 1072874, 47741, 548673, 1072988, 24441, 790536, 1073358, 1073365, 24807, 549135, 24979, 1073569, 25025, 25036, 47864, 549342, 1073640, 1073721, 1073801, 1073805, 25294, 821372, 703268, 1073943, 1073972, 1073980, 1074001, 549731, 549738, 559507, 25534, 25603, 1096557, 287912, 1074499, 812387, 288139, 550331, 288200, 26079, 26207, 1074804, 1074807, 397592, 1096644, 550565, 812734, 1074883, 550609, 26334, 1074949, 1096667, 1074989, 1074995, 1074997, 934889, 288702, 26664, 1075244, 1075262, 1075313, 288884, 1075336, 1075348, 551119, 26847, 1075588, 1075591, 1075608, 1075636, 1075656, 551413, 289276, 1075713, 1075741, 786520, 572517, 1075919, 289556, 289586, 1076030, 813899, 1076078, 551819, 551860, 27618, 289812, 1076269, 27743, 1100783, 814282, 27932, 1074603, 28216, 814699, 290488, 834848, 290499, 48417, 1077000, 1077002, 1077006, 290585, 1077019, 290632, 814964, 814987, 552868, 815015, 290779, 1097040, 290830, 1077356, 1097066, 815243, 105709, 815420, 815421, 29097, 291248, 815580, 29169, 291396, 1077844, 922398, 291516, 29416, 1078187, 29612, 1078198, 1078222, 1097236, 816289, 1078446, 1078491, 292094, 1097259, 572978, 816483, 292225, 1078731, 1078752, 30188, 1078765, 554511, 1078906, 1078920, 554738, 816893, 1079050, 1079086, 292676, 1079231, 292813, 398335, 1079340, 1079434, 555179, 817349, 1079535, 227992, 817597, 555458, 791629, 31222, 1079815, 1079817, 1079831, 293401, 555558, 293421, 555590, 1079888, 1079959, 1079987, 31432, 1080010, 1080031, 555750, 555850, 31595, 952520, 1080253, 1080406, 1080419, 293992, 556144, 556166, 1080495, 556217, 556248, 1080537, 1080555, 556307, 879155, 1097619, 32176, 818612, 556476, 556587, 1080937, 1080939, 1080948, 1080950, 818819, 1080970, 818842, 1081086, 1081091, 32642, 556952, 556976, 1097723, 1081338, 557157, 1054071, 295135, 1081569, 1081595, 1081609, 557401, 557417, 1081730, 819618, 557492, 1081946, 1082002, 1082091, 1082117, 1082242, 1082263, 1082265, 1082281, 1082332, 1082336, 1082339, 
1082341, 1082351, 1082384, 1100986, 1082427, 1082445, 1082448, 1082455, 1082501, 1082502, 1082531, 1082536, 1082547, 558263, 1082576, 34015, 1082603, 1082607, 34039, 1082622, 1082653, 1082668, 1082730, 558448, 1082750, 1082751, 1082759, 1082779, 1082792, 1082807, 1082835, 1082840, 1082870, 1082872, 1082877, 1082893, 1082924, 1082947, 1082948, 1082966, 1082978, 1083000, 1083010, 1083017, 1083021, 1083052, 1083085, 839137, 1083092, 1083095, 1083108, 820973, 1083125, 1083127, 1083150, 1083152, 1083157, 1083158, 1083161, 1083243, 1083267, 1083268, 1083278, 1083285, 1083296, 559009, 559018, 1083307, 1098057, 1083332, 1083340, 1083341, 1083342, 1083345, 1083361, 1083362, 1083401, 1083410, 296993, 1083428, 1083430, 1083443, 1083472, 559198, 1083493, 1083499, 1083500, 1083502, 1083508, 1083517, 1083535, 1083537, 1083584, 1083597, 1083598, 1083611, 1083627, 1083641, 1083663, 1083675, 1083690, 1083713, 1083721, 1083722, 1083727, 1083743, 1054440, 1083783, 1083791, 1083797, 1083800, 1083819, 1083822, 1083831, 1083832, 1083846, 1083852, 1083865, 559607, 1083909, 1083933, 1083945, 1083948, 1083967, 559709, 1084038, 1084041, 1084075, 1084076, 1084086, 297672, 1084192, 1084197, 1084230, 1084233, 559959, 1084273, 1084276, 1084289, 1084301, 1084308, 1084324, 1084326, 1084330, 1084336, 1084354, 1084383, 1084403, 1084408, 1084435, 1084441, 1084469, 1084475, 1084478, 1084512, 1084516, 1084518, 560245, 1084555, 1084582, 1084599, 1084602, 1084603, 1084624, 1084686, 36133, 1084712, 1084713, 1084722, 822585, 1084755, 1084769, 822649, 1084814, 1084838, 1084848, 298444, 1084887, 1084889, 1084898, 1084906, 1084910, 1084930, 1084942, 1084971, 1084982, 1084986, 822859, 1085008, 1085013, 1085048, 1085139, 1085141, 1085192, 1085197, 1085229, 1085245, 1085279, 1085288, 1085303, 1085319, 1085327, 1085339, 1085341, 823203, 1085348, 1085351, 1085356, 298940, 1085386, 1085393, 1085421, 1085422, 1085434, 1085441, 1085454, 299023, 1085456, 1085457, 1085510, 1085517, 1085521, 299094, 1085532, 1085533, 1085535, 299110, 1085545, 1085550, 823421, 1085572, 1085584, 1085586, 1085613, 1085630, 1085658, 1085674, 576360, 1085733, 1085741, 1085760, 1085762, 1085764, 1085775, 1085777, 1085779, 1085780, 299350, 1085796, 1085804, 1085812, 1085842, 1085845, 1085862, 1085889, 1085918, 1085924, 1085930, 1085936, 1085943, 1085967, 1085980, 1086008, 1086014, 1086022, 1086046, 1086075, 1086085, 1086120, 1086145, 299732, 1086174, 1086186, 1086200, 1086224, 1086241, 1086266, 1086271, 1086281, 1086288, 1086296, 1086305, 1086309, 1086326, 1086354, 299939, 1086384, 1086385, 1086424, 1086430, 1086439, 530602, 1086468, 1086491, 1086498, 530611, 1086532, 1086555, 1086565, 1086575, 1086581, 1086594, 1086595, 1086628, 1086637, 1086679, 1086681, 1086693, 1086701, 1086708, 1086713, 1086715, 300312, 1011248, 1086760, 1086765, 1086834, 1086836, 1086855, 1086860, 1086874, 1086883, 1086886, 1086893, 1086915, 1086917, 1086927, 1086928, 1086942, 1086974, 1086976, 1087014, 1087018, 1087042, 1087046, 1087047, 1087050, 1087061, 1087066, 1087074, 1087077, 1087105, 300674, 1087114, 562827, 1087122, 1087126, 1087129, 1087171, 1087173, 1087185, 1087186, 1087204, 1087215, 1087226, 1087238, 1087269, 1087309, 1087317, 1087327, 1087351, 1087361, 300933, 1087375, 792900, 563119, 1087425, 1087435, 1087455, 1087484, 1087486, 1087487, 1087492, 301061, 1087514, 1087544, 1087556, 1087566, 1087581, 1087603, 1087604, 1087609, 1087634, 1087675, 1087680, 1087687, 1087690, 1087722, 1087727, 1087728, 1087729, 1087736, 1087764, 1087766, 1087774, 1087795, 1087803, 1087835, 1087848, 
1087858, 1087869, 1087870, 1087904, 1087911, 563652, 1087959, 1087967, 1087969, 1087999, 825948, 825954, 1088138, 39577, 1088164, 1088209, 1088210, 1088211, 1088221, 563943, 1088252, 1088254, 563995, 1088302, 1088309, 1088311, 1088332, 1088349, 1088358, 1088379, 1088434, 1088437, 1088444, 1088453, 1088475, 1088502, 1088510, 1088512, 1088515, 1088539, 1088541, 1088606, 1088628, 40056, 1088648, 1088653, 826513, 1088658, 1088685, 1088693, 1088715, 1088734, 1088742, 1088758, 302337, 564509, 1088800, 1088816, 1088832, 1088845, 1088856, 1088869, 1088875, 1088884, 1088889, 1088903, 40337, 1088915, 1088928, 1088938, 1088958, 1088960, 1088973, 1088987, 1088993, 1089001, 1089002, 1089021, 1089022, 1089026, 1089027, 1089036, 1089043, 1089044, 1089051, 1089071, 1089085, 1089093, 1089121, 1089143, 1089156, 1089158, 1089164, 1089167, 1089177, 1089214, 1089246, 1089273, 1089277, 1089286, 1089293, 1089312, 1089325, 1089355, 1089376, 1089401, 1089406, 1089408, 1089414, 1089434, 1089438, 1089443, 1089469, 560673, 1089498, 1089501, 1089511, 565231, 1089521, 1089541, 1011713, 1089558, 1089560, 1089576, 1089597, 1089619, 1089639, 1089645, 1089656, 1089674, 1089678, 1089683, 1089688, 1089691, 1089693, 1089696, 1089706, 1089719, 1089727, 1089750, 1089760, 1089763, 1089776, 1089787, 1089804, 1089805, 1089810, 1089832, 1089846, 1089868, 1089896, 1089906, 1089925, 1089940, 1089945, 1089964, 1089966, 1089983, 565696, 1090029, 1090043, 1090054, 1090063, 1090072, 1090077, 1090086, 1090100, 1090102, 1090107, 1090110, 1090115, 1090132, 1090146, 1090151, 565868, 1090165, 1090170, 1090171, 1090184, 303777, 303790, 1090242, 1090270, 1090350, 1090364, 303934, 1090374, 1090388, 813193, 1090395, 1090400, 1090456, 1090458, 566174, 1090472, 1090530, 1090540, 41969, 1090613, 566335, 1090624, 909221, 1090700, 1090701, 828596, 1090742, 1090758, 1090791, 1090808, 1011925, 1090833, 1090838, 1090839, 1090841, 1090842, 1090861, 1090869, 1090877, 1090886, 1090910, 828779, 1090924, 837181, 42361, 1090945, 1090965, 1090987, 1091015, 1091048, 1091059, 1091068, 1091108, 1091112, 1091115, 1091116, 42568, 400311, 1091153, 1091164, 829025, 1091173, 1091177, 1091194, 566946, 1091246, 829103, 1091255, 1091330, 1091340, 1091421, 567159, 1091450, 1091467, 1091471, 1091473, 1091479, 1091513, 1091520, 1091522, 1091529, 1091535, 829425, 1099433, 1091576, 1091595, 1091630, 1091643, 1091654, 1091659, 1091661, 1091665, 1091667, 1091681, 1091688, 1091715, 1091719, 567443, 567452, 1091749, 305333, 1091767, 1091786, 305361, 1091807, 1091811, 1091833, 1091865, 567630, 1091923, 1091941, 1091955, 1091983, 1092005, 1092007, 1092010, 1092013, 1092023, 1092042, 837375, 1092093, 1092095, 1092105, 1092108, 1092120, 1092143, 1092159, 1092161, 1092162, 1092165, 1092168, 1092176, 1092180, 567895, 1092203, 1092236, 1092237, 1092238, 1092257, 1092258, 1092259, 1092263, 1092297, 1092311, 1092327, 1092330, 1092342, 1092348, 43781, 1092391, 1092394, 1092416, 1092417, 1092470, 1092474, 1092482, 1092484, 1092522, 1092528, 1092543, 1092551, 1092557, 1092605, 924844, 1092643, 44072, 1092665, 830551, 1092715, 1092724, 1092738, 1092751, 1092756, 1092757, 1092759, 1092791, 1092792, 1092796, 568526, 1092822, 1092832, 1092858, 1092863, 1092865, 1092870, 568585, 1092910, 1092911, 1092919, 568649, 1092942, 1092952, 1092972, 1092978, 1092984, 568703, 1092996, 1093006, 1093031, 1093038, 1093042, 1093064, 1093094, 1093096, 1093104, 1093107, 1093112, 1093128, 1093172, 1093179, 1093181, 831052, 1093200, 1093202, 1093235, 1093238, 1093255, 1093256, 1093305, 1093312, 1093359, 1093399, 
1093405, 1093406, 1093407, 1093410, 1093419, 307005, 1093438, 1093443, 1093444, 831315, 1093462, 1093481, 206762, 1093534, 1093540, 1093552, 1093556, 1093561, 1093564, 1093570, 1099767, 1093621, 1093637, 1093650, 1093682, 1093698, 45125, 831560, 1093717, 1093723, 1093732, 831601, 1093750, 1093757, 1093773, 1093781, 1093786, 1093791, 1093795, 569555, 1093845, 1012431, 1093855, 1093866, 1093875, 1093881, 1093901, 1093915, 1093920, 307492, 1093926, 1093927, 1093941, 307521, 1093959, 1093962, 1093966, 1093971, 569689, 1093998, 94953, 831871, 1094027, 1094056, 1094062, 1056159, 1094081, 1094085, 51276, 831962, 1094110, 1094141, 1094145, 45590, 1094175, 1094197, 1094204, 1094215, 1094220, 569939, 1094232, 1094241, 1094271, 1094275, 1094316, 832188, 570070, 1094364, 1094369, 1094370, 1094394, 1094406, 1094440, 1094451, 1094453, 1094460, 1094469, 1094477, 45924, 1094501, 1094509, 1094519, 1094536, 1094566, 1094578, 1094579, 1094612, 1094634, 46095, 1094689, 1094691, 1094693, 1094699, 1094724, 1094755, 1094759, 1094825, 1094840, 1094869, 1094943, 1094959, 1094962, 1094982, 1094991, 1094999, 1095012, 570725, 1095055, 1095058, 1095066, 1095085, 1095092, 1095108, 308687, 1095155, 570905, 1095233, 1095238, 570979, 1095278, 46711, 1095306, 1095308, 1095332, 1095335, 1095354, 1095357, 1095360, 1095371, 1095377, 571103, 1095437, 1095469, 1095478, 1095490, 1095495, 1095523, 1095537, 1056405, 1095555, 1095557, 1095558, 1095560, 1095566, 1095571, 1095631, 1095633, 1095641, 1095650, 1095654, 1095687, 1095699, 1095704, 1095705, 1095711, 1095716, 1095723, 1095725, 1095747, 1095749, 571474, 1095787, 1095798, 1095806, 1095807, 1095816, 1095845, 47270, 1095856, 1095857, 1095864, 1095868, 1095874, 1095876, 1095881, 1095921, 1095922, 1095928, 1095942, 1095955, 1095966, 1095971, 1095982, 571696, 1095988, 1095994, 95286, 1096006, 1096021, 1096025, 1096044, 1096045, 1096049, 1096065, 1096087, 1096118, 1096126, 1096180, 1096207, 1096211, 1096227, 571954, 1096252, 1096256, 1096258, 1096262, 1096268, 47716, 1096311, 1096347, 1096357, 1096360, 1096368, 1096371, 1096375, 1096376, 1096401, 1096425, 1096429, 1096431, 1096454, 1096457, 1096463, 1096475, 1096476, 1096479, 1096493, 1096498, 1096509, 1096516, 1096527, 1096532, 1096533, 1096541, 1096543, 1096551, 572286, 1096605, 1096607, 1096610, 1096619, 1096620, 1096628, 1096641, 1096656, 1096658, 1096694, 1096712, 1096739, 1096776, 1096787, 1096788, 1096794, 1096823, 1096827, 1096830, 1096840, 1096850, 1096855, 1096866, 1096870, 1096886, 1096887, 1096911, 1096932, 1096943, 1096944, 1096945, 1096947, 1096958, 1096964, 1096983, 1096998, 1097014, 1097023, 1097027, 1097069, 1097087, 1097093, 1097100, 1097118, 1097119, 1097135, 1097153, 1097154, 1097195, 1097198, 313940, 1097213, 1097223, 1097242, 310853, 1097294, 1097298, 1097304, 1097314, 1097317, 835206, 1097359, 1097373, 310948, 1097386, 1097438, 573157, 1097448, 1097449, 1097461, 1097469, 1097492, 311067, 1097508, 1097523, 1097537, 1056742, 48998, 1097585, 1097602, 835478, 1097674, 1097721, 573452, 1097746, 1097786, 1097796, 49234, 1005907, 1097894, 1097905, 1097906, 1097909, 1097937, 1097979, 1097995, 1097999, 1098010, 49435, 1098013, 1098044, 1098048, 1098071, 1098090, 1098101, 1098102, 1098110, 1098111, 1098169, 1098180, 1098182, 1098222, 1098226, 1098236, 573954, 1098249, 1098276, 1098284, 574002, 1098322, 1098338, 1098354, 1098355, 1098422, 1098440, 1098452, 1098481, 1098497, 1098510, 1098520, 1098523, 1098536, 1098556, 1098561, 1098570, 1098600, 1098608, 1098609, 1098641, 1098646, 1098698, 1098719, 1098725, 1098763, 
1098765, 836640, 1098787, 312368, 1098802, 1098804, 1098809, 574547, 1098846, 574569, 1098860, 1098874, 1098895, 1098905, 1098909, 1098927, 1098967, 836832, 1099050, 1099065, 1099072, 1099084, 1099099, 1099108, 1099178, 1099189, 1099217, 1099219, 1099226, 574944, 1099244, 1099284, 1099288, 1099290, 1099321, 1099340, 1099342, 837202, 1099351, 1099368, 575096, 1099391, 575146, 1099451, 1099452, 1099456, 50891, 1099482, 1099488, 1099530, 575268, 1099595, 837476, 1099626, 1099632, 1099636, 1099653, 1099656, 1099670, 575407, 1099700, 1099706, 1099729, 1099733, 1099739, 1099756, 1099761, 575492, 1099805, 1099806, 1099816, 1099834, 1099836, 1099855, 1099859, 1099865, 1099880, 1099888, 1099903, 1099911, 1099914, 1099943, 1099947, 1099955, 1099980, 1099981, 1099985, 1099998, 1100010, 1100035, 1100051, 1100064, 1100070, 1100077, 1100094, 1100105, 1100106, 1100119, 1100137, 1100138, 1100143, 1100151, 1100167, 1100168, 1100173, 1100187, 1100188, 1100190, 1100192, 1100218, 1100224, 1100226, 1100229, 838101, 1100299, 1100308, 1100319, 1100357, 1100370, 1100403, 1100415, 1100438, 1100454, 1100455, 1100457, 1100458, 576195, 1100486, 1100488, 1100492, 1100496, 1100499, 1100505, 1100533, 1100537, 1100541, 1100544, 1100580, 576312, 1100634, 1100640, 1100661, 1100687, 1100724, 1100732, 314307, 576452, 1100765, 1100772, 1100816, 1100839, 1100852, 1100855, 1100875, 1100919, 1100930, 1100933, 1100980, 838845, 1101018, 1101044, 1101048, 1101055, 1101088, 1101090, 1101121, 1101171, 1101172, 1101173, 1101211, 1101214, 1101228, 1101236, 1101259, 1101276, 1101278, 1101279, 1101296, 1101300, 1101336, 1101341, 1101347, 8798, 1101374, 1101394, 577131, 445714, 1101434, 1101448, 577167, 1101466, 1101467, 1101503, 1101535, 1101552, 315131, 1101566, 1101567, 1101568, 1101576, 1101603, 1101661, 1101665, 1101668, 1101670, 1101674, 1101706, 1101714, 315291, 1101739, 1101761, 1101131, 53191, 1101784, 1101806, 576851, 1101822, 1101861, 1101869, 1101870, 1101902, 1101906, 1101961, 1101977, 1101995, 1102001, 839878, 1102028, 1102088, 577813, 1102121, 1102163, 840053, 1102206, 577930, 1102235, 1102240, 1102262, 1102300, 315884, 1102325, 1102330, 1102335, 53814, 1102393, 1102400, 53897, 708038, 53991, 840445, 1101271, 839128, 54040, 1092930, 578362, 1101282, 840532, 1101298, 54199, 1101303, 54235, 571237, 54307, 314907, 578735, 578783, 574730, 54531, 54544, 841020, 1101365, 54648, 795540, 316803, 841165, 54819, 54843, 579133, 882982, 1101443, 841521, 227637, 579479, 55223, 841665, 1014131, 841919, 841961, 841980, 489858, 842070, 842108, 55682, 55691, 140367, 842221, 842223, 842272, 55848, 842333, 318073, 580313, 56033, 580411, 580450, 842596, 56188, 580483, 988253, 839528, 53109, 1101698, 843140, 1101721, 1101723, 56740, 953355, 36388, 843409, 56993, 795951, 577511, 581521, 57258, 581552, 581666, 319564, 1101845, 319652, 581801, 1101868, 57614, 1101871, 581975, 844128, 796056, 970830, 604229, 582146, 792977, 320025, 57882, 320051, 844390, 320117, 58074, 58130, 844594, 320320, 1058284, 582557, 582641, 752473, 58409, 582705, 58551, 582848, 58571, 58583, 1093082, 320792, 58801, 320970, 1014697, 845304, 583234, 59030, 583325, 583369, 59084, 321239, 845529, 1086279, 59190, 59204, 59217, 845719, 583611, 845790, 59392, 583686, 59426, 845888, 845892, 1102177, 583766, 583798, 9926, 583916, 840061, 846082, 59654, 307504, 1084389, 321918, 321951, 846291, 1014885, 846438, 1049484, 322211, 846513, 322345, 584500, 584569, 584592, 60339, 60357, 846806, 584695, 1093142, 1102351, 584905, 60677, 141185, 578100, 53813, 1058717, 585165, 10157, 323096, 
323154, 585344, 585378, 61180, 323382, 61277, 847722, 847726, 323535, 585680, 323555, 61452, 585743, 585806, 1005888, 61531, 61623, 1058853, 848100, 323815, 61836, 323998, 61882, 586268, 848432, 324159, 848478, 1093196, 62055, 62136, 62411, 62439, 586740, 586785, 324645, 586790, 62554, 586916, 62648, 849142, 272605, 849337, 927989, 587326, 849561, 325292, 63152, 587524, 199442, 587674, 403793, 97972, 63548, 587853, 176701, 578607, 325929, 850236, 185397, 588122, 51054, 850450, 326190, 850555, 850557, 64179, 971904, 326410, 326417, 588627, 326509, 850820, 753479, 447648, 326629, 588829, 851004, 326719, 64711, 64960, 851425, 65000, 65038, 851490, 589423, 489257, 65267, 589564, 851813, 65488, 589777, 327640, 65583, 65584, 852037, 327750, 589903, 65627, 852179, 65770, 1093322, 328072, 65957, 271038, 66161, 66281, 328474, 328527, 66389, 1058425, 328611, 328629, 328704, 590861, 1093349, 590945, 328814, 591026, 66771, 66908, 853344, 329114, 1091569, 591310, 853471, 203003, 67200, 853646, 329369, 853699, 1102099, 329515, 853882, 1059820, 853995, 591898, 591940, 854085, 591993, 28442, 329901, 67802, 329958, 592192, 592220, 592235, 672429, 68095, 220495, 592495, 330419, 330450, 592601, 854785, 1016281, 592672, 330560, 854862, 330640, 1049767, 987720, 855031, 613318, 330792, 68832, 593135, 593275, 331141, 855546, 331352, 593541, 855725, 855727, 593732, 593792, 69506, 855968, 856171, 230082, 69789, 69871, 594295, 856568, 70340, 332600, 70504, 594793, 594831, 70595, 594930, 332797, 70709, 70720, 70787, 70852, 1093491, 333327, 71238, 988754, 595568, 595577, 333434, 831474, 333486, 1093507, 333579, 1049867, 333700, 596088, 596130, 1016869, 878615, 596468, 334433, 72398, 334558, 596716, 72435, 99461, 72485, 334754, 72613, 788035, 334867, 334904, 334916, 859229, 859274, 859376, 73094, 597384, 73106, 597395, 73119, 46081, 859669, 73257, 859870, 335710, 335711, 597870, 860071, 335910, 99676, 73788, 860266, 73853, 860462, 860542, 796812, 860655, 74328, 74356, 860942, 598802, 74637, 861169, 74759, 337073, 861403, 857943, 861433, 337190, 337209, 798967, 449442, 599524, 599550, 75266, 709559, 75335, 75342, 337509, 599720, 667932, 875417, 75608, 75698, 75717, 337864, 56323, 75801, 338040, 405867, 862345, 600231, 1102390, 862448, 600350, 862640, 76283, 862856, 338637, 1050033, 338696, 338713, 143849, 863112, 76770, 338917, 1017687, 601128, 339009, 863387, 77034, 863623, 863738, 77323, 601624, 601629, 339501, 77391, 601684, 77424, 77491, 339888, 339934, 339981, 340006, 77878, 602352, 864507, 602413, 602652, 78418, 1017966, 340712, 602957, 78730, 603031, 603050, 755878, 341039, 865426, 865476, 865518, 341317, 603773, 865971, 341736, 866101, 79698, 866139, 866251, 604113, 604153, 79891, 362845, 342115, 866428, 342156, 342285, 342450, 604619, 604628, 604673, 1061994, 80590, 604954, 1018359, 80876, 605169, 867490, 81075, 81137, 605467, 605648, 867947, 821068, 81649, 974808, 868184, 606117, 343976, 868410, 81993, 868487, 868525, 82100, 82161, 868598, 344400, 57402, 82293, 57411, 82379, 868919, 995654, 869035, 606944, 712832, 82842, 869308, 869348, 82949, 607292, 607338, 259885, 975040, 607374, 869519, 345350, 345453, 607599, 869759, 869827, 83448, 83458, 869891, 83506, 607855, 83621, 450851, 188714, 1016611, 870348, 608323, 870544, 916306, 870693, 608557, 870861, 870875, 84473, 84520, 844211, 997932, 609024, 84778, 609104, 871376, 85053, 85095, 1091690, 609469, 800652, 609628, 954307, 347491, 609799, 872081, 609956, 610056, 975495, 610128, 610132, 610190, 85904, 872347, 482412, 85954, 348136, 86094, 348242, 1019236, 
610425, 800792, 872632, 86203, 872655, 1019262, 86264, 872777, 872823, 931905, 872855, 610716, 872869, 348594, 872946, 610898, 86624, 669800, 610940, 348869, 873250, 348994, 611199, 611271, 87019, 611366, 611468, 58234, 611747, 873914, 975775, 189355, 87701, 874299, 87892, 87926, 874455, 88160, 612471, 874691, 1090542, 88284, 88375, 612670, 874827, 874876, 858391, 874914, 612846, 88577, 535421, 613179, 613214, 613233, 89143, 622734, 875787, 875796, 1019783, 613694, 757644, 89418, 613755, 875986, 613852, 613870, 89610, 351820, 614047, 89777, 614069, 888777, 89786, 614121, 614186, 614286, 352236, 614409, 90169, 90209, 614598, 876924, 352818, 877161, 1063709, 932639, 615219, 90941, 877453, 615383, 615457, 615624, 91345, 877810, 877845, 91422, 353623, 877938, 91711, 91722, 616045, 91790, 91881, 878367, 616331, 354222, 823596, 616447, 92260, 980726, 354515, 878817, 878840, 92437, 627085, 878959, 92542, 471983, 617167, 617246, 1085497, 321363, 879657, 93234, 93308, 93311, 355458, 879747, 617611, 355484, 998223, 1064155, 617795, 93649, 1020500, 93823, 618223, 452572, 880527, 618408, 618486, 880766, 618818, 356916, 94782, 619087, 94865, 619159, 540109, 357162, 881582, 357340, 881695, 881723, 619675, 95409, 619805, 38098, 882002, 95651, 882141, 358150, 358240, 147090, 96250, 96310, 358455, 96379, 96420, 1091765, 96602, 1020999, 620992, 96749, 883282, 359040, 1094361, 621419, 97295, 359499, 883861, 453175, 97612, 97652, 97766, 759038, 622100, 97895, 97964, 989644, 884533, 98151, 802794, 884722, 693736, 622658, 622725, 884870, 884878, 893642, 42555, 622893, 885081, 98682, 885153, 885184, 98817, 98847, 885301, 885308, 623281, 885433, 885505, 1024599, 99183, 247717, 99267, 234998, 361594, 361620, 885932, 885986, 99556, 623857, 99805, 624143, 362016, 624176, 886332, 624199, 362076, 886382, 100013, 100020, 100046, 822937, 624503, 100250, 100307, 624644, 100364, 584727, 624790, 605651, 497360, 624876, 100616, 100661, 625022, 1065388, 143025, 887392, 887395, 887398, 919913, 625458, 363332, 1065448, 101451, 887906, 363637, 625782, 888100, 626005, 626232, 626318, 888559, 626462, 888796, 978259, 626701, 888911, 888934, 102506, 191853, 888988, 889046, 102627, 626918, 889104, 102695, 483521, 889289, 365044, 627323, 103125, 627513, 1094575, 847415, 628056, 628085, 1022198, 890532, 824000, 628532, 104290, 890890, 978605, 803861, 891082, 891498, 891565, 367290, 517245, 891719, 1049926, 1066161, 17586, 105549, 300306, 760367, 892224, 892329, 892454, 368229, 630391, 106125, 106508, 368728, 630905, 893275, 107077, 893681, 1022762, 107283, 164946, 893789, 631724, 236362, 894161, 632055, 323798, 107812, 632106, 369981, 632192, 370068, 1094727, 108037, 632394, 1094731, 632455, 370316, 894610, 632536, 632625, 108507, 632825, 804523, 370734, 108622, 632923, 370979, 542431, 633153, 149221, 371204, 633350, 760930, 633617, 633635, 1023111, 1042399, 895932, 371695, 633916, 109647, 633986, 633994, 634038, 109819, 634113, 634126, 109841, 634174, 372070, 896383, 518675, 372137, 326637, 634412, 634489, 372378, 634583, 896931, 372792, 635044, 635058, 635079, 897240, 635125, 804905, 635150, 635237, 897401, 373121, 463133, 897476, 373209, 635497, 635626, 897789, 635647, 897910, 897953, 635823, 897981, 111723, 630318, 636188, 280796, 111995, 112035, 280825, 761425, 1094863, 636434, 636437, 898686, 898714, 112318, 149801, 112477, 374690, 636853, 636929, 374799, 636949, 112718, 717845, 899212, 637208, 637234, 899423, 18840, 375291, 929046, 637459, 899800, 899869, 900062, 900076, 900077, 113664, 113732, 375891, 900450, 900599, 638503, 
900696, 900731, 376537, 638723, 900924, 638795, 114573, 114633, 114638, 638928, 849245, 639084, 313262, 639163, 901355, 639412, 639545, 639560, 115365, 674595, 1086248, 377805, 115704, 640103, 115833, 412532, 115930, 640232, 47513, 902657, 79763, 902855, 116431, 116455, 1067990, 902919, 902931, 116517, 1024312, 309402, 893271, 903235, 116820, 849596, 641156, 955359, 116939, 237945, 641284, 641293, 117036, 903479, 117113, 379337, 641583, 641618, 903811, 980789, 904007, 117683, 980811, 117728, 642032, 194430, 642352, 904542, 1100639, 904727, 118365, 118457, 380755, 905057, 905479, 119089, 119168, 905604, 643561, 905707, 643572, 905766, 119534, 119683, 906126, 644023, 119761, 194724, 544260, 119975, 1068584, 906692, 644658, 833268, 644746, 824542, 907046, 907127, 645024, 645252, 121017, 121023, 38946, 645343, 907538, 1095165, 645472, 645604, 850361, 907997, 645892, 908069, 908154, 646071, 908237, 908316, 646179, 908489, 646354, 384406, 646623, 564707, 122440, 831815, 122582, 719488, 122639, 909111, 384845, 909176, 122807, 384985, 647260, 909547, 20520, 647503, 647687, 680490, 909886, 385652, 123529, 647872, 647876, 647949, 123710, 910150, 648049, 648119, 123859, 798469, 910375, 123975, 124128, 910622, 910777, 910818, 910870, 386653, 124534, 545052, 648877, 911032, 911056, 588775, 124787, 649110, 649200, 492988, 649294, 562821, 64528, 257309, 649451, 911605, 108287, 588888, 387603, 649763, 387662, 125545, 649893, 125627, 865384, 387848, 125705, 387864, 650076, 912234, 125842, 125898, 125996, 650378, 650462, 850919, 388465, 388588, 912879, 912898, 126491, 126525, 912961, 912992, 913098, 913137, 913286, 651187, 913374, 913509, 127098, 389258, 913568, 913579, 1098953, 632726, 389385, 127315, 850957, 938773, 127682, 977467, 914321, 914368, 914406, 127984, 914545, 128158, 128166, 128178, 128200, 914637, 1026258, 652556, 914707, 914771, 390484, 1086675, 807845, 178859, 652912, 128633, 390813, 652961, 895263, 653041, 938963, 653054, 653092, 915305, 653187, 391101, 391125, 915544, 129205, 129228, 129229, 915762, 915769, 129491, 129517, 391662, 129641, 129684, 916186, 129792, 129837, 130034, 392195, 654459, 392350, 633375, 392393, 392488, 654633, 392501, 633399, 786477, 916901, 1070324, 917015, 917022, 808200, 1070361, 655046, 21793, 392905, 655057, 392936, 130825, 917283, 917334, 130932, 917489, 393203, 917536, 393268, 393420, 393462, 917789, 371420, 917825, 1026789, 393696, 131597, 131665, 153027, 393881, 131768, 393954, 824920, 131873, 394021, 918324, 131925, 394095, 656250, 656345, 656371, 656376, 132104, 132151, 240489, 132263, 656602, 132317, 132359, 918800, 132473, 656859, 808528, 983299, 132639, 657091, 919310, 395038, 1070728, 657204, 896479, 657264, 133037, 395382, 919712, 395538, 939744, 395786, 657974, 763878, 920218, 808746, 134014, 920458, 658498, 396391, 920717, 920753, 658667, 920885, 789997, 921173, 66154, 134861, 659182, 921348, 659230, 397090, 659247, 135079, 921621, 397417, 921812, 135386, 135464, 135465, 135516, 922024, 135633, 135635, 659929, 660046, 922335, 660220, 922389, 109276, 22670, 136098, 398258, 136157, 922593, 660479, 136209, 660534, 398447, 660672, 660803, 660957, 136700, 503381, 660999, 661028, 661076, 459707, 661398, 399364, 399414, 399527, 137411, 137440, 399617, 137508, 853057, 924047, 661945, 662016, 399970, 137889, 137919, 662282, 662334, 138127, 924567, 662436, 138223, 662524, 138266, 372586, 924895, 400631, 138492, 924978, 400692, 126821, 925059, 138629, 138640, 400803, 663006, 138793, 663131, 139090, 663388, 925571, 401287, 139239, 925766, 23223, 663679, 
110614, 139405, 663771, 401640, 925951, 663820, 926019, 663890, 926064, 663950, 401878, 1028179, 139767, 664138, 139897, 664194, 139929, 402075, 926436, 140161, 402318, 140216, 140238, 926700, 402417, 402427, 926980, 140696, 1090961, 665009, 927196, 140804, 809909, 198246, 140921, 665231, 403095, 927553, 766272, 547820, 403361, 403388, 403454, 161828, 141353, 766301, 722615, 141472, 984774, 665972, 141694, 488021, 403954, 404051, 142039, 928478, 404202, 928567, 928572, 142153, 928753, 589586, 142382, 666694, 142411, 666792, 404713, 142579, 142782, 142831, 667136, 405036, 929372, 405090, 1090329, 929473, 667373, 405238, 405310, 405330, 1028752, 667535, 143293, 143424, 143464, 405660, 1072500, 1072506, 405737, 930124, 155041, 405985, 930293, 930326, 111377, 406140, 679360, 144028, 406181, 930483, 406205, 941515, 930534, 930549, 930621, 406351, 406386, 144254, 144285, 930721, 406525, 406576, 144491, 1034839, 144528, 406718, 746785, 144682, 24115, 144694, 931147, 240504, 669046, 406923, 406974, 144857, 407102, 407131, 669288, 407274, 669427, 669444, 931726, 931772, 810660, 931940, 407662, 669979, 407869, 670022, 145821, 670142, 145877, 408134, 408149, 932495, 417570, 408275, 670437, 670476, 146212, 146244, 146269, 408419, 408427, 932735, 670600, 1090352, 408563, 932878, 670829, 408696, 408739, 146598, 408765, 933132, 408945, 933236, 146812, 409071, 671219, 409143, 898318, 57270, 409207, 147073, 933551, 147166, 933652, 671579, 933742, 147337, 671692, 409557, 933861, 933946, 147542, 409694, 934134, 409854, 409887, 934223, 934235, 672109, 148016, 1052414, 672352, 672433, 792789, 942221, 934795, 148424, 672753, 934964, 148564, 286915, 148633, 243244, 148761, 148777, 148851, 673143, 898631, 935358, 935362, 935364, 811266, 149161, 935707, 680250, 149447, 800243, 411660, 935952, 935973, 855050, 549219, 149670, 549235, 673984, 936182, 149767, 149790, 411953, 936273, 301777, 149853, 936501, 150087, 412319, 412340, 412352, 674702, 412597, 674914, 986316, 412982, 413040, 413079, 820027, 937427, 151011, 156251, 937578, 413404, 675719, 937947, 151547, 938066, 938140, 413858, 413905, 200062, 938359, 676275, 414155, 25344, 414276, 676454, 768133, 152519, 414714, 939020, 414733, 152598, 414757, 414799, 939104, 1029291, 677212, 415165, 153037, 939473, 153048, 677460, 418752, 677519, 415474, 415500, 677672, 939866, 506181, 637254, 563771, 153588, 677936, 415815, 153739, 153794, 415962, 678176, 940386, 1096257, 153981, 768411, 416228, 154301, 416457, 154372, 940916, 940940, 678913, 154633, 416846, 165135, 154785, 941219, 679167, 417040, 417080, 200600, 155056, 808235, 155086, 679390, 155119, 417362, 155234, 417404, 941749, 679658, 941865, 637576, 417664, 679878, 155700, 417902, 417946, 680102, 418032, 418063, 942354, 418165, 680324, 418195, 156052, 680373, 418353, 156215, 942651, 680514, 418423, 156379, 418552, 755907, 942915, 418633, 680951, 92509, 943170, 943190, 594105, 418926, 418977, 156889, 681514, 681264, 157149, 419326, 419333, 681791, 419692, 157580, 681944, 1031173, 682025, 944181, 944194, 944245, 682105, 682205, 944451, 769085, 682365, 900164, 244808, 682425, 420304, 420365, 420400, 944700, 682626, 214771, 944949, 420673, 682910, 113826, 987644, 420867, 683045, 987657, 420934, 987660, 420980, 158887, 26485, 507086, 421145, 159078, 421437, 1075156, 267187, 159667, 421813, 1031502, 507221, 114037, 946428, 422152, 422268, 1031580, 684459, 422398, 160255, 160312, 946747, 160339, 422501, 944231, 946825, 422609, 422624, 684780, 160562, 422827, 684977, 160694, 160735, 685091, 160808, 422955, 990938, 685177, 
682190, 423178, 565856, 161117, 947678, 685591, 947785, 161418, 423608, 423646, 947974, 616415, 423878, 424045, 424092, 948397, 686260, 686290, 948452, 948532, 551309, 1031910, 686469, 424408, 424449, 162351, 424509, 948829, 686739, 686746, 424753, 813536, 162662, 424898, 425072, 1100937, 687245, 163038, 813605, 638849, 687375, 1032074, 425330, 425375, 949686, 988412, 687615, 687632, 425505, 813675, 625205, 163570, 163602, 950139, 249866, 163860, 163912, 950355, 426214, 426347, 1079141, 164282, 426442, 688644, 901206, 426504, 950799, 688711, 1075980, 688739, 426622, 164528, 164912, 689223, 427086, 165002, 165007, 813953, 427340, 202306, 951820, 689700, 817309, 689851, 689885, 952047, 690010, 165807, 907173, 952378, 952388, 428113, 952445, 952452, 166043, 690508, 952658, 690565, 428424, 690606, 166403, 690705, 166468, 952926, 690801, 464663, 36025, 953020, 428773, 690956, 428819, 166680, 428847, 691004, 166748, 691055, 166784, 428941, 953274, 691141, 953332, 953351, 683193, 857956, 953445, 167156, 167204, 691507, 167229, 691709, 167436, 429664, 429675, 167566, 167620, 290091, 464860, 590433, 167994, 430142, 168000, 954455, 168069, 430229, 168175, 945535, 692494, 168238, 954711, 692577, 66707, 1096742, 989296, 955093, 692955, 955117, 168787, 955220, 693101, 814568, 430985, 430989, 693152, 693162, 693297, 693447, 693469, 431481, 693636, 693642, 169390, 955911, 508855, 302435, 956060, 694063, 169778, 858421, 596282, 956403, 432161, 28352, 1033249, 956624, 694560, 694561, 694678, 432602, 837372, 432653, 432680, 694845, 956993, 1090513, 170581, 432811, 432874, 170770, 170788, 695238, 695240, 170982, 433220, 957607, 957688, 433415, 171370, 433549, 433579, 171527, 433680, 433685, 433691, 171691, 695993, 958142, 171776, 241405, 958311, 1033534, 171906, 696217, 696242, 696312, 172062, 696404, 434369, 434462, 696738, 958993, 959034, 172608, 696918, 959083, 434835, 727837, 172787, 959228, 172981, 173001, 435412, 815320, 173391, 435541, 959854, 697780, 960003, 435794, 697972, 697983, 960265, 960302, 436091, 436100, 960397, 960437, 436249, 960566, 698445, 174273, 436475, 174344, 698719, 436586, 436602, 29089, 698828, 174592, 961048, 961097, 436844, 436847, 72809, 946631, 436924, 961255, 699243, 1010700, 437165, 815618, 961579, 437324, 175251, 990414, 961921, 961950, 437671, 699837, 699872, 699873, 437752, 175625, 903097, 437914, 509907, 438058, 700224, 962443, 176015, 176065, 438286, 438316, 438324, 962731, 422600, 438455, 772129, 700618, 700641, 700835, 700871, 176744, 815891, 439061, 176994, 439176, 701335, 701345, 701390, 963564, 177221, 439375, 177238, 145569, 963788, 1034446, 701663, 128113, 439731, 160671, 964054, 964152, 248086, 440098, 422893, 440269, 964577, 1034587, 440362, 1034595, 160787, 178468, 702790, 702792, 702855, 178612, 178627, 440802, 1034679, 1087076, 178825, 160885, 1034703, 699510, 703211, 703270, 441128, 703383, 965578, 292021, 441409, 703765, 510513, 29921, 441734, 860078, 996301, 259128, 947466, 704072, 704080, 826518, 704223, 704236, 991210, 704398, 442377, 30039, 991241, 442455, 442491, 442525, 442593, 564668, 442673, 180592, 967106, 180693, 772864, 443027, 180887, 311540, 180902, 161224, 443081, 204924, 705279, 181144, 181222, 1035098, 181301, 443489, 705681, 181394, 705687, 181476, 181531, 968071, 968206, 372674, 968310, 706167, 706215, 205086, 948797, 706342, 182081, 968560, 968608, 444350, 1035278, 182393, 969023, 969066, 444790, 706950, 760638, 860573, 729508, 969264, 445094, 183046, 538570, 85018, 183201, 707513, 969750, 445494, 707670, 445573, 707721, 904389, 969974, 
707835, 117977, 970152, 183723, 729672, 445908, 970242, 183874, 183880, 1035535, 183988, 184105, 708438, 970605, 708517, 184235, 184249, 643359, 970824, 184436, 708739, 184452, 708781, 708904, 184621, 467683, 446834, 971213, 971233, 336648, 49802, 184916, 971378, 185009, 447169, 709342, 948351, 30860, 447340, 971633, 249321, 971653, 709560, 185276, 971729, 185299, 447540, 447551, 709802, 992132, 972064, 709936, 447797, 30956, 185879, 448035, 448123, 710297, 448183, 186063, 186265, 972699, 205809, 186390, 186446, 710755, 448630, 448745, 710914, 773858, 186727, 448975, 448976, 948653, 973362, 686541, 449235, 449244, 187186, 1079785, 481341, 992407, 973731, 711682, 711710, 711759, 973917, 711803, 711811, 711840, 879150, 449750, 1079868, 974201, 974220, 187818, 118702, 450093, 1036214, 249802, 188134, 974670, 712545, 450681, 992618, 450788, 712944, 450854, 450921, 713134, 992677, 188908, 451070, 1101110, 713360, 189115, 49943, 713448, 189174, 975688, 451406, 189312, 451484, 451609, 189466, 975997, 1088718, 452200, 190078, 190212, 190307, 714636, 714672, 714678, 976829, 714709, 976941, 190601, 381321, 715189, 453220, 453270, 715508, 715588, 453451, 977770, 977952, 191536, 453705, 978057, 191632, 453851, 453856, 453869, 191792, 454018, 191971, 454258, 716641, 978802, 192502, 192579, 979054, 979133, 454872, 192894, 993353, 455273, 556489, 717563, 979713, 455456, 993419, 979787, 512825, 762455, 193422, 717751, 717763, 455659, 455743, 455776, 455782, 455793, 455853, 455862, 980168, 193742, 718112, 193866, 456016, 306806, 193968, 718444, 456305, 980633, 294518, 456443, 456551, 718782, 194531, 981006, 456734, 133977, 194750, 194870, 981400, 1092422, 719411, 195199, 195440, 272047, 719749, 457622, 573899, 382119, 457714, 195582, 457809, 195693, 457842, 862742, 720013, 982348, 458064, 458110, 720261, 982481, 458235, 196111, 1091384, 196232, 196250, 775457, 196453, 1081321, 196596, 458771, 458774, 196720, 458885, 513397, 993996, 1080229, 196949, 196963, 983438, 983451, 197024, 983499, 983543, 721409, 459280, 459291, 128772, 983708, 459481, 197542, 721885, 984075, 1037826, 984178, 459948, 197945, 197964, 984434, 460162, 722352, 984499, 722413, 722515, 460403, 984770, 984856, 984930, 984948, 906901, 984992, 198581, 722981, 985158, 985165, 985167, 985173, 985207, 198807, 985259, 985275, 723144, 985304, 985360, 461078, 985371, 985431, 985433, 985461, 1036656, 461281, 199177, 985644, 985653, 985736, 985752, 461491, 298113, 461601, 985905, 723781, 199572, 295406, 986068, 986162, 986197, 199776, 986210, 724121, 199837, 986325, 986411, 724275, 986427, 986472, 200042, 986484, 986494, 724410, 462301, 724571, 776122, 724579, 200296, 986733, 844658, 994582, 986791, 986793, 724680, 855029, 986852, 724733, 986932, 986935, 986936, 986972, 724872, 724887, 462765, 987066, 724947, 987100, 1010524, 987183, 725047, 987192, 987230, 987237, 462979, 987309, 425688, 987486, 987502, 463230, 907334, 987567, 987573, 201154, 987671, 463443, 987791, 201366, 201376, 987809, 987822, 987823, 987845, 987914, 732631, 463635, 987978, 725867, 725951, 988119, 988121, 988122, 988124, 801059, 988142, 988149, 988169, 988211, 726076, 726098, 988269, 1038527, 988294, 988306, 988416, 202006, 988504, 202073, 988512, 202081, 988540, 988636, 988653, 988710, 464440, 156723, 988742, 988743, 988745, 726614, 464484, 988787, 820161, 988911, 988915, 988954, 988960, 988988, 989042, 989099, 989108, 989213, 202797, 1097885, 727224, 202954, 989396, 995029, 776576, 203039, 1005500, 252295, 989530, 989543, 989573, 776609, 989647, 989676, 727551, 203274, 
203317, 203390, 989831, 727699, 727707, 989855, 989866, 989870, 203458, 989894, 727765, 989912, 727779, 989963, 989994, 990010, 990026, 203646, 990093, 203688, 990176, 990197, 728060, 990223, 728110, 728150, 990307, 990345, 1038871, 990375, 1038879, 466162, 990459, 990481, 466202, 990526, 466252, 1045554, 728460, 466335, 990649, 990763, 990784, 990841, 990852, 1097939, 466640, 728823, 728836, 990995, 466738, 991032, 991044, 466774, 991064, 991079, 991111, 991138, 991171, 1082701, 991207, 991240, 729173, 991324, 991342, 991364, 685717, 991383, 991419, 991471, 205107, 467274, 991590, 991598, 991662, 205251, 991685, 991748, 991761, 991762, 991782, 991832, 729697, 991854, 467597, 991894, 467612, 991938, 514851, 165335, 992120, 792463, 205741, 992184, 992191, 992193, 992224, 50498, 992257, 992340, 992363, 992365, 992367, 730229, 992383, 730278, 992433, 992531, 992535, 206117, 992559, 992605, 992652, 992659, 992660, 992729, 78076, 992757, 730626, 992802, 992839, 992840, 992869, 165480, 992946, 992949, 992950, 206549, 993041, 468762, 993107, 993153, 993174, 993178, 468907, 993234, 820899, 206806, 206819, 993255, 993320, 993492, 993501, 427730, 993544, 993606, 993627, 993651, 1010173, 207251, 731545, 993748, 993795, 993821, 469535, 993834, 731723, 993876, 731736, 993883, 993987, 994005, 994012, 207595, 731886, 731902, 994070, 994085, 994087, 469819, 994112, 994133, 994228, 470001, 994311, 994338, 994397, 732288, 994449, 994478, 994479, 994533, 574317, 208145, 732448, 208198, 470385, 994688, 208265, 1083293, 470459, 515317, 732618, 208339, 994792, 994830, 208411, 818421, 470611, 994918, 208494, 994947, 932223, 208610, 995125, 864905, 995141, 995176, 995212, 995221, 1045709, 208822, 470982, 995280, 471007, 733186, 995380, 995443, 995526, 733422, 995576, 995595, 995598, 879869, 733510, 297019, 733591, 995756, 995789, 995805, 995806, 995825, 733692, 471705, 996011, 733892, 996042, 996054, 209651, 996119, 775138, 996181, 209764, 209730, 996272, 472024, 996328, 734198, 996414, 1092450, 734426, 559318, 996634, 472359, 472448, 1083642, 996835, 210442, 296441, 996922, 997044, 997086, 1083686, 997122, 1083704, 997227, 997351, 914845, 997449, 997481, 735343, 735384, 735387, 997533, 997542, 473319, 251445, 997648, 997649, 473394, 997713, 997744, 473492, 909048, 997860, 997872, 997878, 211468, 997913, 997935, 1083839, 998013, 735895, 998062, 998101, 211691, 473886, 998192, 473935, 998246, 998247, 998248, 736125, 998309, 998381, 998417, 998482, 998493, 474234, 1083926, 1040238, 998569, 998591, 998609, 212195, 998646, 998658, 212236, 998675, 998680, 998681, 1083952, 474419, 998735, 998834, 736713, 212435, 998891, 998903, 212477, 998941, 474659, 996623, 999028, 999086, 999089, 999110, 696677, 474873, 996653, 909273, 999192, 212796, 999261, 559771, 999356, 999385, 999391, 865616, 999416, 999439, 999517, 999518, 999550, 999552, 999555, 999567, 999610, 999637, 737512, 865660, 999685, 475402, 999756, 999791, 999836, 999897, 999921, 999942, 1000000, 1000004, 1000006, 1000017, 1000030, 1000083, 737940, 996805, 1000097, 1000170, 996825, 1000232, 1000272, 738162, 738165, 1000459, 214040, 1000509, 1000585, 909506, 1000619, 738484, 1000678, 1000681, 476483, 1000798, 1000864, 1000906, 1000951, 560059, 738931, 476807, 1001108, 574051, 822218, 476947, 476977, 1001279, 1040703, 997017, 1001381, 477100, 1001397, 1001454, 477286, 477309, 477380, 739599, 1001810, 739671, 739743, 1001903, 1001926, 477639, 1001981, 210690, 1001999, 215603, 1002058, 487279, 1002145, 1002148, 1002197, 1002238, 1002252, 1002274, 50833, 1002330, 
478054, 478063, 740263, 35996, 1002426, 1002482, 1002554, 740416, 1002584, 1002585, 1002596, 478359, 1002716, 1002737, 740624, 1002887, 1002889, 740762, 1002938, 1002940, 478691, 740852, 1002997, 1003003, 1003006, 1003015, 675320, 478827, 1003210, 1003213, 560419, 1003239, 478981, 1003277, 1003299, 1003329, 1003334, 1003351, 1003359, 1041043, 741274, 1003445, 1003481, 1003482, 1003507, 741392, 1003557, 1003561, 479284, 1003590, 1003603, 479379, 217246, 469873, 822642, 36214, 479525, 1003831, 1003849, 479570, 1003875, 1003880, 1003884, 1003973, 1003997, 741970, 741977, 731759, 742022, 1004191, 1004199, 1004228, 1004233, 1004240, 1004243, 1004254, 862701, 1004322, 480064, 565915, 1084905, 218000, 1004493, 827791, 742446, 480504, 742667, 480536, 298550, 1004921, 1004940, 1004949, 298565, 742822, 1005113, 1005131, 742988, 473361, 1005163, 1085035, 743046, 1005191, 480932, 36473, 1046047, 1005475, 1005476, 1005520, 1080968, 517117, 481297, 1005586, 1005653, 481387, 1005678, 1005798, 743668, 743675, 743693, 743696, 1005949, 1006000, 743868, 997808, 1006199, 744092, 481961, 744109, 954144, 744261, 1006459, 1006489, 1006509, 220087, 1006578, 1006580, 36703, 1006751, 482496, 1006791, 1006852, 744764, 1006911, 1006987, 744891, 482808, 517386, 220761, 1007242, 688218, 483028, 1007382, 129565, 483178, 483241, 745402, 1007550, 211621, 1007606, 745469, 1007628, 1007673, 1007691, 1007696, 745559, 998093, 1007875, 745746, 1007934, 745794, 1007959, 1007972, 745830, 483795, 745944, 221664, 746055, 1008208, 746065, 998174, 1063607, 1008515, 1008516, 80712, 746438, 222158, 118448, 1041951, 1008830, 484551, 1008911, 1008947, 1008951, 1008968, 1008977, 1008979, 1009023, 1009109, 823549, 1085697, 1009183, 1009237, 222954, 1009388, 561448, 747345, 1009527, 485287, 223165, 1009610, 1009668, 1009695, 1009724, 1009742, 1009749, 485558, 747720, 223468, 1009959, 1009961, 1009994, 51090, 1010048, 1010057, 1010059, 1042158, 747937, 747985, 1010151, 998485, 748054, 736347, 1010277, 1010287, 1085888, 748321, 1010527, 1010537, 486274, 1010607, 1010615, 486370, 1010670, 996317, 307008, 605363, 224314, 486512, 748672, 486623, 748771, 224548, 1011003, 1011018, 1011021, 1011044, 224626, 748935, 998641, 1011120, 1011140, 748997, 1011166, 1011328, 212251, 1011381, 1011382, 749244, 749267, 1011512, 1011529, 749399, 1011618, 1011663, 212303, 1011721, 487569, 1011860, 749752, 94798, 225499, 1012026, 998802, 749955, 225752, 837740, 750111, 824080, 1012329, 1012464, 488198, 839488, 37685, 1012547, 226132, 750421, 750487, 998905, 488416, 226335, 1012780, 1012865, 1012866, 226461, 488676, 750821, 488711, 998965, 750946, 488825, 1013114, 1013229, 1013267, 1013304, 1086391, 1013367, 1013424, 1013492, 824282, 1013579, 1013592, 1013615, 212634, 489374, 227317, 1013797, 489513, 1086477, 751778, 751797, 1013965, 227591, 824371, 1014115, 1014132, 37952, 1014210, 489931, 1014242, 649640, 1014264, 36965, 227968, 167371, 490505, 752700, 1014884, 228474, 1014911, 300246, 1015055, 780850, 490802, 228738, 490883, 753040, 490903, 753071, 1015307, 753168, 1015347, 753214, 753299, 1015556, 753480, 1094395, 1015641, 261650, 753517, 212977, 229325, 1015766, 169305, 491585, 81945, 1016013, 1016015, 999469, 999481, 1016154, 1016254, 754113, 562594, 754166, 754191, 1016406, 1092441, 1016460, 1016547, 1016565, 1086933, 1016583, 230179, 754509, 1016676, 1016703, 431602, 1016790, 1016879, 1016915, 754786, 1087589, 1016943, 10264, 1098806, 492681, 1087001, 999641, 492853, 1043337, 230725, 755040, 1017204, 755093, 1017276, 230891, 1017348, 1090915, 999691, 
755275, 824938, 1017476, 1017498, 1017524, 1017529, 1017537, 231109, 755459, 1017605, 755465, 1017692, 1017706, 231292, 231298, 1017734, 1017773, 1017775, 1045527, 493508, 1017830, 493543, 1017892, 231482, 1017952, 1017971, 1018032, 1018056, 38608, 231717, 193581, 985372, 231877, 494086, 1043545, 1018525, 494346, 1018658, 825147, 1018807, 863187, 1018918, 494730, 1013570, 756949, 232703, 1019179, 1019200, 495018, 1019356, 495082, 1019405, 1019414, 868953, 757275, 1019433, 1019470, 1019602, 757511, 836044, 1019705, 1019724, 6791, 495483, 1019787, 1019830, 495680, 1087532, 1020198, 758074, 1020244, 233904, 1020376, 496175, 496244, 234114, 496276, 234165, 758519, 1020710, 1020724, 563347, 234388, 563359, 758720, 1020907, 1020915, 912899, 968004, 496717, 758901, 758909, 1021065, 234644, 234651, 1040312, 344955, 759021, 1021170, 1000319, 759062, 1021241, 234821, 1021277, 1021318, 1021324, 1021327, 825583, 1087735, 497107, 497132, 1021446, 235027, 235089, 1021532, 1021554, 1021605, 1021639, 759503, 759515, 1021679, 1021682, 1021695, 497470, 1021797, 497536, 497596, 1021900, 1021907, 1021931, 235534, 1021971, 1022022, 497757, 1000472, 1022124, 1022132, 924398, 1022178, 760070, 235832, 257772, 1022359, 1022370, 1000519, 1022410, 1022442, 1087915, 1022577, 39360, 1044249, 1022620, 1022621, 694726, 1022630, 760512, 738422, 498398, 1000574, 1022712, 1022735, 498478, 1022769, 1022782, 1022832, 236427, 1022907, 1022911, 760817, 388950, 236580, 236582, 1023025, 760908, 236708, 761032, 236801, 1088043, 761096, 738525, 499068, 400696, 1023363, 236949, 499126, 761388, 499413, 761627, 1023782, 237370, 237373, 1023838, 1023850, 499568, 1088153, 237561, 1024034, 1024069, 499818, 761963, 1024166, 1024176, 499904, 762059, 1024221, 1055921, 762111, 1024288, 1024300, 1024305, 313438, 237936, 762296, 870422, 1024528, 1024591, 1024592, 1024667, 1024669, 1024672, 762558, 1024727, 826153, 762652, 1024835, 1024893, 1024904, 762761, 1024950, 1000959, 1088347, 618979, 1099077, 1025188, 763084, 1025259, 1025270, 1025290, 238886, 476724, 1025348, 1090358, 1025483, 239189, 1025624, 1090558, 1044755, 1025714, 763619, 1025801, 1011811, 39908, 1025895, 1099105, 239511, 239516, 1025991, 239648, 1026098, 1026148, 239830, 764139, 1026372, 165116, 258485, 1026711, 1026768, 1026775, 1003114, 1026799, 764691, 1026991, 240584, 1083997, 1027178, 1027209, 765147, 1027373, 741267, 1027650, 765512, 1027669, 503390, 503401, 8701, 765583, 608197, 1027785, 1027812, 1027817, 1027865, 503580, 503607, 1027919, 503674, 1028098, 503833, 1028131, 863499, 504044, 766202, 766238, 242019, 242061, 242103, 1028538, 242107, 1028555, 504306, 1028598, 1028608, 504335, 242219, 1028652, 1088947, 1028670, 1028711, 1028742, 1028753, 1028755, 1028796, 766769, 84106, 766804, 766808, 1029003, 1029016, 1029030, 1029031, 1029058, 1029124, 1029181, 1045347, 242863, 767248, 505107, 127876, 1029402, 505152, 505171, 1029492, 1029544, 1029552, 243139, 1029617, 1029681, 1029694, 1029772, 1029791, 767671, 505541, 243416, 767745, 1029908, 1029909, 1045494, 505810, 243712, 1030176, 558046, 243761, 1030215, 1030230, 1030271, 506025, 1030324, 1030378, 1030381, 1030388, 1030446, 1030451, 244092, 1030617, 1030623, 1030722, 506438, 1030823, 302878, 506579, 477648, 1030924, 1031032, 1031033, 1031047, 1031054, 1031118, 1031240, 244821, 506985, 244902, 241246, 244929, 507087, 1031456, 245120, 739913, 507381, 1031679, 1031682, 1031684, 507434, 769630, 303045, 245416, 1031861, 156566, 1031909, 1031976, 1031999, 1032011, 1032019, 1032156, 1032182, 507901, 1032198, 507934, 1032281, 
770167, 1032341, 245921, 770233, 508104, 508316, 783781, 1099746, 770604, 1032758, 1032822, 837467, 1089670, 1033007, 770894, 246626, 1033092, 1033205, 1033250, 1033296, 771170, 771239, 1033398, 509111, 509114, 1033443, 771314, 575616, 1033580, 247194, 1033652, 1046093, 1033703, 1033718, 1033725, 1033759, 776392, 771694, 771734, 1033912, 1033927, 1033962, 509730, 1034039, 1034050, 488345, 1034136, 1034172, 772055, 1034204, 510018, 478220, 1034409, 510152, 510158, 510229, 827801, 1034666, 1034680, 510444, 1034761, 478295, 248385, 1034845, 1035006, 772928, 1035078, 510858, 510867, 510893, 226509, 1035228, 1035247, 773155, 1035321, 1035367, 1035379, 1035383, 511101, 1035410, 249118, 249176, 511330, 511367, 511417, 1035719, 1035805, 1035861, 1035874, 1035931, 1036002, 1036005, 249618, 828036, 773924, 511837, 773998, 511861, 249792, 774087, 1046520, 1036244, 249821, 512087, 1036380, 1036385, 828093, 595236, 244011, 1036542, 512278, 427323, 1036627, 250228, 1036675, 512405, 1090291, 1036782, 1036784, 250367, 1036800, 1090311, 512564, 512685, 1036999, 774866, 1037033, 250636, 512807, 1037104, 1037116, 1037188, 1090377, 1037250, 863817, 1037302, 1037341, 513061, 1090399, 1037373, 740876, 1037407, 357519, 775297, 1090413, 775343, 775355, 776517, 251172, 1099495, 1037662, 1037686, 1037689, 1037722, 1037781, 1037817, 1037872, 1037881, 281002, 283154, 513779, 1038161, 391481, 156688, 1038184, 1090537, 776080, 1090550, 1090789, 1090796, 1090596, 1101531, 1038592, 776465, 174249, 1038678, 1038685, 532142, 523413, 1038719, 1038724, 1038755, 1038830, 776700, 1038849, 1046969, 959589, 1039002, 1039052, 514767, 252632, 1003319, 1039195, 1039298, 1090727, 1039346, 1090730, 1039361, 837681, 828588, 777235, 777297, 515185, 1039495, 1039521, 1039586, 515335, 777519, 1039728, 1039746, 515573, 777792, 1040022, 1040030, 1047152, 1040038, 1040064, 1040082, 1040088, 1040099, 515813, 778095, 778139, 1090887, 516029, 1040353, 253965, 253966, 1040409, 1040461, 1040507, 1040532, 1040684, 820267, 1040694, 516413, 1040793, 1040848, 1040959, 778857, 778890, 1041050, 254652, 778948, 1041146, 1041159, 1041226, 1021053, 994867, 1012328, 517085, 1003695, 1091080, 255027, 1091082, 1041473, 1041520, 779475, 779553, 1041703, 1041714, 1041753, 357664, 517516, 779674, 255469, 1041924, 1091158, 1041948, 617968, 560357, 255633, 1042099, 1091189, 829050, 1091206, 780215, 1042364, 1042426, 780297, 780336, 256052, 1042488, 1042507, 1091269, 256192, 1042626, 1042676, 1042752, 780613, 1042800, 427532, 1042978, 1043064, 794160, 780993, 256783, 781074, 518940, 1043413, 829087, 519145, 471197, 257018, 994941, 1043568, 1043587, 1043658, 1043702, 1091461, 257335, 1043815, 1043914, 1043955, 1043969, 1043995, 781877, 1044041, 782079, 1044244, 1004167, 257885, 782253, 904295, 520184, 86701, 782381, 782417, 782426, 1099726, 782549, 258337, 1091633, 1044809, 782696, 520627, 520636, 1047365, 1045071, 1045072, 520816, 1091692, 1045135, 1045203, 1045208, 1091706, 1045227, 1045229, 783098, 558548, 521018, 1045374, 783277, 611152, 1045540, 1045567, 783433, 783843, 259239, 521402, 1045717, 783602, 1045826, 783687, 259417, 1045853, 1045855, 525467, 783822, 1046042, 1091850, 521801, 783963, 783981, 521851, 1046161, 259763, 259781, 1048185, 522076, 1046384, 1046387, 522151, 1046463, 1046475, 1046567, 1046569, 260172, 1046648, 784549, 1046736, 1046750, 1092517, 1048282, 1091973, 784700, 1099803, 1046931, 784805, 1046952, 567714, 1047010, 1047012, 1047088, 784961, 1047138, 1092029, 1047160, 1047162, 260762, 522953, 567759, 1047269, 260853, 785176, 523062, 
1047386, 1099823, 261098, 524116, 1047548, 1047556, 1047592, 1047599, 1047625, 1047629, 1047642, 1047662, 743708, 1047700, 1047702, 1047708, 1099831, 1047738, 1047794, 1047833, 1047843, 1047854, 1082377, 785721, 794319, 523621, 830649, 1047913, 785772, 1047917, 261521, 1047987, 830040, 261652, 261683, 786009, 786021, 523952, 1048281, 786157, 1048303, 43649, 1048359, 1048361, 1048363, 1048377, 1048381, 734979, 524166, 968921, 786375, 1048565], + 'msmarco-passage-test-subset': [57, 524369, 43707, 1048716, 524438, 524469, 786761, 262542, 412, 262636, 524790, 1049088, 1049092, 1049096, 262686, 349622, 1049177, 1049180, 1049202, 611785, 262878, 262883, 525069, 1049390, 1049419, 1049537, 787526, 525433, 1214, 1049881, 1416, 1425, 787957, 1005146, 788034, 1050225, 1050241, 1050274, 788151, 263874, 1050405, 1937, 526266, 2045, 526352, 175123, 1050794, 1050808, 87762, 264434, 526597, 264602, 2610, 612105, 1051206, 2663, 789140, 1051326, 527126, 789305, 1136443, 1051570, 265198, 527398, 340540, 3243, 1051868, 3297, 1136507, 265494, 527647, 527745, 655939, 1052076, 306421, 789981, 1052164, 790111, 790118, 1073795, 1005502, 1136591, 265976, 790280, 3867, 528174, 266150, 1052743, 1052835, 266488, 4383, 656101, 1053031, 266611, 1053061, 481345, 266752, 791170, 656150, 529043, 4776, 838148, 1053411, 1126209, 88200, 791397, 529272, 5000, 1136818, 1053809, 1053885, 1126223, 529769, 267705, 792006, 792143, 1126077, 530079, 1054386, 268010, 530174, 1005869, 962187, 1054576, 530295, 6055, 268235, 6113, 1054733, 1054749, 1054797, 1054959, 268574, 792925, 793003, 1055227, 1055343, 793245, 1006025, 1055448, 1049736, 793432, 531296, 531311, 531334, 531470, 1055761, 793699, 269425, 576361, 525514, 831384, 794144, 908077, 1056313, 1113870, 1049877, 269960, 1056425, 7869, 794347, 1056576, 532352, 229260, 8136, 270297, 532494, 270378, 794701, 1056910, 8452, 1057030, 1057038, 1057241, 1057246, 8718, 533268, 533312, 795460, 307344, 87592, 9082, 533379, 533428, 795622, 271435, 533613, 271481, 1057936, 533677, 271544, 795872, 271620, 271672, 1058138, 271748, 533953, 271835, 534021, 271881, 796223, 534202, 272071, 534250, 534279, 796451, 831794, 272269, 796563, 10141, 1058856, 796808, 534684, 1059138, 1059177, 1059186, 272863, 535009, 797200, 535067, 272951, 10941, 1108953, 1059643, 797562, 797565, 535431, 535449, 11172, 11258, 927093, 273443, 11304, 535610, 1059906, 535668, 797815, 535699, 797862, 307758, 1060142, 798003, 273773, 798239, 536101, 11863, 798354, 875806, 9129, 536330, 12048, 1060669, 274306, 12166, 1060768, 798642, 1060897, 274522, 274555, 274605, 1050658, 536822, 798979, 12553, 274731, 274771, 536931, 274873, 799171, 1061326, 12761, 190377, 1061434, 799293, 799323, 537188, 537223, 12954, 537267, 1061590, 275173, 13101, 1061763, 537545, 275413, 1061852, 537610, 799784, 537744, 799932, 537821, 275722, 275737, 800142, 538026, 538078, 275951, 13823, 800274, 538143, 13912, 800348, 276099, 614001, 538373, 538393, 857280, 276343, 14244, 800703, 855922, 538654, 276525, 14448, 1063049, 657776, 276665, 800980, 538850, 14571, 538878, 538943, 276928, 277093, 733738, 277177, 795754, 1063665, 1063739, 788960, 745278, 539535, 539566, 539595, 395821, 802079, 277868, 1064344, 1051205, 278034, 1064595, 540318, 1064659, 278239, 540436, 802584, 802667, 278403, 540572, 802776, 540655, 802817, 278684, 278691, 655097, 1065206, 540951, 803115, 803296, 541155, 541229, 541238, 1065534, 279176, 17077, 803596, 17199, 803633, 1065923, 541708, 803911, 803948, 1117901, 1066186, 804098, 585434, 1066264, 542015, 280019, 1066463, 804329, 
1066501, 18075, 920524, 18164, 542582, 804764, 804916, 280704, 18711, 18793, 543290, 805501, 1067743, 1067801, 543631, 543638, 1008125, 543772, 805998, 19684, 1068313, 1068315, 281922, 806258, 833432, 544191, 282055, 544220, 864839, 1126711, 282125, 20010, 1068587, 806450, 1119189, 527633, 282280, 806602, 544540, 544560, 943412, 456029, 544623, 806779, 544722, 20440, 20530, 20616, 20734, 323665, 20892, 1069557, 21003, 21075, 545410, 283348, 21227, 807811, 807866, 21444, 545791, 1070083, 807974, 283924, 21792, 546081, 1008523, 21838, 1070417, 808343, 284067, 546217, 1070511, 1070533, 808540, 546404, 22197, 22256, 546597, 1070941, 22372, 808898, 546758, 921267, 790199, 1071255, 284910, 1071370, 22817, 547140, 285032, 285049, 809339, 547374, 547405, 809570, 809594, 547515, 809683, 341207, 547627, 23367, 547661, 547711, 809913, 547770, 23531, 285797, 548020, 548054, 23822, 23910, 1072522, 548237, 23963, 1072559, 23986, 810496, 24093, 810631, 834181, 810830, 286573, 24462, 1073093, 810958, 811001, 24636, 965313, 811245, 549110, 286980, 24841, 549190, 943566, 549213, 549237, 811447, 811457, 25026, 549327, 1073638, 91565, 572170, 811650, 811758, 549663, 703281, 549722, 25465, 266390, 811974, 812006, 1074152, 812148, 484886, 1074350, 25802, 1074449, 397579, 812644, 288566, 1124469, 222861, 812967, 572446, 48170, 813370, 551271, 1075711, 27310, 813770, 289499, 289517, 551683, 813841, 922089, 27528, 551851, 1076183, 289801, 814107, 27705, 1076289, 289943, 814248, 814340, 1076490, 552319, 1076615, 552364, 290290, 552458, 552459, 552563, 290429, 814791, 552670, 814827, 28453, 814908, 814920, 1077110, 552834, 28661, 552956, 552959, 28684, 28862, 553161, 553530, 553571, 291553, 1078015, 1078080, 815939, 354466, 29667, 553988, 29725, 29785, 1078365, 29811, 1078498, 816456, 292227, 292284, 554435, 30163, 1078745, 1078766, 554529, 1078827, 30282, 554585, 30359, 816794, 227967, 816915, 554792, 816994, 30649, 30677, 1079280, 1053723, 293041, 293069, 555353, 31169, 31192, 136287, 747961, 293516, 555674, 31504, 1080183, 743021, 31825, 818265, 583532, 32202, 1080843, 32278, 556581, 32330, 1080912, 556637, 294538, 294614, 556923, 556935, 557046, 1081348, 1081455, 966679, 295077, 1081591, 33137, 1081774, 295361, 557592, 819755, 1081930, 923070, 557806, 295730, 295732, 820021, 620882, 557944, 557952, 558003, 996237, 617318, 820387, 311494, 558303, 792115, 296378, 34322, 341578, 558809, 558963, 558978, 297010, 297058, 34925, 297146, 35150, 821619, 821676, 9904, 559629, 35370, 442810, 297682, 822224, 560231, 36033, 822527, 560709, 560815, 823034, 561064, 70230, 823415, 823541, 561398, 37122, 561461, 37185, 561499, 299381, 561538, 299461, 9975, 561834, 37547, 299709, 299781, 561991, 37706, 562002, 562030, 824384, 562352, 562373, 38087, 38122, 574337, 824644, 300375, 300384, 312219, 824765, 300537, 562697, 1054994, 562904, 825151, 874196, 825453, 301352, 563542, 39325, 603617, 825961, 563898, 39660, 563962, 302038, 826354, 564283, 399884, 564310, 564382, 40124, 564558, 826731, 40648, 827155, 94173, 40892, 827381, 41048, 827488, 565366, 41184, 793327, 827847, 402915, 303637, 303706, 828068, 303874, 566216, 487606, 42055, 828518, 566407, 618695, 304358, 304380, 304430, 828723, 304449, 828744, 566615, 566618, 1107117, 566732, 42476, 42541, 50782, 566937, 829107, 567024, 829295, 305205, 567416, 43167, 567505, 43476, 567820, 567870, 567878, 567881, 567899, 830234, 830306, 830462, 306216, 568405, 568511, 830709, 44340, 1019841, 44537, 831024, 968631, 44621, 44659, 44678, 306873, 569047, 569053, 831217, 569196, 569409, 
569527, 307403, 45301, 542712, 831882, 1012478, 569862, 569902, 307826, 45895, 832389, 925169, 832897, 308617, 570789, 570875, 46683, 571009, 571014, 571087, 833302, 309040, 1118879, 571215, 46941, 309117, 833544, 47125, 571431, 571432, 47269, 833797, 571671, 1110643, 182697, 47714, 47729, 309926, 619655, 572364, 572432, 310488, 834856, 310642, 834934, 1136837, 572884, 572957, 572976, 1136401, 48846, 573221, 1056764, 1056770, 573471, 573622, 835783, 49381, 49387, 8234, 573701, 49429, 49439, 49482, 835940, 836003, 836007, 836062, 574068, 49810, 574114, 49850, 49984, 848382, 836498, 857520, 50189, 836636, 836655, 312378, 836888, 50626, 312826, 837140, 1119006, 50797, 50800, 708745, 837287, 50863, 837383, 575259, 837433, 575378, 663878, 575461, 796050, 575606, 575621, 313491, 51360, 51514, 838000, 445502, 51577, 313747, 313766, 838235, 314064, 838377, 838421, 576292, 838440, 576305, 576357, 314262, 52199, 838709, 576792, 52604, 576964, 980518, 577234, 839463, 577546, 839707, 53330, 839841, 53422, 1119097, 577746, 839899, 839912, 315683, 857232, 840136, 1102456, 1102474, 1102477, 1102488, 1102498, 1102578, 1102579, 1102589, 1102590, 1102617, 1102645, 1102657, 1102667, 1102682, 316262, 1102704, 1102714, 316302, 1102744, 1102755, 1102768, 1102803, 1102811, 1102827, 1102839, 1102854, 1102862, 1102869, 1102892, 1102895, 1102970, 1102979, 1102989, 1103007, 1103009, 1103019, 1103076, 1103084, 1103089, 1103091, 1103093, 1103121, 1103136, 1103156, 1103166, 1103182, 1103192, 1103198, 1103250, 1103257, 1103260, 1103289, 1103290, 1103314, 1103322, 54747, 1103350, 1103355, 579092, 1103387, 1103416, 1103418, 841302, 1103467, 1103468, 1103502, 1103506, 1103511, 54958, 1103535, 1103537, 1103553, 1103555, 1103561, 1103579, 1103581, 1103601, 1103630, 1103645, 1103651, 1103654, 55079, 1103669, 1103679, 1103684, 1103687, 1103690, 1103759, 1103787, 1103793, 1103798, 1103816, 1103826, 1103828, 1103879, 1103888, 1103906, 1103910, 1103911, 1103921, 1103966, 1103969, 1103974, 1104005, 1104020, 1104022, 55454, 1104036, 1104064, 1104071, 1104077, 1104087, 1104099, 1104105, 1104118, 1104124, 1104175, 842042, 1104198, 1104215, 1104221, 1104223, 1104235, 1104252, 1104258, 1104279, 1104339, 1104403, 1104406, 580130, 1104454, 1104458, 1104468, 1104497, 1104506, 1104509, 1104513, 795757, 1104537, 1104557, 1104624, 1104630, 1104633, 1104640, 1104685, 1104698, 1104699, 1104704, 1104712, 1104720, 1104725, 1104740, 1104745, 1104763, 1104773, 1104781, 1104805, 580605, 842753, 1104915, 1104920, 1104942, 1104949, 1104950, 1104957, 1104984, 1105013, 1105017, 842886, 1105042, 1105073, 1105086, 1105087, 1105100, 1105108, 1105110, 1105142, 1105144, 1105146, 1105147, 1105148, 1105163, 1105169, 1105183, 1105190, 1105202, 318781, 1105216, 1105239, 1105248, 1105253, 1105263, 1105275, 1105280, 1105285, 1105287, 1105298, 1105337, 1105358, 1105364, 1105381, 1105425, 1105427, 1105429, 1105431, 1105432, 1105435, 1105441, 1105445, 56894, 1105485, 1105489, 1105498, 1105506, 1105515, 1105526, 1105551, 1105565, 1105567, 1105571, 1105585, 1105593, 1105594, 1105614, 1105617, 1105625, 581377, 1105666, 1105686, 1105689, 1105700, 581447, 1105753, 1105761, 1105766, 57218, 1105798, 1105805, 1105816, 1105817, 581535, 1105831, 1105853, 57292, 1105870, 1105871, 1105886, 1105900, 581663, 1105953, 1105978, 1105989, 1106011, 1106027, 1106029, 1106088, 1106089, 1106099, 1106125, 1106126, 581844, 1106141, 1106144, 1106159, 1106162, 319742, 1106180, 319757, 1106200, 1106201, 1106212, 1106216, 1106231, 1106234, 1106241, 57674, 844113, 1106273, 844140, 57710, 
1106290, 1106291, 1106306, 1106327, 1106343, 1106348, 57774, 1106377, 1106381, 1106389, 1106408, 1106414, 582128, 1106421, 708693, 1106440, 1106450, 1106482, 1106491, 1106502, 1106508, 1106516, 1106521, 1106533, 1106535, 1106539, 1106543, 844433, 1106589, 1106597, 1106607, 1106617, 582339, 1106642, 1106652, 1106658, 1106672, 1106676, 582390, 1106680, 1106686, 1106709, 1106756, 1106764, 582484, 1106797, 1106821, 1106829, 1106834, 1106840, 1106850, 1106858, 1106862, 1106873, 1106912, 1106920, 320499, 1106988, 1107000, 1107015, 1107023, 1107033, 1107046, 1107082, 1107085, 1107091, 1107103, 1107108, 1107112, 320700, 1107141, 1107147, 1107149, 1107151, 1107153, 1107171, 582884, 1107189, 1107192, 1107193, 1107207, 1107210, 1107245, 1107299, 1107308, 1107336, 1107344, 1107353, 1107362, 1107364, 1107373, 845232, 1107381, 1107399, 1107401, 1107450, 1107455, 845321, 1107494, 1107525, 583249, 255251, 1107563, 1107565, 1107593, 1107602, 1107618, 1107640, 1107646, 1107677, 1107685, 1107702, 583443, 1107748, 1107749, 1107794, 59230, 583527, 1107834, 1107845, 845740, 1107885, 845751, 1107898, 1107915, 321504, 1107953, 1107960, 1107970, 1107982, 1107991, 1108009, 1108011, 845894, 1108071, 1108099, 1108121, 1108131, 321703, 1108147, 1108202, 1108203, 1108216, 1108227, 1108241, 1108268, 1108271, 59722, 59725, 584017, 1108332, 1108400, 1108406, 846308, 1108462, 1108472, 1108478, 1108481, 59911, 1108492, 1108510, 1108516, 1108521, 1108523, 1108526, 1108564, 1108607, 1108626, 1108629, 1108636, 1108637, 1108645, 1108658, 1108699, 1108718, 1108735, 1108738, 1108754, 1108763, 1108789, 1108799, 1108833, 1108847, 1108874, 1108875, 60301, 1108897, 1108911, 1108914, 1108922, 1108935, 1108950, 1108959, 1108975, 1108983, 1108984, 1108985, 1109002, 1109022, 1109040, 322609, 322610, 1109048, 1109110, 1109114, 1109188, 1109190, 1109195, 1109201, 1109214, 1109215, 1109238, 403328, 1109261, 1109311, 1109319, 60764, 1109379, 1109397, 60831, 1109408, 1109413, 1109436, 847301, 60870, 323018, 1109462, 1109463, 1109464, 1109471, 1109473, 1109474, 1109477, 1109487, 1109496, 585220, 1109525, 1109537, 1109540, 1109542, 1109546, 1109563, 1109571, 1109579, 1109599, 1109615, 1109616, 1109628, 1109655, 1109657, 1109658, 1109694, 1109701, 323294, 1109768, 1109784, 1109788, 1109794, 1058779, 1109805, 1109822, 323393, 1109853, 1109872, 585585, 323461, 1109901, 1109917, 1109974, 323565, 1110081, 1110087, 61526, 1110163, 1110169, 585888, 1110189, 1110196, 1110203, 1110213, 1110215, 1110217, 1110234, 1110246, 1110264, 1110275, 1110284, 1110294, 1110295, 1110314, 1110321, 1110322, 1110326, 1110337, 1110344, 1110353, 1110357, 1110388, 323959, 1110392, 1110401, 1110410, 1110423, 1110426, 1110468, 1110470, 1110498, 1110512, 1110523, 1110576, 1110605, 62078, 62097, 1110687, 1110698, 1110730, 848623, 1110776, 1110836, 578356, 1110849, 1110858, 1110868, 1110874, 1110903, 1110905, 1110929, 1110936, 1110964, 1110997, 1111023, 1111024, 586754, 586761, 1111057, 1111071, 1111099, 62525, 1111119, 1111132, 1102693, 62577, 1111156, 1111168, 1111205, 1111241, 1111292, 1111306, 1111313, 1111316, 1111338, 1111345, 1111377, 1111392, 1111396, 1111400, 62845, 587145, 1111470, 1111472, 1111502, 1111504, 1111564, 1111566, 1111576, 1111580, 1111581, 1111605, 1111650, 1111662, 1111668, 1111678, 1111702, 1111705, 1111710, 1111749, 1111760, 1111790, 1111791, 1111802, 1111810, 1111813, 63246, 849720, 63290, 1111890, 1111892, 1111898, 1111902, 1111908, 54246, 1111957, 1111969, 1111987, 1112014, 1112018, 1112044, 1112061, 1102849, 1112089, 1112100, 1112105, 1112107, 
1112141, 1112154, 316436, 1112195, 1112210, 1112234, 1112240, 1112250, 1112262, 1112291, 1112297, 1112302, 1112306, 1112313, 1112324, 1112327, 850214, 1112366, 63795, 1112375, 1112377, 1112382, 1112384, 1112390, 1112394, 1112396, 1112406, 1112442, 1112452, 1112479, 1112487, 1112494, 1112501, 1112506, 1112514, 1112517, 588308, 1112606, 1112614, 1112625, 1102942, 1112656, 1112663, 588408, 1112702, 1102959, 1112755, 1112770, 1112819, 1112827, 1112834, 1112838, 1112847, 1112893, 1112897, 1112908, 1112928, 1112939, 1112944, 1112947, 1112954, 64384, 1102998, 1112968, 1112985, 588712, 64430, 1113041, 1113073, 1113090, 1113092, 1113109, 64535, 1113125, 1113147, 1113148, 1113158, 1113163, 1113170, 1113201, 1113216, 326787, 326797, 1113231, 1113258, 1113269, 1113304, 1113307, 1113318, 1113347, 1113352, 326921, 1113380, 1113381, 1113393, 1113398, 1113416, 1113422, 1113429, 1113439, 64882, 589171, 1113461, 1113490, 1113496, 1113498, 1113506, 1113520, 1113525, 1113526, 1113528, 1113545, 1113597, 1113608, 1113622, 65052, 1113654, 1113683, 1113699, 1113710, 327292, 1113728, 1113756, 1113767, 1113792, 1113796, 1113802, 1113808, 1113831, 1113840, 1113861, 1113877, 1113944, 1113959, 1113970, 1114019, 1114026, 1114047, 1114055, 1114066, 1114078, 1114092, 1114093, 1114099, 1114108, 1114125, 1114131, 589844, 1114149, 1114153, 589875, 1114164, 1114181, 1114185, 1114188, 1114198, 1114200, 1114206, 1114215, 404156, 327812, 65692, 1114275, 327855, 1114292, 65752, 1114358, 1114383, 65809, 1114428, 1114460, 1114471, 1114476, 65904, 1114483, 1114488, 1114495, 1114498, 1114502, 1114510, 1114512, 1114521, 1114523, 1114539, 65965, 328110, 1114549, 535288, 1114584, 1114585, 1114588, 1114589, 1114634, 1114635, 1114650, 590365, 1114654, 1114655, 1114664, 1114669, 1114686, 1114700, 590445, 1114743, 1114750, 1114753, 1114757, 1114782, 1103303, 1114805, 1114817, 1114828, 1114838, 590560, 1114856, 1114870, 328464, 1114901, 1114905, 1114946, 1114947, 1114948, 1114979, 852842, 1115004, 590726, 1115021, 1115030, 1115032, 1115072, 1115075, 1115086, 1115092, 1115097, 1115100, 1115106, 1115109, 1115118, 1115154, 1115172, 1115181, 1115191, 1115197, 1115206, 1115255, 1115281, 66745, 1115325, 1115332, 1115355, 1115372, 1115388, 1115423, 1115425, 1115432, 1115462, 1115463, 1115485, 1115511, 853376, 1115526, 1115539, 1115544, 1115584, 1115585, 1115586, 1115595, 1115599, 591326, 1115617, 1115649, 1115650, 1115651, 1103446, 1115660, 1115667, 853533, 1115708, 1115716, 67147, 1115748, 1115783, 1115784, 1115796, 67222, 67225, 1115819, 591543, 1115870, 1115877, 1115881, 1115887, 1115898, 1115929, 1115933, 1115949, 1115961, 1115970, 1115976, 1115983, 67422, 591718, 1116013, 1116015, 1116019, 1116021, 1116025, 1116028, 1116037, 591784, 1116090, 1116103, 1116112, 1116121, 1116139, 1116161, 1116162, 1116164, 1116168, 1116169, 1116185, 1116201, 1116211, 1116228, 1116234, 1116236, 1116264, 1116265, 1116268, 1116273, 1116301, 1116304, 1116324, 1116353, 1116361, 1116368, 1116369, 1116395, 1116419, 1116429, 1116433, 1116440, 1116450, 1116452, 1116467, 186086, 1116531, 1116537, 1116548, 330117, 1116553, 1116554, 1116592, 1116606, 1116607, 592329, 1116657, 1116663, 1116694, 1116695, 1116702, 1116706, 1116728, 1116775, 1116776, 1116799, 1116821, 1116846, 1116862, 1116864, 1116867, 1116871, 1116877, 1116896, 1116902, 854766, 330504, 1116964, 1116996, 854884, 1117033, 1117055, 1117062, 1117066, 1117104, 1117148, 1117150, 1117154, 1117178, 68618, 54659, 68626, 1117206, 1117261, 1117263, 1117271, 1117294, 593007, 1117299, 1117307, 1117313, 1117331, 1117337, 
68764, 1117350, 1117357, 1117361, 1117375, 68811, 1117402, 1117446, 1117450, 1117505, 317325, 1117566, 1117579, 1117581, 1117584, 1117589, 1117616, 1117623, 1103776, 69075, 1117672, 593386, 1117689, 1117691, 1117700, 1117708, 1117709, 331284, 1117725, 1117732, 1117740, 593455, 1117748, 1117757, 1117765, 593489, 1117787, 1117798, 855667, 1117826, 1117858, 1117875, 1117883, 1117892, 1117911, 1117925, 331496, 1117935, 1117951, 1117978, 1117979, 1118012, 1118014, 1118042, 855901, 1118055, 331648, 1118140, 1118145, 1118169, 1118172, 1118176, 1118187, 1118199, 1118209, 1118227, 1118229, 1118230, 1118232, 1118240, 1118248, 1118251, 1118259, 593986, 1118293, 1118294, 1118310, 1118326, 1118333, 1118388, 1118389, 1118394, 1118416, 69841, 1118423, 1118429, 1118434, 1118455, 1118456, 1118457, 1118522, 856393, 1118585, 1118595, 1118627, 594352, 1118641, 1118651, 1118659, 1118671, 1118676, 1118677, 1118683, 70108, 70145, 1118737, 1118759, 594476, 1118768, 1118771, 1118792, 1118793, 1118797, 1118799, 1118869, 1118871, 1118883, 1118884, 1118889, 1118892, 1118926, 1118927, 1118953, 1118954, 1118976, 1119004, 1119013, 1119015, 1119021, 1119038, 1119040, 70472, 1119060, 1119076, 856951, 1119110, 1119112, 1119115, 1119121, 856978, 594835, 1119128, 70555, 1119132, 1119150, 1119168, 1119169, 1119179, 594910, 1119207, 1119225, 332824, 1119264, 1119271, 1119280, 70714, 332859, 1119305, 1119307, 595026, 1119316, 70752, 1119330, 1119338, 1119347, 1119374, 1119392, 1119440, 1119444, 1119496, 1119501, 1119528, 1119529, 1119592, 1119593, 1119597, 1119603, 333182, 1119617, 1119620, 1119627, 857537, 595422, 71138, 1119744, 1119755, 1119758, 1119764, 1119774, 857643, 1119828, 1119862, 1119884, 333454, 1119904, 333489, 595641, 1119943, 1119953, 71390, 595689, 1120006, 1120010, 1120019, 1120041, 1120044, 1120049, 1120084, 1120089, 595808, 1120119, 1120128, 71557, 1120162, 1120167, 1120180, 1120187, 1120189, 595910, 1120248, 1120253, 1120261, 1120268, 596007, 858159, 1120316, 1120328, 1120361, 858242, 1120391, 1120395, 1120399, 1120418, 1120453, 1120462, 858328, 1104250, 71908, 1120515, 1120519, 1120541, 1120559, 1120564, 1120574, 1120576, 72015, 858461, 1120606, 1120619, 1120633, 334219, 334222, 1120667, 1120668, 1120672, 1120676, 334251, 1120703, 1120704, 1120706, 1120736, 1120744, 1120773, 1120775, 1120776, 1120781, 55727, 858675, 596533, 1120834, 1120835, 858696, 1120842, 1120867, 1120877, 1120887, 1120891, 1120904, 1120919, 1120924, 1120926, 596645, 596659, 1120963, 1114962, 1120982, 1120986, 596699, 1120988, 1120994, 1121000, 1121017, 1121022, 1121044, 1121050, 72476, 1121057, 334633, 1121068, 1121082, 1121083, 1121104, 1121118, 596837, 334716, 72577, 1121156, 1121162, 72587, 1121167, 859072, 859101, 1121249, 1121260, 1121268, 1121269, 1121309, 597025, 1121327, 1121366, 1121380, 1121397, 1121412, 1121424, 1121426, 1121459, 1121466, 1121474, 72904, 1121501, 1121523, 859387, 1121532, 335114, 1121551, 1121559, 1121566, 859431, 1121576, 1121618, 1121624, 1121631, 1121641, 1121642, 597377, 1121667, 1121673, 335288, 1121736, 1121748, 1121759, 1121799, 335374, 1121814, 1121817, 1121830, 1121861, 1121875, 73328, 1121922, 1121931, 1121937, 1121940, 1121963, 1121967, 597686, 73415, 1121993, 1122033, 1122064, 580227, 1122082, 1122084, 1122087, 859955, 1122155, 1122168, 73619, 1122212, 1122220, 1122221, 1122222, 1122233, 1122234, 1122237, 1122242, 1122247, 1122255, 1122283, 1122286, 860145, 1122305, 1122306, 1122308, 1122325, 1122334, 1122336, 1122341, 1122342, 1122343, 1122348, 598116, 1122409, 1122442, 336011, 1122446, 73882, 
1122467, 1122471, 1122476, 1122488, 1122501, 1122504, 1122510, 1122557, 598286, 74000, 1122584, 1122586, 1122593, 1122594, 1122606, 1122619, 74057, 598348, 1122648, 1122652, 1122658, 1122686, 1122690, 1122695, 1122706, 929863, 1122734, 1122745, 1122760, 1122772, 1122776, 1122785, 860643, 1122792, 1122811, 1122826, 1122846, 74274, 1122853, 1122859, 1122892, 336462, 1122908, 1122915, 1122936, 1122966, 1122997, 1123028, 1123052, 1123055, 1123074, 1123090, 1123103, 1123112, 860972, 1123133, 1123134, 1123136, 1123168, 1123184, 1123209, 1123211, 1123220, 598934, 1123227, 1123257, 598973, 1123298, 1123307, 1123350, 318302, 1123383, 1123397, 1123405, 1123408, 1123412, 599137, 1123465, 599178, 1123492, 1123499, 1123518, 1123520, 1123544, 1123586, 1123603, 1123626, 1123627, 1123629, 1123636, 1123649, 1123660, 1123709, 599432, 1123721, 1123755, 1123761, 1123765, 1123776, 599504, 599538, 1123840, 75286, 861724, 1123888, 1123927, 1123930, 1123952, 1123953, 599673, 1123968, 1123969, 1123971, 1123997, 599724, 1124030, 1124040, 1124067, 1124087, 1124093, 1124114, 1124122, 1056710, 1124150, 1124152, 1124159, 1124160, 1124170, 1124171, 1124194, 1124218, 1124226, 1124251, 1124276, 1124300, 600013, 1124306, 1124324, 1124335, 75799, 337952, 1124388, 1124391, 1124451, 1124462, 613923, 1124472, 274797, 1124501, 1124504, 1124522, 1124530, 1124531, 1124534, 1124542, 1124549, 1124569, 1124573, 1124601, 1124621, 1124660, 1124663, 76102, 1124695, 1124699, 1124703, 76140, 76154, 76171, 1124753, 1124767, 1124796, 1124802, 1124803, 1124820, 1124822, 1124831, 1124834, 1124844, 1124866, 1124872, 580697, 1124915, 1124925, 1124926, 1124953, 1124958, 1124982, 1124989, 1124990, 1124998, 1125015, 1125036, 1125041, 859732, 1125079, 1125111, 1125116, 1125132, 1125138, 1125153, 1125194, 1125238, 1125245, 600959, 1125251, 1105046, 1125272, 1125273, 1125292, 1119872, 1125342, 1125352, 842923, 1125394, 1125395, 1125409, 1125443, 1125455, 1125459, 596136, 76945, 1125550, 1125555, 1125559, 1125583, 1125587, 1125590, 1125591, 1125592, 1125596, 1125599, 1125626, 1125628, 1125633, 1125644, 1125645, 1125667, 1125680, 1125684, 1125688, 1125694, 1125699, 339286, 1061433, 1125743, 1125745, 1125750, 1125782, 1125820, 1125829, 1125841, 1125864, 1125920, 1125924, 1125930, 601649, 1125945, 1125947, 1125956, 1125959, 601682, 1125975, 1125979, 1126018, 1126028, 1126035, 1126069, 601783, 1126090, 1126106, 77565, 1126144, 1126146, 1126149, 1126156, 1126178, 77619, 1126215, 1126244, 1126245, 1126267, 339841, 788201, 77725, 1126317, 1126331, 1126361, 1126374, 1126377, 1126380, 1126414, 1126416, 1126425, 1126490, 1126491, 1126494, 1126499, 56678, 1126507, 1126517, 77947, 1126525, 1126531, 1126533, 1126545, 1126578, 1126587, 1126609, 1126651, 1126654, 1126658, 1126673, 843139, 1126691, 1126742, 1126750, 1126761, 1126807, 340377, 1126815, 843163, 864694, 602575, 1126875, 1126880, 864762, 78332, 1126909, 1126910, 1126914, 1126925, 1126931, 1126932, 1126935, 1126945, 1126948, 1126958, 1126963, 1126971, 340541, 1126994, 1127025, 1127044, 1127064, 1127081, 1127084, 1127097, 1127110, 1127117, 1127131, 1127132, 581075, 1127162, 1127177, 1127184, 1127188, 1127195, 78640, 1127228, 1127246, 1127280, 1127325, 1127331, 1127351, 1127359, 1127364, 1127367, 603085, 1127378, 1127398, 1119942, 1127411, 1127425, 1127434, 1127444, 1127448, 1127454, 1127462, 1127468, 1127474, 1127475, 1127489, 1127493, 1127498, 1127525, 1127547, 1127554, 1127561, 1127562, 1127567, 1127570, 1127588, 1127621, 1127668, 1127674, 1127684, 1127695, 1127697, 1127730, 1127741, 1127752, 1127753, 
1127754, 79203, 865638, 1127790, 1127810, 1127822, 1127866, 1127895, 1127896, 1127897, 865754, 1127914, 1127932, 1127938, 1127959, 1127969, 79438, 1128028, 1128040, 865909, 1128064, 1128071, 581229, 1128080, 603796, 1128087, 1128119, 1128152, 1128160, 1128164, 1128166, 1128185, 56962, 1128212, 1128264, 1128276, 1128280, 1128283, 1128287, 1128297, 1128319, 1128337, 1128342, 1128346, 1128362, 1128380, 1128381, 1128385, 604102, 604126, 1128417, 1128432, 1128434, 1128450, 1105582, 1128484, 1128492, 1128494, 1128508, 1128511, 581303, 1128524, 1128539, 1128549, 1128567, 342150, 342159, 1128620, 604334, 1128624, 1128668, 80098, 1128676, 1128681, 1128689, 1128691, 1102527, 1128726, 1128738, 1128752, 1128760, 1128801, 1128818, 1128821, 1128840, 1128860, 1128872, 319218, 1128888, 1128889, 1128900, 1128903, 604621, 1128914, 1128916, 1128917, 1128927, 1128958, 1128976, 1128979, 1128981, 1128986, 1128995, 1129055, 1129067, 1129084, 1129085, 1129103, 1129145, 1129167, 1129226, 1129227, 1129232, 1129274, 1129315, 1129356, 1129357, 605098, 867262, 1129422, 1129433, 1129444, 1129448, 1129499, 1129514, 1129537, 1102556, 1129589, 81017, 81030, 1129630, 1129642, 1129644, 1129650, 1129700, 1129731, 1129733, 1129743, 1129748, 1105797, 81184, 1129769, 1129770, 1117767, 81225, 605516, 1129822, 1129838, 1129841, 1129878, 1129888, 1129896, 1129905, 1129959, 1129973, 1129981, 1130006, 1130013, 1130015, 1130023, 1130051, 343640, 1105850, 1130084, 1130091, 1130115, 1130123, 1130156, 1130193, 868055, 1130214, 1130232, 1130240, 1130254, 1105882, 1130277, 1130307, 1130312, 1130327, 1130332, 1130335, 1130340, 1105897, 1130431, 1130435, 1130511, 1130524, 1130536, 1130548, 1130575, 1130608, 1130640, 1130667, 1130670, 1130672, 1130684, 1130708, 1130726, 1130728, 1130732, 1130744, 1130772, 1130805, 1130806, 1130830, 1130837, 1130849, 1105982, 1130874, 1130885, 1130897, 1130916, 1130935, 1130937, 1130946, 82408, 1130988, 1130994, 1130996, 1131004, 1131005, 1131008, 1131013, 1131042, 1131048, 1131049, 868913, 1131062, 1131075, 606791, 1131092, 1131104, 1131106, 868965, 82578, 1131155, 1131173, 1131182, 1131192, 669130, 1131209, 1131216, 1131222, 1131227, 1131240, 1131260, 1131278, 1131279, 1131295, 1131300, 1131301, 1131307, 1131320, 1131343, 1131358, 344937, 82810, 1131396, 1131411, 1131415, 1131417, 607143, 869292, 1131446, 1106079, 1131467, 1131478, 1131510, 1131512, 1131533, 345102, 1131543, 82973, 1131554, 1131557, 1131559, 1131562, 1131588, 1131593, 1131596, 1131609, 1131650, 931357, 1131699, 1131703, 1131724, 1131729, 1131735, 1131738, 1131754, 1131777, 1131786, 1131813, 1131818, 1131821, 1131830, 1131840, 317018, 1131884, 1131892, 83320, 1131961, 83401, 1131983, 1132006, 1132047, 1132059, 869918, 1132147, 1106196, 1132162, 1106199, 1132255, 1132268, 345861, 1132309, 1132312, 870172, 1132336, 1132347, 1132352, 1106230, 608084, 1132399, 1132409, 608124, 1132444, 346023, 1132513, 1132529, 83959, 1132541, 1132549, 1132564, 1132574, 1132583, 1132588, 1132592, 1132613, 346202, 1132647, 1132651, 1132717, 870582, 1132734, 1132735, 1132737, 1132754, 608491, 625704, 1132790, 608508, 1132815, 1132829, 1132834, 1132890, 232508, 1132913, 1132921, 1132945, 1132952, 1132959, 1132960, 1132965, 1132977, 1106335, 1132996, 1133036, 1133057, 1133092, 756829, 1133113, 1133122, 1133153, 608870, 871016, 1133171, 1133173, 1133187, 1133190, 1133202, 1133231, 1133252, 1133254, 1133258, 1133288, 582098, 407339, 1133349, 1133366, 1133374, 1133376, 1133380, 1133405, 1133428, 1133431, 1133442, 1133444, 871301, 1133474, 1133527, 582138, 609252, 
1133557, 1133558, 1133611, 1133620, 1133644, 1133646, 1133652, 1133658, 1133710, 1133715, 1133721, 1133744, 1133757, 1133780, 1133792, 1133798, 1133799, 582183, 871720, 1133809, 1133810, 1133812, 1133854, 1133902, 1133907, 85348, 1133931, 1133983, 1133986, 1133988, 1133989, 1133990, 1134001, 347583, 1134024, 1134028, 1134030, 1134057, 1134064, 1134071, 713301, 320086, 1134135, 1134140, 1134184, 85610, 1134188, 1106537, 1134203, 1134212, 1134221, 1134263, 1134266, 1134281, 1134306, 1134309, 1134313, 1134343, 1134357, 85798, 1134394, 1134405, 713357, 1134420, 1134422, 1134429, 1134436, 1134444, 1134449, 1134499, 1134500, 1134539, 1134552, 1134557, 1134558, 1134560, 1106598, 1134572, 1134583, 1134614, 844464, 1134639, 1134656, 1134658, 1134666, 1134676, 610414, 1134723, 1134728, 407575, 1134752, 1134784, 14371, 1134806, 1134807, 1134835, 1134838, 1134839, 1134846, 1134850, 1134853, 1134862, 1134871, 1134920, 1134926, 1134931, 1134945, 1134949, 1134952, 1134967, 1134978, 1134987, 1134998, 1135028, 1135039, 1135042, 1135052, 1135081, 1135094, 1135098, 1135106, 1135121, 1135142, 1135150, 538718, 1135165, 1135190, 669798, 1135234, 1135238, 1135249, 1135262, 1135274, 1135280, 1135301, 611027, 611049, 1135362, 1135395, 1135397, 1135438, 1135448, 1135498, 1135522, 1135525, 1135533, 1135553, 1135563, 1135568, 1135570, 1135605, 1135606, 320340, 1135625, 1135650, 1135684, 611400, 1135722, 1135727, 1135738, 873607, 1135780, 1135818, 1135841, 1135848, 1135856, 1135859, 1135875, 1135894, 1135903, 1128954, 1135933, 1135935, 1135966, 1136008, 1136013, 1136014, 1136028, 1136042, 1136073, 1136108, 1136113, 873986, 1136152, 1136180, 1136183, 1136188, 1136198, 1136212, 1136215, 1136233, 1136245, 1136283, 87730, 1136350, 1136384, 1136388, 1136397, 1136424, 1136425, 1136429, 612158, 1136482, 1136491, 612251, 612252, 1136550, 1136559, 1136584, 1136634, 874523, 1136676, 1136686, 1136724, 1136726, 1136728, 1136756, 1136763, 1136771, 1136793, 1136811, 1136824, 1136830, 1106978, 1136859, 1136890, 1136918, 1136948, 1136966, 612831, 858395, 55325, 582756, 613079, 88808, 88882, 613222, 1107092, 875518, 875528, 89100, 613422, 1107123, 89149, 1107132, 613576, 351473, 613642, 89360, 875793, 976102, 89480, 351625, 875937, 613827, 408145, 876017, 613911, 89633, 89634, 932447, 876108, 102330, 876154, 89877, 320832, 614338, 90139, 1110927, 614540, 352420, 614567, 614620, 1102868, 90368, 876885, 876934, 877076, 90708, 615000, 352949, 1114423, 615149, 91055, 877546, 877556, 353282, 91157, 353333, 877676, 615543, 714335, 91778, 1107568, 1120375, 616142, 796695, 91913, 354076, 878401, 354123, 878415, 92008, 92143, 92176, 616483, 963267, 971998, 845517, 616705, 92622, 879076, 1064020, 92670, 464999, 92713, 92742, 617033, 1107731, 617125, 1107745, 879329, 617192, 617223, 92974, 355339, 355519, 355540, 617733, 617742, 1107851, 880092, 15612, 880110, 618024, 59330, 880353, 94039, 618349, 356260, 59381, 880839, 2375, 230103, 881067, 881070, 94642, 881142, 409003, 619013, 583772, 881246, 1108075, 94866, 881324, 583824, 881533, 619408, 357297, 95167, 357336, 1108138, 1064450, 881738, 881767, 95381, 889718, 95448, 882005, 95594, 1108199, 619994, 95764, 357934, 620231, 88116, 882803, 96443, 620788, 620810, 334918, 96565, 96597, 358771, 883176, 883184, 96857, 671507, 561885, 621190, 359283, 359286, 621550, 359463, 1108487, 864818, 97466, 883929, 72228, 884131, 621989, 622023, 97834, 97873, 1108579, 622187, 622238, 622262, 884436, 622467, 1108632, 846492, 360488, 710329, 98415, 360650, 81842, 885018, 628109, 885095, 98675, 360822, 579403, 
623112, 584499, 885663, 1108809, 1108811, 623603, 1108821, 55690, 1108867, 886048, 886243, 624210, 624304, 278827, 624426, 1108961, 886682, 409934, 1108993, 624662, 624689, 1109050, 195393, 100777, 887242, 1109070, 1109072, 100932, 100940, 625256, 1068306, 1077039, 1120685, 322709, 497477, 625633, 887806, 625685, 625691, 887840, 1109171, 887883, 101478, 888024, 578343, 60634, 101850, 626166, 626218, 364094, 888413, 847124, 143062, 789014, 1109288, 626517, 626536, 888689, 1129452, 888762, 102366, 497659, 626761, 626823, 626866, 1109365, 1065678, 1109381, 889197, 1109396, 191894, 410350, 1109407, 1065739, 365139, 675569, 103128, 60902, 889757, 103328, 103402, 323085, 103595, 890075, 628039, 628136, 366118, 247876, 366342, 890643, 628564, 61075, 628657, 1109680, 628808, 890953, 498021, 579601, 891083, 148515, 1109722, 104758, 54818, 891432, 629337, 629420, 105183, 891634, 1109806, 61240, 578560, 105367, 891987, 105574, 105604, 629913, 1111030, 630092, 630123, 885159, 892353, 368106, 368124, 892490, 892584, 1109969, 847829, 847831, 106320, 892768, 1110001, 630845, 893117, 368900, 893211, 631108, 631214, 369105, 61519, 631266, 893530, 369311, 1110129, 107205, 893657, 893658, 893756, 701898, 631665, 631789, 631852, 107701, 894139, 61659, 367519, 632456, 72298, 108391, 586049, 1111049, 370635, 108500, 370750, 632935, 1110391, 633073, 633124, 633137, 108965, 633411, 895721, 895787, 895931, 1110531, 633998, 634055, 848431, 544859, 542609, 804755, 896446, 491017, 848496, 896746, 634650, 110375, 324242, 634856, 841671, 1110710, 110736, 935962, 897222, 635081, 56067, 761250, 111077, 373224, 457609, 635379, 1110794, 326685, 635729, 897878, 897891, 897892, 1023476, 630264, 111704, 636093, 1110904, 972647, 636141, 111894, 636208, 334263, 898402, 636348, 636417, 334194, 112175, 636603, 898753, 936255, 636814, 898971, 899008, 374724, 899014, 112638, 899076, 637004, 637005, 637080, 899268, 112864, 637187, 112928, 62517, 62521, 899428, 375219, 899511, 1023767, 899741, 637695, 637698, 899876, 544978, 899891, 1111188, 637909, 113645, 637960, 1111214, 281106, 761705, 638163, 845725, 1111275, 674396, 638532, 900690, 900867, 900870, 901007, 638898, 901412, 639288, 1111439, 377304, 1111460, 639500, 901678, 115254, 639618, 325057, 377534, 237689, 115594, 82412, 115718, 412503, 377887, 237772, 115952, 902410, 378218, 902586, 325213, 902790, 378632, 902969, 640857, 841870, 640885, 116653, 116659, 1068025, 378866, 903268, 325310, 641164, 641193, 117115, 841979, 903643, 903661, 893358, 641617, 1103766, 903790, 903975, 588762, 500204, 1111874, 379846, 580040, 117829, 187234, 642144, 117862, 117910, 642252, 117965, 904461, 642328, 904565, 868111, 642587, 904780, 642647, 118372, 859388, 642741, 642760, 849869, 118484, 1068326, 642800, 642841, 905050, 150873, 643181, 544108, 643223, 1112097, 643303, 643328, 905574, 905638, 119263, 675245, 1112152, 119400, 282050, 643749, 803862, 1103838, 1112568, 119531, 643870, 769008, 1112203, 587923, 850078, 413172, 906203, 906238, 644204, 906391, 544294, 906491, 644356, 644392, 382254, 587999, 644416, 382407, 644637, 644678, 120398, 644699, 117487, 631766, 120593, 1121333, 645080, 907301, 907310, 383029, 907322, 121025, 645349, 500775, 383220, 121113, 907576, 1112486, 907807, 121431, 121484, 121488, 645818, 850384, 1130095, 68896, 383847, 121746, 121843, 719371, 646245, 1112596, 122011, 326176, 369873, 632020, 122298, 1079717, 1112658, 908897, 981606, 646814, 909052, 1112705, 122644, 1112709, 122690, 122724, 122795, 282652, 123090, 909549, 909560, 894466, 909736, 647597, 632223, 
647637, 64259, 588563, 123525, 647887, 544890, 123648, 56494, 981828, 910246, 648174, 123919, 386091, 1103987, 123980, 648296, 386213, 239143, 100154, 124244, 910690, 910699, 124291, 648638, 124607, 648925, 648975, 807223, 386934, 124798, 649104, 124895, 124938, 1043073, 125037, 649335, 911480, 588848, 1113175, 676274, 632671, 912165, 125791, 912272, 604884, 125929, 388130, 388265, 388319, 650476, 650559, 912931, 1113353, 912970, 126582, 650966, 388981, 126848, 913285, 126866, 326988, 1113425, 913435, 389192, 414393, 913574, 127150, 851318, 853864, 389501, 389506, 327062, 651679, 389541, 651708, 938754, 651821, 389739, 389808, 651969, 914186, 389908, 127812, 632755, 390313, 128174, 652495, 128291, 128365, 128521, 1113692, 108813, 128604, 390770, 652917, 1113724, 653077, 653090, 128874, 391077, 915433, 129008, 1026391, 239971, 653413, 653450, 653517, 653560, 653579, 129347, 653662, 129435, 129457, 1113847, 916050, 916107, 391829, 65305, 129695, 129700, 916214, 392124, 916453, 392181, 851807, 65383, 130306, 916768, 65416, 458638, 21744, 392610, 1114021, 654897, 1114044, 917100, 130694, 392882, 917233, 917252, 130858, 917298, 130951, 917436, 655332, 655373, 393238, 370985, 917567, 917606, 1114132, 131247, 393411, 655607, 917813, 131405, 655728, 393611, 415141, 131573, 131617, 1070541, 131850, 394040, 656267, 394148, 132007, 918446, 64421, 1114277, 394208, 1114279, 132133, 656422, 918605, 918750, 132345, 132423, 132469, 132495, 132575, 656994, 677485, 1114402, 394980, 657161, 132938, 919556, 56808, 395326, 109587, 919673, 919707, 657594, 919760, 919771, 133385, 22247, 1008285, 395660, 395665, 1114524, 1114547, 133747, 920289, 396098, 396122, 658273, 920435, 328169, 396197, 134127, 658427, 396312, 22364, 1114643, 1114660, 1121794, 134469, 1114690, 658865, 134628, 396870, 921193, 396967, 396974, 1114739, 396995, 134875, 921322, 134903, 134905, 659297, 659385, 153592, 659458, 1114804, 921716, 921746, 135347, 503164, 397550, 323874, 397564, 659825, 659901, 135634, 397803, 1114882, 135702, 1114887, 634302, 922237, 135821, 135936, 398127, 660426, 136168, 983896, 1114974, 398438, 660681, 1027626, 136473, 136476, 136578, 809209, 660983, 136726, 983987, 923189, 923221, 399035, 22836, 896672, 923726, 923757, 661623, 399503, 661717, 399595, 137468, 661763, 137554, 399701, 399730, 137662, 137674, 661986, 661990, 137712, 662028, 280245, 662108, 662167, 1115248, 849376, 318841, 138157, 400354, 138263, 940548, 400435, 400491, 662687, 1115334, 924854, 1115339, 400681, 924975, 925041, 925119, 662982, 788278, 400908, 842609, 547426, 925292, 663156, 663182, 138933, 139000, 139175, 401335, 139340, 401572, 663755, 198015, 663891, 139619, 663916, 926094, 664034, 926191, 853437, 402132, 402135, 402300, 664497, 664537, 664540, 664605, 1115656, 118151, 402532, 402595, 664751, 664775, 1115693, 402732, 402799, 402832, 927126, 665022, 140770, 927216, 402991, 403035, 547771, 403145, 141078, 1115798, 403399, 403520, 1028448, 927899, 141475, 853707, 665875, 842703, 403824, 928128, 403914, 666238, 67359, 928453, 772885, 404221, 198444, 142148, 404407, 928755, 666615, 1116016, 1114768, 142656, 404889, 404968, 929366, 1116092, 1116096, 143012, 405183, 897659, 67545, 405298, 405322, 1116134, 929693, 929714, 667607, 1116180, 405684, 405761, 405780, 143680, 143721, 1116221, 405974, 1116242, 143889, 668190, 143955, 1116260, 668300, 406187, 144050, 144051, 406237, 144138, 24041, 668648, 930833, 345241, 406582, 406640, 144498, 931027, 406838, 668999, 406920, 1116402, 144842, 407007, 144872, 144952, 931401, 931576, 111573, 407302, 
931621, 407383, 931678, 407457, 145391, 67937, 373795, 145662, 854417, 932294, 932299, 932340, 932412, 408157, 408210, 670360, 1116612, 592333, 146170, 1116633, 68088, 932859, 319123, 408686, 146574, 903976, 671027, 408905, 408908, 146783, 671117, 68164, 408986, 24480, 409031, 409057, 1116763, 409157, 147064, 327873, 933594, 854655, 1116816, 671630, 409515, 1116829, 671720, 1116845, 147537, 671829, 147550, 409708, 409736, 409754, 409853, 147746, 934193, 458832, 1116903, 680004, 1116927, 672262, 68360, 410169, 148159, 410329, 148209, 410387, 410391, 148322, 672626, 68432, 410635, 148503, 982127, 673041, 410946, 854924, 673175, 148898, 673223, 148977, 57068, 935437, 66067, 1073430, 935643, 411392, 411421, 673666, 673689, 935870, 411600, 149491, 1117182, 1117183, 68610, 810947, 411822, 1117235, 936261, 319235, 412073, 636661, 149975, 149979, 936422, 412136, 418216, 150029, 412182, 287159, 493826, 1117295, 150207, 412357, 674504, 674513, 412395, 412407, 412410, 674566, 674571, 936777, 150347, 674691, 150443, 1117343, 150505, 674867, 150595, 412750, 1029987, 674956, 412817, 858085, 412865, 937168, 1117394, 1117398, 1117405, 413054, 150926, 413075, 112541, 855288, 675316, 937486, 1117451, 675430, 937603, 413335, 937626, 767957, 937753, 1117495, 151408, 675811, 675823, 675920, 1117542, 413801, 676113, 676426, 414315, 505980, 1117650, 25398, 1073975, 676791, 939007, 152627, 939096, 414899, 414970, 677133, 415021, 677269, 1074076, 677292, 415155, 677304, 939453, 1117771, 415184, 939521, 153123, 677421, 153239, 939698, 415438, 153374, 415526, 415661, 415776, 1117872, 1117874, 153663, 678053, 1113709, 593611, 153809, 860569, 153868, 416161, 375206, 154164, 678466, 154285, 375244, 154316, 678672, 416561, 678724, 154441, 416646, 117174, 416672, 416692, 416705, 416738, 1118040, 941093, 416823, 941232, 681173, 154904, 941346, 1105021, 1113751, 679438, 1118127, 679482, 331716, 113269, 679625, 941780, 417717, 417895, 1118226, 417905, 1122505, 680190, 680225, 680230, 418157, 942491, 1118286, 856149, 156181, 680480, 418389, 942728, 156302, 680613, 942763, 418501, 840770, 418615, 156479, 156548, 680854, 418725, 943014, 415479, 680890, 594086, 418801, 418832, 156707, 418883, 681047, 156776, 418952, 200918, 1118435, 681317, 419192, 943613, 943638, 943913, 157565, 419729, 258390, 1074859, 157744, 856417, 944345, 944428, 420169, 158026, 158054, 840782, 682567, 1118647, 944730, 158469, 158569, 682859, 682902, 158714, 420872, 158752, 1118734, 945385, 1121860, 1118369, 683410, 159127, 1118806, 1118820, 683641, 419783, 869486, 159535, 1118868, 159767, 159842, 159867, 159922, 1118921, 946406, 159992, 114048, 160010, 684324, 1118941, 422217, 684502, 1118974, 684536, 1131383, 160276, 160309, 160313, 946751, 856861, 946910, 160574, 684899, 947066, 947119, 684994, 685004, 422890, 422938, 160801, 947416, 947430, 31548, 161027, 423205, 161162, 275968, 161346, 685661, 1119167, 161434, 161474, 947981, 423807, 685998, 423863, 948176, 161766, 1119230, 948232, 686139, 686229, 1119259, 245463, 686392, 686422, 424280, 686436, 424318, 96933, 1119355, 162657, 1105276, 162696, 949129, 1119384, 424856, 687020, 1119390, 162946, 949501, 949516, 425438, 949738, 145104, 425588, 1119514, 687792, 425694, 1119531, 687991, 1111417, 950222, 988508, 688208, 950390, 114725, 426403, 688646, 950862, 426666, 688815, 1119695, 71120, 164798, 689120, 1119740, 427033, 639157, 164940, 1119760, 188803, 165037, 427220, 427221, 951558, 427311, 689461, 427372, 165237, 333375, 165287, 427505, 689657, 165393, 952165, 165977, 71383, 952722, 952749, 166325, 
428479, 952768, 428494, 166356, 428503, 952866, 158817, 952938, 166508, 690869, 428744, 166683, 428836, 428928, 953258, 691188, 429119, 974485, 429182, 857963, 429205, 1105422, 429228, 726929, 691711, 691798, 167533, 954096, 691956, 429843, 840845, 429876, 1120236, 692151, 167873, 692201, 954363, 692238, 430114, 167974, 145260, 430258, 1120322, 430536, 168445, 692815, 430704, 955028, 430755, 168655, 168786, 955228, 693097, 1120404, 168854, 693157, 168906, 168959, 431159, 693353, 169166, 1120466, 693494, 169257, 955763, 955888, 431669, 169584, 693880, 1114236, 1120537, 956142, 1120563, 694087, 694106, 431988, 432012, 169964, 694270, 1120599, 432162, 1120621, 432315, 1131713, 956670, 1120651, 432503, 334232, 694683, 694739, 904767, 1120678, 170498, 1120689, 694851, 957181, 1120726, 695196, 957479, 84257, 1114290, 957686, 695697, 171431, 695737, 957990, 858717, 902411, 374178, 433786, 1123034, 596613, 171824, 696148, 434008, 696173, 696201, 28653, 958390, 696261, 958435, 858790, 1120945, 696375, 696381, 434295, 696519, 696533, 958846, 904849, 172940, 334662, 435130, 116221, 697374, 959564, 173181, 815308, 959723, 435526, 697773, 1121191, 697861, 843768, 173661, 960142, 435864, 173834, 1121251, 174034, 960571, 436285, 174157, 436325, 960734, 174305, 541429, 174351, 960803, 698703, 436582, 465990, 960998, 1121369, 1121374, 174722, 902935, 961305, 75656, 699218, 699279, 437191, 961525, 1114420, 175228, 175258, 509832, 699726, 961879, 699817, 903073, 72956, 699896, 962160, 437910, 291448, 437986, 946839, 700345, 176124, 700430, 1125466, 438344, 481998, 176276, 700590, 700756, 176499, 438891, 701050, 176781, 176953, 439146, 963471, 177167, 791513, 597520, 963943, 177610, 439766, 701919, 902014, 116991, 439879, 1121892, 177775, 439929, 440144, 702298, 1121941, 964482, 964554, 964608, 702508, 861865, 702598, 608244, 1125481, 702722, 554031, 178575, 178610, 702919, 702952, 728829, 178677, 1114542, 440973, 703134, 703145, 703240, 703298, 441204, 179066, 965627, 179219, 703554, 179309, 179395, 703736, 884569, 870157, 1070930, 861435, 179876, 704182, 179955, 1122267, 1122271, 180091, 442307, 442313, 966614, 58374, 423254, 1122316, 180298, 180370, 966925, 1122352, 180593, 705131, 967278, 705174, 180956, 180979, 967419, 1122458, 181213, 705551, 967706, 181305, 181329, 181479, 443694, 705905, 443766, 181644, 968186, 968238, 1122569, 443964, 1122591, 706291, 1122601, 1122610, 706373, 444233, 706401, 968552, 444318, 1024250, 706501, 968667, 1122643, 706624, 444485, 182350, 968788, 1122662, 336236, 968847, 444598, 706780, 968932, 444688, 968995, 706900, 706985, 445026, 707431, 336379, 707577, 707645, 707689, 969984, 707853, 183696, 1122894, 642301, 708094, 708144, 183919, 1122920, 708271, 446160, 970549, 1122957, 970577, 446290, 184221, 184223, 446448, 184333, 184355, 708671, 184497, 1123046, 1123057, 84713, 709056, 1123069, 642473, 184833, 971331, 971415, 971528, 185119, 971564, 971626, 709492, 709494, 709522, 709726, 1123191, 1079501, 447697, 972092, 447912, 448000, 448042, 448059, 972396, 186071, 448305, 380561, 1123337, 972896, 710756, 186484, 710887, 710970, 973336, 973416, 449154, 1123435, 449252, 973614, 1123469, 187317, 1123488, 187330, 187371, 449539, 711802, 973951, 449717, 187574, 187585, 852966, 187675, 712006, 187763, 712140, 188053, 450290, 188166, 712468, 1123584, 188273, 450426, 974727, 188318, 450493, 450498, 450543, 188445, 67379, 974906, 450640, 975031, 407816, 975140, 188784, 713127, 555772, 713278, 451150, 1123822, 975774, 189365, 975809, 975821, 1123837, 975865, 975875, 451642, 451643, 
189529, 976015, 451826, 976293, 1123915, 1114148, 1123917, 452155, 190070, 643361, 452298, 190164, 1123961, 452336, 452385, 976678, 452422, 976771, 976827, 714849, 714863, 714881, 966542, 977156, 1124059, 452924, 190845, 715140, 1124090, 191143, 453305, 191189, 453350, 137306, 905706, 715661, 977828, 977852, 715756, 715765, 993117, 978017, 1124198, 191625, 978096, 978121, 453857, 1124221, 716082, 163053, 978422, 978459, 282352, 192284, 192397, 716717, 978995, 979007, 979044, 1124369, 643775, 1124373, 979086, 454824, 716995, 717111, 192846, 1124418, 455256, 979571, 731251, 455359, 455371, 717538, 455425, 979749, 455513, 455561, 979890, 862380, 717849, 717873, 980023, 193676, 1124550, 980185, 980205, 455957, 980263, 718133, 456002, 980406, 718295, 1120348, 718364, 980533, 718489, 718490, 456383, 194320, 1037279, 456674, 194563, 980996, 722260, 731483, 456807, 163517, 981207, 981240, 1123804, 1037361, 194893, 207250, 457118, 719438, 195189, 981696, 457426, 195312, 981837, 981948, 1124863, 982019, 982104, 195677, 1124882, 982144, 982229, 457951, 982377, 600638, 195958, 720395, 1124957, 196233, 982696, 982810, 196450, 458616, 1125013, 458674, 982967, 458688, 720868, 196599, 983068, 720949, 688140, 853267, 983196, 1125075, 1125081, 721273, 459153, 197069, 819279, 197174, 459398, 983787, 459503, 721661, 197375, 76591, 721708, 197487, 1125183, 644600, 721882, 721942, 1125227, 984245, 460002, 197875, 722189, 722220, 1125260, 197965, 984476, 1115187, 984620, 1125306, 984702, 460506, 722676, 460663, 984952, 198536, 198610, 985070, 198698, 722996, 460855, 198894, 198905, 426367, 461052, 723295, 461190, 723457, 723486, 1125483, 426435, 1125495, 723688, 199407, 985840, 77398, 1125510, 985900, 985913, 199508, 461659, 723897, 986094, 723975, 601268, 461950, 199831, 251774, 462111, 722441, 724400, 200144, 1029607, 862111, 994564, 200228, 986693, 1125651, 724606, 724623, 724657, 1125670, 724767, 986927, 986960, 1125690, 1125706, 462794, 200695, 725044, 200782, 426684, 987306, 463021, 645149, 1125763, 463137, 463180, 725355, 463295, 201194, 463421, 463507, 201381, 725715, 725726, 201444, 987894, 463660, 463679, 725828, 863720, 725969, 725996, 988153, 689019, 726379, 464240, 329704, 988542, 1125954, 988595, 464354, 988661, 732756, 202245, 202250, 202310, 464465, 121109, 202384, 464548, 120219, 190054, 726834, 989078, 989086, 202664, 726971, 295928, 1132796, 464864, 202726, 464904, 989196, 1126064, 464930, 727154, 727181, 951320, 989398, 727291, 465156, 989526, 989622, 609071, 722550, 727572, 203321, 989795, 989850, 989869, 203578, 990049, 990059, 727943, 203720, 465920, 203783, 203790, 728112, 990480, 204088, 466338, 990628, 466400, 466454, 466456, 466536, 687671, 728735, 466657, 204520, 990969, 991000, 204701, 466865, 466878, 729023, 729058, 670004, 1126403, 466968, 991278, 204851, 24280, 991332, 204904, 991361, 204951, 204957, 78181, 991583, 991798, 205433, 729755, 602263, 908101, 992162, 730062, 467932, 383831, 730149, 340145, 730156, 992308, 468021, 205954, 205959, 992595, 34366, 730541, 468437, 468465, 992867, 252864, 992904, 730832, 468730, 993055, 1126709, 206738, 1126736, 731129, 469121, 469236, 733585, 469356, 993677, 233856, 1126817, 731662, 731691, 993838, 469566, 995778, 427801, 207572, 207703, 207754, 469976, 207879, 78352, 122049, 602653, 602654, 470372, 1126981, 994761, 994762, 1019506, 208344, 331343, 864853, 208394, 208417, 864864, 208493, 732794, 470680, 208657, 600524, 208702, 470933, 952306, 995250, 78497, 78501, 471017, 995529, 908665, 995599, 995721, 995898, 209497, 209531, 515531, 
321226, 733956, 996146, 209769, 209797, 996236, 340815, 472232, 996592, 734466, 734529, 603021, 996730, 472445, 734678, 996851, 996876, 734746, 734758, 865206, 734136, 734836, 865218, 997024, 865253, 210808, 472957, 997278, 473020, 473028, 473029, 473062, 997437, 473182, 515785, 735469, 735502, 997654, 997671, 473495, 253693, 909115, 736117, 736234, 998480, 736364, 212146, 862515, 474468, 736703, 212559, 736936, 216579, 1127718, 474875, 474961, 737112, 1115677, 737266, 999522, 999612, 166606, 999644, 1127802, 251513, 475394, 475408, 184640, 213353, 213365, 737654, 999829, 999910, 737913, 213726, 213758, 1127912, 738248, 691028, 214041, 738451, 341529, 79390, 172986, 1127990, 603714, 1000865, 1000893, 738788, 111891, 738829, 1000993, 738859, 79457, 214625, 909587, 739143, 739166, 739171, 477176, 1001465, 1001492, 1124480, 477474, 739636, 1115760, 477552, 862621, 1001961, 1001968, 739828, 1128198, 477793, 739954, 739996, 215744, 341802, 1002287, 740168, 1128270, 740366, 478255, 1002572, 1128291, 1107057, 1002690, 775366, 647777, 216656, 216731, 216736, 1128402, 866276, 1128427, 1003514, 1003630, 479358, 741514, 1003747, 479475, 479495, 479527, 1003909, 1003917, 479687, 1128562, 36299, 735360, 1004330, 480056, 742238, 217999, 604332, 480320, 1004774, 866523, 742695, 480646, 742800, 1004948, 742808, 1005149, 1005174, 779201, 480917, 866615, 1005440, 604500, 743489, 779295, 219453, 743777, 219723, 219797, 219809, 219844, 219898, 744230, 482144, 1128939, 80372, 1128949, 1128956, 429906, 482382, 744538, 220290, 220352, 220398, 482580, 744835, 1129042, 745060, 745317, 1007481, 745373, 483253, 692258, 745784, 745863, 745872, 671862, 36951, 1008191, 1129241, 221987, 1008453, 1008502, 1041905, 222132, 222133, 80718, 484350, 484467, 867190, 484556, 998270, 484646, 1008938, 1009016, 746963, 747004, 484881, 1009291, 1009408, 747285, 1129436, 941435, 223160, 747566, 1129483, 747597, 867346, 485567, 1009859, 1129508, 485594, 1010069, 747939, 998479, 748018, 485891, 1010270, 1010376, 1010426, 486173, 748434, 486312, 1129633, 224261, 486431, 748579, 748843, 486716, 486814, 224688, 1011124, 486839, 486939, 1011337, 1011348, 749215, 749326, 487275, 897859, 867648, 124943, 1011812, 1129835, 225419, 955087, 749813, 343439, 256066, 749976, 225703, 1129886, 750029, 487934, 750114, 750167, 1012318, 488073, 225986, 226012, 226190, 488426, 1012829, 488571, 924092, 226572, 750926, 488839, 226741, 488887, 1013228, 1013231, 489013, 1013322, 751255, 751289, 489166, 1130105, 489238, 227104, 1013556, 1013640, 227230, 1013679, 751571, 751621, 227362, 1013904, 227516, 1130199, 1014055, 452761, 955453, 1014167, 1014189, 490000, 490071, 605977, 1014437, 81693, 752388, 752441, 1130296, 752488, 868157, 490358, 752686, 911889, 490612, 1130345, 1014933, 490752, 752914, 1130378, 228670, 1130383, 228769, 606110, 490936, 753089, 490982, 1015289, 753220, 229045, 491200, 868300, 1130449, 229107, 1106615, 1015668, 229254, 1015949, 753921, 1130541, 229677, 1130558, 344155, 387851, 230013, 1016486, 868477, 1016577, 1130635, 169442, 754567, 1016732, 754589, 754609, 230365, 431674, 755062, 230808, 230824, 493020, 230878, 1017442, 493218, 1017519, 755381, 493262, 231134, 755461, 344368, 493439, 755688, 1017930, 493723, 493829, 493845, 493900, 1018202, 606609, 257085, 1018290, 25179, 494111, 1130933, 232008, 1018475, 494285, 606672, 1018792, 956231, 756681, 904125, 756790, 338487, 232609, 494786, 756934, 1131046, 1019246, 232919, 1019366, 495159, 495243, 1019607, 233178, 233185, 1019720, 1019740, 1125086, 495570, 495608, 495618, 233529, 
1020019, 233881, 233900, 869124, 1020489, 758422, 496334, 301180, 234277, 234583, 758918, 1131379, 759007, 496927, 759101, 759125, 1021302, 1021505, 497356, 759514, 235280, 235309, 497483, 913041, 1131507, 497632, 235560, 1022004, 869395, 497813, 497919, 760171, 941866, 1131613, 760387, 236105, 1022554, 853522, 236254, 1022698, 236269, 236359, 1107730, 498576, 760825, 761014, 236763, 236776, 236793, 236824, 761225, 761313, 499188, 761430, 1044414, 499364, 499385, 237335, 237441, 83266, 237465, 499666, 869721, 761883, 761907, 607582, 761941, 762035, 762072, 1131909, 563938, 1024432, 762434, 741173, 500447, 1024775, 500574, 1024923, 500646, 762803, 500680, 762865, 869887, 1025072, 500794, 1025104, 500832, 763063, 238804, 1132087, 1025444, 239147, 763443, 239245, 763534, 239250, 501442, 763641, 763748, 501645, 239589, 501878, 501894, 83666, 502000, 502104, 502221, 240102, 83712, 764533, 1026733, 502453, 127437, 870184, 1026842, 764766, 8008, 1132360, 765070, 240792, 240865, 880930, 1027534, 1132452, 1125347, 1027591, 346031, 846022, 503482, 1027793, 40228, 765659, 241399, 503613, 1110190, 695643, 503949, 503963, 766142, 1116643, 504057, 241937, 242042, 1107919, 766379, 242115, 1132628, 1028701, 1028772, 242583, 242603, 504751, 242796, 433683, 1125406, 243076, 767404, 767490, 767499, 243224, 767549, 767589, 243365, 1029816, 767678, 767703, 652242, 1029871, 1132847, 505647, 767991, 1030163, 1132903, 768114, 505992, 243874, 1132917, 243941, 1132925, 243972, 1030502, 506278, 768557, 1030770, 1132991, 506577, 244472, 1030949, 1030954, 1030994, 506768, 768916, 768939, 1031152, 608785, 507093, 1031384, 507139, 95449, 769310, 259312, 769449, 1031609, 507424, 245295, 565173, 1031850, 507583, 1031935, 303070, 1133194, 245620, 128322, 1032101, 245833, 958484, 508162, 1032499, 770356, 246076, 508254, 246118, 770472, 1032658, 783779, 770534, 1032680, 1032694, 1032719, 1032729, 1125519, 770613, 246327, 508476, 770648, 508510, 1032978, 84797, 508821, 246724, 508870, 509003, 1033381, 128543, 247025, 1033544, 1033642, 84901, 771555, 1125556, 771730, 783989, 247506, 509654, 1033989, 1033997, 1034015, 1133533, 1133535, 347113, 771994, 1034154, 200289, 247819, 247821, 1034261, 510004, 1034305, 1034337, 248028, 1034491, 772409, 772507, 128757, 1034724, 1034759, 248362, 510514, 248407, 510645, 772833, 772836, 303585, 772968, 1035153, 773025, 773040, 347294, 1035339, 1035340, 1035354, 511215, 1035658, 511401, 249267, 511466, 1133827, 249364, 773878, 609580, 1078819, 511806, 511841, 774027, 249814, 871767, 774158, 512069, 512146, 1036468, 512264, 774472, 512378, 512423, 1036759, 1036776, 512536, 1036830, 1036844, 512569, 512621, 959256, 1037088, 250673, 1134049, 1037159, 139285, 129183, 250836, 512985, 250927, 1037376, 8356, 1134109, 513232, 775430, 513303, 775459, 775487, 1003074, 1134157, 1037830, 1037871, 513591, 1037917, 1037969, 1037981, 609922, 513836, 513838, 514029, 1134251, 1046889, 1134272, 435223, 1134277, 514241, 252103, 514264, 514360, 776547, 514421, 252314, 252441, 872213, 776859, 776865, 1039098, 515005, 252887, 515064, 1129518, 515112, 515123, 515217, 515273, 1039636, 777578, 828649, 1039787, 253406, 653909, 777839, 777906, 515775, 253711, 129672, 778062, 1040212, 253834, 253837, 516087, 516185, 872463, 1040530, 1040637, 516429, 1040730, 1040752, 778620, 516532, 1040890, 254576, 86135, 779025, 254740, 1041217, 516941, 741537, 254923, 779256, 517135, 1134769, 255016, 255025, 1041478, 87055, 1041628, 779540, 1041743, 1029806, 86290, 517763, 517928, 255889, 610645, 60900, 855243, 518220, 1042543, 
256245, 122010, 518578, 907635, 518785, 518796, 256692, 1043138, 1043151, 130168, 1043229, 519027, 1043346, 1043433, 872978, 741803, 781539, 781689, 820372, 506130, 781808, 1043976, 602152, 610923, 781987, 257783, 174039, 782125, 782208, 1044449, 258032, 520202, 520273, 1016027, 1125919, 1044869, 1059674, 782783, 130467, 258617, 305251, 357777, 1045190, 521026, 130543, 521147, 783335, 259070, 521254, 1045593, 521329, 521342, 521367, 783583, 1045742, 130610, 259437, 783751, 584757, 521791, 1046115, 521869, 1046166, 784039, 784092, 796068, 1046316, 522054, 259924, 522212, 260080, 1135619, 522358, 1046684, 261830, 260302, 1046757, 130776, 1046881, 522821, 611442, 1047406, 523197, 261101, 1048410, 1135796, 261207, 43548, 261295, 864153, 261661, 218384, 786171, 524266, 567976], +} diff --git a/pyserini/resources/beir.yaml b/pyserini/resources/beir.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e488ec92f37da2eb4afb9999304704ec3db45938 --- /dev/null +++ b/pyserini/resources/beir.yaml @@ -0,0 +1,741 @@ +conditions: + - name: bm25-flat + command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query + datasets: + - dataset: trec-covid + scores: + - nDCG@10: 0.5947 + R@100: 0.1091 + R@1000: 0.3955 + - dataset: bioasq + scores: + - nDCG@10: 0.5225 + R@100: 0.7687 + R@1000: 0.9030 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3218 + R@100: 0.2457 + R@1000: 0.3704 + - dataset: nq + scores: + - nDCG@10: 0.3055 + R@100: 0.7513 + R@1000: 0.8958 + - dataset: hotpotqa + scores: + - nDCG@10: 0.6330 + R@100: 0.7957 + R@1000: 0.8820 + - dataset: fiqa + scores: + - nDCG@10: 0.2361 + R@100: 0.5395 + R@1000: 0.7393 + - dataset: signal1m + scores: + - nDCG@10: 0.3304 + R@100: 0.3703 + R@1000: 0.5642 + - dataset: trec-news + scores: + - nDCG@10: 0.3952 + R@100: 0.4469 + R@1000: 0.7051 + - dataset: robust04 + scores: + - nDCG@10: 0.4070 + R@100: 0.3746 + R@1000: 0.6345 + - dataset: arguana + scores: + - nDCG@10: 0.3970 + R@100: 0.9324 + R@1000: 0.9872 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.4422 + R@100: 0.5822 + R@1000: 0.8621 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.3801 + R@100: 0.6829 + R@1000: 0.8632 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.3453 + R@100: 0.5757 + R@1000: 0.7323 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.4822 + R@100: 0.7651 + R@1000: 0.8945 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.2901 + R@100: 0.6119 + R@1000: 0.8174 + - dataset: cqadupstack-mathematica + scores: + - nDCG@10: 0.2015 + R@100: 0.4877 + R@1000: 0.7221 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.3214 + R@100: 0.6326 + R@1000: 0.8340 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.2802 + R@100: 0.5588 + R@1000: 0.7734 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.2711 + R@100: 0.5338 + R@1000: 0.7310 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.2244 + R@100: 0.4686 + R@1000: 0.6907 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.2749 + R@100: 0.5417 + R@1000: 0.7616 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.3059 + R@100: 0.5820 + R@1000: 0.8066 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.2483 + R@100: 0.5152 + R@1000: 0.7552 + - dataset: quora + scores: + - nDCG@10: 0.7886 + R@100: 0.9733 + R@1000: 0.9950 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.3180 + R@100: 0.4682 + R@1000: 0.6760 + 
- dataset: scidocs + scores: + - nDCG@10: 0.1490 + R@100: 0.3477 + R@1000: 0.5638 + - dataset: fever + scores: + - nDCG@10: 0.6513 + R@100: 0.9185 + R@1000: 0.9589 + - dataset: climate-fever + scores: + - nDCG@10: 0.1651 + R@100: 0.4249 + R@1000: 0.6324 + - dataset: scifact + scores: + - nDCG@10: 0.6789 + R@100: 0.9253 + R@1000: 0.9767 + - name: bm25-multifield + command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0 + datasets: + - dataset: trec-covid + scores: + - nDCG@10: 0.6559 + R@100: 0.1141 + R@1000: 0.3891 + - dataset: bioasq + scores: + - nDCG@10: 0.4646 + R@100: 0.7145 + R@1000: 0.8428 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3254 + R@100: 0.2500 + R@1000: 0.3718 + - dataset: nq + scores: + - nDCG@10: 0.3285 + R@100: 0.7597 + R@1000: 0.9019 + - dataset: hotpotqa + scores: + - nDCG@10: 0.6027 + R@100: 0.7400 + R@1000: 0.8405 + - dataset: fiqa + scores: + - nDCG@10: 0.2361 + R@100: 0.5395 + R@1000: 0.7393 + - dataset: signal1m + scores: + - nDCG@10: 0.3304 + R@100: 0.3703 + R@1000: 0.5642 + - dataset: trec-news + scores: + - nDCG@10: 0.3977 + R@100: 0.4216 + R@1000: 0.6993 + - dataset: robust04 + scores: + - nDCG@10: 0.4070 + R@100: 0.3746 + R@1000: 0.6345 + - dataset: arguana + scores: + - nDCG@10: 0.4142 + R@100: 0.9431 + R@1000: 0.9893 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.3673 + R@100: 0.5376 + R@1000: 0.8668 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.3709 + R@100: 0.6889 + R@1000: 0.8712 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.3321 + R@100: 0.5842 + R@1000: 0.7574 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.4418 + R@100: 0.7571 + R@1000: 0.8882 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.2904 + R@100: 0.6458 + R@1000: 0.8248 + - dataset: cqadupstack-mathematica + scores: + - nDCG@10: 0.2046 + R@100: 0.5215 + R@1000: 0.7559 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.3248 + R@100: 0.6486 + R@1000: 0.8506 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.2963 + R@100: 0.6194 + R@1000: 0.8096 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.2790 + R@100: 0.5719 + R@1000: 0.7619 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.2086 + R@100: 0.4954 + R@1000: 0.7222 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.2788 + R@100: 0.5721 + R@1000: 0.7783 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.3008 + R@100: 0.6100 + R@1000: 0.8226 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.2562 + R@100: 0.5526 + R@1000: 0.7848 + - dataset: quora + scores: + - nDCG@10: 0.7886 + R@100: 0.9733 + R@1000: 0.9950 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.3128 + R@100: 0.3981 + R@1000: 0.5848 + - dataset: scidocs + scores: + - nDCG@10: 0.1581 + R@100: 0.3561 + R@1000: 0.5599 + - dataset: fever + scores: + - nDCG@10: 0.7530 + R@100: 0.9309 + R@1000: 0.9599 + - dataset: climate-fever + scores: + - nDCG@10: 0.2129 + R@100: 0.4357 + R@1000: 0.6099 + - dataset: scifact + scores: + - nDCG@10: 0.6647 + R@100: 0.9076 + R@1000: 0.9800 + - name: splade-distil-cocodenser-medium + command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}-splade_distil_cocodenser_medium --topics beir-v1.0.0-${dataset}-test-splade_distil_cocodenser_medium --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --impact --remove-query + datasets: + 
- dataset: trec-covid + scores: + - nDCG@10: 0.7109 + R@100: 0.1308 + R@1000: 0.4433 + - dataset: bioasq + scores: + - nDCG@10: 0.5035 + R@100: 0.7422 + R@1000: 0.8904 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3454 + R@100: 0.2891 + R@1000: 0.5694 + - dataset: nq + scores: + - nDCG@10: 0.5442 + R@100: 0.9285 + R@1000: 0.9812 + - dataset: hotpotqa + scores: + - nDCG@10: 0.6860 + R@100: 0.8144 + R@1000: 0.8945 + - dataset: fiqa + scores: + - nDCG@10: 0.3514 + R@100: 0.6298 + R@1000: 0.8323 + - dataset: signal1m + scores: + - nDCG@10: 0.2957 + R@100: 0.3311 + R@1000: 0.5514 + - dataset: trec-news + scores: + - nDCG@10: 0.3936 + R@100: 0.4323 + R@1000: 0.6977 + - dataset: robust04 + scores: + - nDCG@10: 0.4581 + R@100: 0.3773 + R@1000: 0.6099 + - dataset: arguana + scores: + - nDCG@10: 0.5210 + R@100: 0.9822 + R@1000: 0.9950 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.2435 + R@100: 0.4723 + R@1000: 0.8116 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.3954 + R@100: 0.7405 + R@1000: 0.9035 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.4026 + R@100: 0.6768 + R@1000: 0.8346 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.5061 + R@100: 0.8138 + R@1000: 0.9253 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.3223 + R@100: 0.6419 + R@1000: 0.8385 + - dataset: cqadupstack-mathematica + scores: + - nDCG@10: 0.2423 + R@100: 0.5732 + R@1000: 0.7848 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.3668 + R@100: 0.7286 + R@1000: 0.8931 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.3412 + R@100: 0.6653 + R@1000: 0.8451 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.3142 + R@100: 0.5889 + R@1000: 0.7823 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.2575 + R@100: 0.5231 + R@1000: 0.7372 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.3292 + R@100: 0.6192 + R@1000: 0.8225 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.3343 + R@100: 0.6404 + R@1000: 0.8767 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.2839 + R@100: 0.5974 + R@1000: 0.8036 + - dataset: quora + scores: + - nDCG@10: 0.8136 + R@100: 0.9817 + R@1000: 0.9979 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.4416 + R@100: 0.5636 + R@1000: 0.7774 + - dataset: scidocs + scores: + - nDCG@10: 0.1590 + R@100: 0.3671 + R@1000: 0.5891 + - dataset: fever + scores: + - nDCG@10: 0.7962 + R@100: 0.9550 + R@1000: 0.9751 + - dataset: climate-fever + scores: + - nDCG@10: 0.2276 + R@100: 0.5140 + R@1000: 0.7084 + - dataset: scifact + scores: + - nDCG@10: 0.6992 + R@100: 0.9270 + R@1000: 0.9767 + - name: contriever + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query + datasets: + - dataset: trec-covid + scores: + - nDCG@10: 0.2732 + R@100: 0.0368 + R@1000: 0.1675 + - dataset: bioasq + scores: + - nDCG@10: 0.3016 + R@100: 0.5412 + R@1000: 0.7396 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3173 + R@100: 0.2943 + R@1000: 0.6232 + - dataset: nq + scores: + - nDCG@10: 0.2536 + R@100: 0.7712 + R@1000: 0.9286 + - dataset: hotpotqa + scores: + - nDCG@10: 0.4807 + R@100: 0.7046 + R@1000: 0.8294 + - dataset: fiqa + scores: + - nDCG@10: 0.2449 + R@100: 0.5619 + R@1000: 0.8215 + - dataset: signal1m + scores: + - nDCG@10: 0.2338 + R@100: 0.2568 + R@1000: 0.4757 + - dataset: trec-news + scores: + - nDCG@10: 0.3484 + R@100: 0.4234 + R@1000: 0.7389 + - 
dataset: robust04 + scores: + - nDCG@10: 0.3155 + R@100: 0.2757 + R@1000: 0.5097 + - dataset: arguana + scores: + - nDCG@10: 0.3791 + R@100: 0.9011 + R@1000: 0.9851 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.1668 + R@100: 0.3736 + R@1000: 0.7144 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.3771 + R@100: 0.7436 + R@1000: 0.9173 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.3571 + R@100: 0.6442 + R@1000: 0.8042 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.4597 + R@100: 0.8092 + R@1000: 0.9354 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.2411 + R@100: 0.5792 + R@1000: 0.8018 + - dataset: cqadupstack-mathematica + scores: + - nDCG@10: 0.1841 + R@100: 0.5127 + R@1000: 0.7757 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.3430 + R@100: 0.7013 + R@1000: 0.8980 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.3029 + R@100: 0.6402 + R@1000: 0.8434 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.2483 + R@100: 0.5269 + R@1000: 0.7417 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.1540 + R@100: 0.4333 + R@1000: 0.6870 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.2636 + R@100: 0.5879 + R@1000: 0.8212 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.2878 + R@100: 0.6485 + R@1000: 0.8800 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.1914 + R@100: 0.5364 + R@1000: 0.7551 + - dataset: quora + scores: + - nDCG@10: 0.8349 + R@100: 0.9871 + R@1000: 0.9981 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.2916 + R@100: 0.4529 + R@1000: 0.7142 + - dataset: scidocs + scores: + - nDCG@10: 0.1491 + R@100: 0.3601 + R@1000: 0.6105 + - dataset: fever + scores: + - nDCG@10: 0.6821 + R@100: 0.9356 + R@1000: 0.9655 + - dataset: climate-fever + scores: + - nDCG@10: 0.1550 + R@100: 0.4422 + R@1000: 0.7232 + - dataset: scifact + scores: + - nDCG@10: 0.6493 + R@100: 0.9260 + R@1000: 0.9967 + - name: contriever-msmarco + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever-msmarco --index beir-v1.0.0-${dataset}.contriever-msmarco --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query + datasets: + - dataset: trec-covid + scores: + - nDCG@10: 0.5964 + R@100: 0.0907 + R@1000: 0.3351 + - dataset: bioasq + scores: + - nDCG@10: 0.3829 + R@100: 0.6072 + R@1000: 0.7666 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3281 + R@100: 0.3008 + R@1000: 0.6305 + - dataset: nq + scores: + - nDCG@10: 0.4977 + R@100: 0.9252 + R@1000: 0.986 + - dataset: hotpotqa + scores: + - nDCG@10: 0.6376 + R@100: 0.7772 + R@1000: 0.8718 + - dataset: fiqa + scores: + - nDCG@10: 0.3293 + R@100: 0.6558 + R@1000: 0.8695 + - dataset: signal1m + scores: + - nDCG@10: 0.2783 + R@100: 0.322 + R@1000: 0.5419 + - dataset: trec-news + scores: + - nDCG@10: 0.4283 + R@100: 0.4924 + R@1000: 0.7752 + - dataset: robust04 + scores: + - nDCG@10: 0.4729 + R@100: 0.3917 + R@1000: 0.6552 + - dataset: arguana + scores: + - nDCG@10: 0.4461 + R@100: 0.9765 + R@1000: 0.9964 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.204 + R@100: 0.442 + R@1000: 0.829 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.4255 + R@100: 0.7503 + R@1000: 0.9304 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.4326 + R@100: 0.6935 + R@1000: 0.8435 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.5276 + R@100: 0.8481 + R@1000: 0.9427 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.3022 + R@100: 0.6272 + R@1000: 0.8417 + - dataset: 
cqadupstack-mathematica + scores: + - nDCG@10: 0.2355 + R@100: 0.5726 + R@1000: 0.7995 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.4159 + R@100: 0.7619 + R@1000: 0.9162 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.3574 + R@100: 0.7191 + R@1000: 0.8878 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.3095 + R@100: 0.586 + R@1000: 0.7805 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.2209 + R@100: 0.4985 + R@1000: 0.7348 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.3257 + R@100: 0.6161 + R@1000: 0.8373 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.3392 + R@100: 0.7032 + R@1000: 0.8956 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.2532 + R@100: 0.5769 + R@1000: 0.7929 + - dataset: quora + scores: + - nDCG@10: 0.8648 + R@100: 0.9935 + R@1000: 0.9994 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.4128 + R@100: 0.5414 + R@1000: 0.7751 + - dataset: scidocs + scores: + - nDCG@10: 0.1652 + R@100: 0.3783 + R@1000: 0.6216 + - dataset: fever + scores: + - nDCG@10: 0.7583 + R@100: 0.9494 + R@1000: 0.9705 + - dataset: climate-fever + scores: + - nDCG@10: 0.2371 + R@100: 0.5746 + R@1000: 0.8019 + - dataset: scifact + scores: + - nDCG@10: 0.6768 + R@100: 0.947 + R@1000: 0.9833 diff --git a/pyserini/resources/index-metadata/faiss-flat.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.README.md b/pyserini/resources/index-metadata/faiss-flat.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.README.md new file mode 100644 index 0000000000000000000000000000000000000000..549d7c4e804f57ed27cc6eb795040ba57940bf7b --- /dev/null +++ b/pyserini/resources/index-metadata/faiss-flat.wiki-all-6-3.dpr2-multi-retriever.20230103.186fa7.README.md @@ -0,0 +1,19 @@ +# wiki-all-6-3-dpr2-multi + +Faiss FlatIP index of wiki-all-6-3 (https://huggingface.co/datasets/castorini/odqa-wiki-corpora) encoded by a 2nd iteration DPR model trained on multiple QA datasets (castorini/wiki-all-6-3-multi-dpr2-passage-encoder). +This index was generated on 2023/01/03 on `narval` at commits: + ++ Pyserini commit ['186fa7'](https://github.com/castorini/pyserini/commit/186fa793867f7572d62dc323322ba92926f12ce4) (2023/01/03) ++ [Tevatron](https://github.com/texttron/tevatron) commit [`7a5afe`](https://github.com/texttron/tevatron/commit/7a5afedb5893009154a0e915a2597e1a95e9d2a8) (2023/01/03) + +with the following command to generate the embeddings (from Tevatron repo): + +```bash +python -m tevatron.driver.jax_encode \ + --output_dir=temp \ + --model_name_or_path wiki-all-6-3-multi-dpr2-passage-encoder \ + --per_device_eval_batch_size 1248 \ + --dataset_name wiki_all_6_3.jsonl \ + --encoded_save_path corpus_emb.pkl \ + --p_max_len 256 +``` diff --git a/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-nq-retriever.20220217.25ed1f.cc91b2.README.md b/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-nq-retriever.20220217.25ed1f.cc91b2.README.md new file mode 100644 index 0000000000000000000000000000000000000000..faa97b4a33f5dd414332071bd635e9885f3a6cd6 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-nq-retriever.20220217.25ed1f.cc91b2.README.md @@ -0,0 +1,27 @@ +# wikipedia-dpr-dkrr-nq + +Faiss FlatIP index of Wikipedia DPR encoded by the retriever model from [Distilling Knowledge from Reader to Retriever for Question Answering](https://arxiv.org/abs/2012.04584) trained on NQ. 
+This index was generated on 2022/02/17 on `orca` at commits: + ++ Pyserini commit [`cc91b2`](https://github.com/castorini/pyserini/commit/cc91b22f549702068cea1283f91b31d28d127b2f) (2022/02/17) ++ [FiD](https://github.com/facebookresearch/FiD) commit [`25ed1f`](https://github.com/facebookresearch/FiD/commit/25ed1ff0fe0288b80fb5e9e5de8d6346b94b8d48) (2022/02/17) + +with the following command to generate the embeddings (from FiD repo): + +```bash +python generate_passage_embeddings.py \ + --model_path nq_retriever \ + --passages passages.tsv \ + --output_path wikipedia_embeddings_nq \ + --shard_id 0 \ + --num_shards 1 \ + --per_gpu_batch_size 500 +``` + +and the following command to convert the embeddings to faiss IndexFlatIP form: + +```bash +python convert_dkrr_embeddings_to_faiss.py \ + --embeddings wikipedia_embeddings_nq \ + --output faiss-flat.wikipedia.dkrr-dpr-nq-retriever +``` diff --git a/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-tqa-retriever.20220217.25ed1f.cc91b2.README.md b/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-tqa-retriever.20220217.25ed1f.cc91b2.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c1cc89fab10436c8e8266dffb1f1dc3a0d2e305 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss-flat.wikipedia.dkrr-dpr-tqa-retriever.20220217.25ed1f.cc91b2.README.md @@ -0,0 +1,27 @@ +# wikipedia-dpr-dkrr-tqa + +Faiss FlatIP index of Wikipedia DPR encoded by the retriever model from [Distilling Knowledge from Reader to Retriever for Question Answering](https://arxiv.org/abs/2012.04584) trained on TriviaQA. +This index was generated on 2022/02/17 on `orca` at commits: + ++ Pyserini commit [`cc91b2`](https://github.com/castorini/pyserini/commit/cc91b22f549702068cea1283f91b31d28d127b2f) (2022/02/17) ++ [FiD](https://github.com/facebookresearch/FiD) commit [`25ed1f`](https://github.com/facebookresearch/FiD/commit/25ed1ff0fe0288b80fb5e9e5de8d6346b94b8d48) (2022/02/17) + +with the following command to generate the embeddings (from FiD repo): + +```bash +python generate_passage_embeddings.py \ + --model_path tqa_retriever \ + --passages passages.tsv \ + --output_path wikipedia_embeddings_tqa \ + --shard_id 0 \ + --num_shards 1 \ + --per_gpu_batch_size 500 +``` + +and the following command to convert the embeddings to faiss IndexFlatIP form: + +```bash +python convert_dkrr_embeddings_to_faiss.py \ + --embeddings wikipedia_embeddings_tqa \ + --output faiss-flat.wikipedia.dkrr-dpr-tqa-retriever +``` diff --git a/pyserini/resources/index-metadata/faiss-hnsw.cast2019.tct_colbert-v2-readme.txt b/pyserini/resources/index-metadata/faiss-hnsw.cast2019.tct_colbert-v2-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..1fb3cd09f7c5b26b29a839d3f3c4bf5fdbe2cfdc --- /dev/null +++ b/pyserini/resources/index-metadata/faiss-hnsw.cast2019.tct_colbert-v2-readme.txt @@ -0,0 +1,5 @@ +This faiss hnsw index was generated on 2021/10/23 using the repo https://github.com/castorini/CQE (see command Inference section) on Orca (Passage encoding on graham). + +Both the hyperparameter M and efConstruction are set to 256. 
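For reference, here is a minimal sketch of how an HNSW index with these settings could be built directly in Faiss (assumptions: 768-dimensional TCT-ColBERT-v2 passage embeddings have already been computed and saved to a hypothetical `embeddings.npy`, and inner-product similarity is used):

```python
import faiss
import numpy as np

# Hypothetical input: precomputed passage embeddings, shape (num_passages, 768), float32.
embeddings = np.load('embeddings.npy').astype('float32')

# HNSW index with the hyperparameters described above: M = 256, efConstruction = 256.
index = faiss.IndexHNSWFlat(embeddings.shape[1], 256, faiss.METRIC_INNER_PRODUCT)
index.hnsw.efConstruction = 256
index.add(embeddings)

faiss.write_index(index, 'index')
```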
+Note that in the future the index name should be renamed as faiss-hnsw.cast2019.tct_colbert-v2 + diff --git a/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md b/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md new file mode 100644 index 0000000000000000000000000000000000000000..5da58d8377d4842a6192028bf24e9f6f1d15f22c --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever-msmarco.20230124.README.md @@ -0,0 +1,19 @@ +# BEIR v1.0.0 contriever-msmarco + +This index was generated on 20230124 using Tevatron with following command: + +``` +python -m tevatron.driver.encode \ +--output_dir=temp \ +--model_name_or_path facebook/contriever-msmarco \ +--fp16 \ +--tokenizer_name bert-base-uncased \ +--per_device_eval_batch_size 156 \ +--p_max_len 512 \ +--dataset_name Tevatron/beir-corpus:$subdataset \ +--encoded_save_path beir_embeddings/corpus_emb.$subdataset.pkl +``` + +where the `subdataset` is one of the BEIR dataset, e.g. `scifact`. + +The Embedding is then converted to Pyserini index format. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever.20230124.README.md b/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever.20230124.README.md new file mode 100644 index 0000000000000000000000000000000000000000..761bf6627edc5411e3254ae6169129f344284372 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.beir-v1.0.0.contriever.20230124.README.md @@ -0,0 +1,19 @@ +# BEIR v1.0.0 contriever + +This index was generated on 20230124 using Tevatron with following command: + +``` +python -m tevatron.driver.encode \ +--output_dir=temp \ +--model_name_or_path facebook/contriever \ +--fp16 \ +--tokenizer_name bert-base-uncased \ +--per_device_eval_batch_size 156 \ +--p_max_len 512 \ +--dataset_name Tevatron/beir-corpus:$subdataset \ +--encoded_save_path beir_embeddings/corpus_emb.$subdataset.pkl +``` + +where the `subdataset` is one of the BEIR dataset, e.g. `scifact`. + +The Embedding is then converted to Pyserini index format. 
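The conversion step is not spelled out here; a minimal sketch of what it could look like, mirroring the `convert_index.py` script shown for the MIRACL indexes further down (all paths are hypothetical):

```python
import os
import pickle

import faiss
import numpy as np

emb_path = 'beir_embeddings/corpus_emb.scifact.pkl'   # hypothetical Tevatron output
out_dir = 'faiss.beir-v1.0.0-scifact.contriever'      # hypothetical Pyserini index directory
os.makedirs(out_dir, exist_ok=True)

# Tevatron saves a (representations, docid lookup) pair per pickle.
with open(emb_path, 'rb') as f:
    reps, docids = pickle.load(f)
reps = np.asarray(reps, dtype='float32')

# Pyserini's Faiss index layout: an `index` file plus a `docid` file with one id per line.
index = faiss.IndexFlatIP(reps.shape[1])
index.add(reps)
faiss.write_index(index, os.path.join(out_dir, 'index'))

with open(os.path.join(out_dir, 'docid'), 'w') as f:
    for docid in docids:
        f.write(f'{docid}\n')
```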
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md new file mode 100644 index 0000000000000000000000000000000000000000..31b70df5f4ee3a9dc5353a096b50b0be142e9a25 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco-ft-all.README.md @@ -0,0 +1,24 @@ +# miracl-v1.0-mdpr-tied-pft-msmarco-ft-all + +This index was generated on 2022/10/04 at Pyserini commit [`2b2856`](https://github.com/castorini/pyserini/commit/2b2856a9037c11061470cbf3d0961c7d041f1342) on `basilisk` with the following command: + +``` +corpus=./corpus/miracl-corpus-v1.0-${lang} + +encoder=castorini/mdpr-tied-pft-msmarco-ft-all +shard_id=0 +shard_num=1 + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + --shard-id $shard_id \ + --shard-num $shard_num \ + output --embeddings $index_dir-$shard_id \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --encoder-class 'auto' \ + --fp16 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md new file mode 100644 index 0000000000000000000000000000000000000000..90686de22d6609ea339163a2020cef3956249e13 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20221004.2b2856.mdpr-tied-pft-msmarco.README.md @@ -0,0 +1,24 @@ +# miracl-v1.0-mdpr-tied-pft-msmarco + +This index was generated on 2022/10/04 at Pyserini commit [`2b2856`](https://github.com/castorini/pyserini/commit/2b2856a9037c11061470cbf3d0961c7d041f1342) on `basilisk` with the following command: + +``` +corpus=./corpus/miracl-corpus-v1.0-${lang} + +encoder=castorini/mdpr-tied-pft-msmarco +shard_id=0 +shard_num=1 + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + --shard-id $shard_id \ + --shard-num $shard_num \ + output --embeddings $index_dir-$shard_id \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --encoder-class 'auto' \ + --fp16 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md new file mode 100644 index 0000000000000000000000000000000000000000..48d86c12fd33aa2910284accf87cb897b46df215 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.miracl-v1.0.20230313.e40d4a.mcontriever-tied-pft-msmarco.README.md @@ -0,0 +1,42 @@ +# miracl-v1.0-mdpr-tied-pft-msmarco-ft-miracl-${lang} + +This index was generated on 2023/03/13 on commit 20230313. 
+ +## Index from Pyserini +```bash +lang=ar # or any lang abbreviation + +encoder=facebook/mcontriever-msmarco +index_dir=faiss.miracl-v1.0-$lang.mcontriever-tied-pft-msmarco.20230313.e40d4a +echo $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + --shard-id $shard_id \ + --shard-num $shard_num \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --encoder-class contriever \ + --fp16 +``` + +## To use as Search +``` +index= +output=run.miracl.mdpr-tied-pft-msmarco.$lang.dev.txt + +python -m pyserini.search.faiss \ + --encoder-class contriever \ + --encoder facebook/mcontriever-msmarco \ + --topics miracl-v1.0-$lang-dev \ + --index miracl-v1.0-$lang-mcontriever-pft-msmarco \ + --output $output \ + --batch 128 --threads 16 --hits 100 +``` + + +python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/mcontriever-msmarco --topics miracl-v1.0-$lang-dev --index miracl-v1.0-$lang-mcontriever-pft-msmarco --output $output --batch 128 --threads 16 --hits 100 \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md b/pyserini/resources/index-metadata/faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..dd131f1999b230bd209feb2338372c0ffe14746b --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.miracl-v1.0.mdpr-tied-pft-msmarco-ft-miracl.20230329.e40d4a.README.md @@ -0,0 +1,142 @@ +# miracl-v1.0-mdpr-tied-pft-msmarco-ft-miracl-${lang} + +This index was generated on 2023/03/21 using [tevatron](https://github.com/texttron/tevatron) with the following commands: + +## Create Train Directory + +> **`create_train_dir.py`** +> ```python +> import json +> from pyserini.search.lucene import LuceneSearcher +> from datasets import load_dataset +> from random import shuffle +> from tqdm import tqdm +> +> searcher = LuceneSearcher.from_prebuilt_index('miracl-v1.0-${lang}') +> searcher.set_language('${lang}') +> +> miracl_train = load_dataset('miracl/miracl', '${lang}', split='train') +> with open('miracl_train_bm25_neg_top100_random30.${lang}.jsonl', 'w') as f: +> for data in tqdm(miracl_train): +> query = data['query'] +> positives = data['positive_passages'] +> negatives = data['negative_passages'] +> positive_ids = [p['docid'] for p in positives] +> negative_ids = [p['docid'] for p in negatives] +> hits = searcher.search(query, k=100) +> bm25_negatives = [] +> for hit in hits: +> info = json.loads(hit.raw) +> if info['docid'] not in positive_ids and info['docid'] not in negative_ids: +> bm25_negatives.append(info) +> all_negatives = negatives + bm25_negatives +> shuffle(all_negatives) +> random_30_negatives = all_negatives[:30] +> data['negative_passages'] = random_30_negatives +> if len(random_30_negatives) > 0: +> f.write(json.dumps(data, ensure_ascii=False)+'\n') +> ``` + +```bash +python create_train_dir.py +``` + +## Train +```bash +CUDA_VISIBLE_DEVICES=0 python -m tevatron.driver.train \ + --output_dir model_miracl_${lang} \ + --model_name_or_path castorini/mdpr-tied-pft-msmarco \ + --tokenizer_name bert-base-multilingual-cased \ + --save_steps 20000 \ + --dataset_name Tevatron/msmarco-passage \ + --per_device_train_batch_size 64 \ + --train_dir miracl_train_bm25_neg_top100_random30.${lang}.jsonl \ + --train_n_passages 2 \ + --learning_rate 1e-5 \ + --q_max_len 
32 \ + --p_max_len 256 \ + --num_train_epochs 40 \ + --logging_steps 10 \ + --overwrite_output_dir \ + --fp16 +``` + +## Encode Corpus +```bash +CUDA_VISIBLE_DEVICES=0 python -m tevatron.driver.encode \ + --output_dir=temp_out \ + --model_name_or_path model_miracl_${lang} \ + --fp16 \ + --per_device_eval_batch_size 256 \ + --dataset_name miracl/miracl-corpus:${lang} \ + --p_max_len 256 \ + --encoded_save_path model_miracl_${lang}_corpus/${lang}_corpus_emb.pt +``` + +## Convert Index + +> #### **`convert_index.py`** +> ```python +> import numpy as np +> import faiss +> import pickle +> import os +> from tqdm import tqdm +> import argparse +> +> parser = argparse.ArgumentParser() +> parser.add_argument('--input', type=str, required=True) +> parser.add_argument('--output', type=str, required=True) +> args = parser.parse_args() +> +> def pickle_load(path): +> with open(path, 'rb') as f: +> reps, lookup = pickle.load(f) +> return np.array(reps), lookup +> +> index = faiss.IndexFlatIP(768) +> +> all_ids = [] +> for name in tqdm(os.listdir(args.input)): +> if 'corpus_emb' not in name: +> continue +> path = os.path.join(args.input, name) +> reps, ids = pickle_load(path) +> all_ids.extend(ids) +> index.add(reps) +> +> faiss.write_index(index, f'{args.output}/index') +> with open(f'{args.output}/docid', 'w') as f: +> for i in all_ids: +> f.write(f'{i}\n') +> ``` + +```bash +python test.py --input=model_miracl_${lang}_corpus --output=${lang}_index +``` + + +## Index from Pyserini +Tested to use the same checkpoint to index directly via Pyserini using the following command, got the same score. (on basilisk) +(only tested on Swahili) +```bash +encoder=castorini/mdpr-tied-pft-msmarco-ft-miracl-$lang + +index_dir=miracl-v1.0-$lang-mdpr-tied-pft-msmarco-ft-miracl-$lang +echo $index_dir + + +CUDA_VISIBLE_DEVICES=1 \ +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + --shard-id $shard_id \ + --shard-num $shard_num \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --encoder-class 'auto' \ + --fp16 +``` diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-arabic.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-arabic.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9d9fecd2e88ac3edbeb8c25b9c7383e0433075ff --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-arabic.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-arabic + +Faiss flat index for Mr.TyDi v1.1 (Arabic), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=arabic + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. 
+This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-bengali.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-bengali.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f585aa07dcf732f7404565caa611e14fd58b724f --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-bengali.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-bengali + +Faiss flat index for Mr.TyDi v1.1 (Bengali), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=bengali + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-english.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-english.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6190b570e61dc90e76d9209c8ac9fb0004972fc1 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-english.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-english + +Faiss flat index for Mr.TyDi v1.1 (English), using mDPR fine-tuned on NQ. 
+ +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=english + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-finnish.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-finnish.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f04b2d99985cd6583c0b17ece6185a9500b6f541 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-finnish.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-finnish + +Faiss flat index for Mr.TyDi v1.1 (Finnish), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=finnish + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). 
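The same retrieval can also be driven from Python instead of the CLI command below; a minimal sketch (assumptions: the locally built `index_dir` from above, and `DprQueryEncoder` as the query-encoder class for `castorini/mdpr-question-nq`):

```python
from pyserini.search.faiss import DprQueryEncoder, FaissSearcher

# Load the locally built flat index together with the mDPR question encoder fine-tuned on NQ.
encoder = DprQueryEncoder('castorini/mdpr-question-nq')
searcher = FaissSearcher('mrtydi-mdpr-dindex/finnish', encoder)

# "What is the capital of Finland?"
hits = searcher.search('Mikä on Suomen pääkaupunki?', k=100)
for i, hit in enumerate(hits[:10]):
    print(f'{i + 1:2} {hit.docid:20} {hit.score:.5f}')
```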
+ +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-indonesian.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-indonesian.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6745b03862b707ce500493da118fade400fed8f0 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-indonesian.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-indonesian + +Faiss flat index for Mr.TyDi v1.1 (Indonesian), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=indonesian + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-japanese.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-japanese.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..c7b0e58a79e5909dbf09a77f7936a339de58df78 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-japanese.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-japanese + +Faiss flat index for Mr.TyDi v1.1 (Japanese), using mDPR fine-tuned on NQ. 
+ +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=japanese + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-korean.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-korean.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..87451ed03cd7fc1bd7591f36718b87c4a44991d1 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-korean.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-korean + +Faiss flat index for Mr.TyDi v1.1 (Korean), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=korean + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). 
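After the indexing command above finishes, the resulting index can be sanity-checked with a few lines of Faiss before running retrieval (a sketch; the paths assume the `index_dir` used above):

```python
import faiss

index_dir = 'mrtydi-mdpr-dindex/korean'  # the index_dir set in the command above

# A Pyserini Faiss index is a directory holding an `index` file and a `docid` file.
index = faiss.read_index(f'{index_dir}/index')
with open(f'{index_dir}/docid') as f:
    num_docids = sum(1 for _ in f)

print(f'dimension: {index.d}, vectors: {index.ntotal}, docids: {num_docids}')
assert index.ntotal == num_docids, 'embedding count and docid count should match'
```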
+ +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-russian.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-russian.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..d804cba8f71bece12840497fdb5a8a3eb0e6609e --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-russian.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-russian + +Faiss flat index for Mr.TyDi v1.1 (Russian), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=russian + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-swahili.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-swahili.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3324357be037f2ffaf70852c44abe1aa9cbbbc63 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-swahili.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-swahili + +Faiss flat index for Mr.TyDi v1.1 (Swahili), using mDPR fine-tuned on NQ. 
+ +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=swahili + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). + +Here's a sample retrieval command (on the test set): + +```bash +set_name=test +python -m pyserini.dsearch \ + --encoder castorini/mdpr-question-nq \ + --topics mrtydi-v1.1-${lang}-${set_name} \ + --index ${index_dir} \ + --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt + --batch-size 36 \ + --threads 12 +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-telugu.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-telugu.20220207.5df364.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3607478d0b00d2bd6fe6926f5ecd45e00d4a67a2 --- /dev/null +++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-telugu.20220207.5df364.README.md @@ -0,0 +1,46 @@ +# mrtydi-v1.1-telugu + +Faiss flat index for Mr.TyDi v1.1 (Telugu), using mDPR fine-tuned on NQ. + +This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command: + +```bash +lang=telugu + +tarfn=mrtydi-v1.1-$lang.tar.gz +encoder=models/mdpr-context-encoder +corpus=mrtydi-v1.1-$lang/collection/docs.jsonl +index_dir=mrtydi-mdpr-dindex/$lang + +wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn +tar –xvf $tarfn +gzip -cvf $corpus.gz > $corpus + +mkdir -p $index_dir + +python -m pyserini.encode input --corpus $corpus \ + --fields title text \ + --delimiter "\n\n" \ + output --embeddings $index_dir \ + --to-faiss \ + encoder --encoder $encoder \ + --fields title text \ + --batch 128 \ + --fp16 +``` + +Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`. +This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104). 
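Once the retrieval command below has produced a run file, it can be scored with Pyserini's `trec_eval` wrapper. A sketch, assuming the qrels are registered under the same `mrtydi-v1.1-${lang}-${set_name}` name as the topics, and using the MRR@100 and Recall@100 metrics conventionally reported for Mr.TyDi:

```bash
set_name=test
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank \
  mrtydi-v1.1-$lang-$set_name runs/run.mrtydi-v1.1-$lang.${set_name}.txt
python -m pyserini.eval.trec_eval -c -m recall.100 \
  mrtydi-v1.1-$lang-$set_name runs/run.mrtydi-v1.1-$lang.${set_name}.txt
```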
+
+Here's a sample retrieval command (on the test set):
+
+```bash
+set_name=test
+python -m pyserini.dsearch \
+ --encoder castorini/mdpr-question-nq \
+ --topics mrtydi-v1.1-${lang}-${set_name} \
+ --index ${index_dir} \
+ --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt \
+ --batch-size 36 \
+ --threads 12
+```
\ No newline at end of file
diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-thai.20220207.5df364.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-thai.20220207.5df364.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..233d6f7e2ae2e0a98121289f2d5f84265b855951
--- /dev/null
+++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1-thai.20220207.5df364.README.md
@@ -0,0 +1,46 @@
+# mrtydi-v1.1-thai
+
+Faiss flat index for Mr.TyDi v1.1 (Thai), using mDPR fine-tuned on NQ.
+
+This index was generated on 2022/02/07 at commit [5df364](https://github.com/castorini/pyserini/commit/5df3649b128ece125ce8a9171ed4001ce3a6ef23) on `narval` with the following command:
+
+```bash
+lang=thai
+
+tarfn=mrtydi-v1.1-$lang.tar.gz
+encoder=models/mdpr-context-encoder
+corpus=mrtydi-v1.1-$lang/collection/docs.jsonl
+index_dir=mrtydi-mdpr-dindex/$lang
+
+wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn
+tar -xvf $tarfn
+gzip -cvf $corpus.gz > $corpus
+
+mkdir -p $index_dir
+
+python -m pyserini.encode input --corpus $corpus \
+ --fields title text \
+ --delimiter "\n\n" \
+ output --embeddings $index_dir \
+ --to-faiss \
+ encoder --encoder $encoder \
+ --fields title text \
+ --batch 128 \
+ --fp16
+```
+
+Note that the delimiter was manually changed from "`\n`" into "`\n\n`" in `pyserini.encode`.
+This was later generalized into a command-line option in [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104).
+
+Here's a sample retrieval command (on the test set):
+
+```bash
+set_name=test
+python -m pyserini.dsearch \
+ --encoder castorini/mdpr-question-nq \
+ --topics mrtydi-v1.1-${lang}-${set_name} \
+ --index ${index_dir} \
+ --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt \
+ --batch-size 36 \
+ --threads 12
+```
\ No newline at end of file
diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd327099d367d59521e436ff833b27396f2f5710
--- /dev/null
+++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220413.aa1c0e9.mdpr-tied-pft-msmarco.README.md
@@ -0,0 +1,71 @@
+# mrtydi-v1.1-arabic (trained on MS MARCO)
+
+Faiss flat index for Mr.TyDi v1.1 (Arabic), using mDPR fine-tuned on MS MARCO.
+
+This index was generated on 2022/03/27 at commit [aa1c0e9](https://github.com/castorini/pyserini/commit/aa1c0e9a5bbfab406f8c73d23c91a009307096c6) on `cedar` with the following command:
+
+```bash
+lang=arabic
+
+tarfn=mrtydi-v1.1-$lang.tar.gz
+corpus=mrtydi-v1.1-$lang/collection/docs.jsonl
+index_dir=mrtydi-mdpr-dindex-msmarco/$lang
+
+wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn
+tar -xvf $tarfn
+gzip -cvf $corpus.gz > $corpus
+
+shard_num=1
+encoder=mdpr-mrtydi-0shot-msmarco-tied-encoder-converted
+
+for shard_id in $(seq 0 $((shard_num - 1))) ; do
+ index_dir=mdpr-dindex/$lang-$shard_id
+ mkdir -p $index_dir
+ python -m pyserini.encode input --corpus $corpus \
+ --fields title text \
+ --delimiter "\n\n" \
+ --shard-id $shard_id \
+ --shard-num $shard_num \
+ output --embeddings $index_dir \
+ --to-faiss \
+ encoder --encoder $encoder \
+ --fields title text \
+ --batch 128 \
+ --fp16
+done
+```
+
+Note that the delimiter option is only supported after [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104).
+
+The index can later be reproduced at commit [7b099d5](https://github.com/crystina-z/pyserini/commit/7b099d534901d1f0161982605cd40d039ddb701d) using:
+```
+encoder=castorini/mdpr-tied-pft-msmarco
+index_dir=mdpr-dindex/$lang-$shard_id
+mkdir -p $index_dir
+python -m pyserini.encode input --corpus $corpus \
+ --fields title text \
+ --delimiter "\n\n" \
+ --shard-id $shard_id \
+ --shard-num $shard_num \
+ output --embeddings $index_dir \
+ --to-faiss \
+ encoder --encoder $encoder \
+ --fields title text \
+ --batch 128 \
+ --encoder-class 'auto' \
+ --fp16
+```
+
+Here's a sample retrieval command (on the test set):
+
+```bash
+set_name=test
+python -m pyserini.dsearch \
+ --encoder castorini/mdpr-tied-pft-msmarco \
+ --topics mrtydi-v1.1-${lang}-${set_name} \
+ --index ${index_dir} \
+ --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt \
+ --batch-size 36 \
+ --threads 12 \
+ --encoder-class 'auto'
+```
diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3fc67edad6ee47b0d468b0b6337f36c9c5560ceb
--- /dev/null
+++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220523.7b099d5.mdpr-tied-pft-nq.README.md
@@ -0,0 +1,54 @@
+# mrtydi-v1.1-arabic (trained on NQ)
+
+Faiss flat index for Mr.TyDi v1.1 (Arabic), using mDPR fine-tuned on NQ.
+
+This index was generated on 2022/05/23 at commit [7b099d5](https://github.com/crystina-z/pyserini/commit/7b099d534901d1f0161982605cd40d039ddb701d) on `basilisk` with the following command:
+
+```bash
+lang=arabic # any language in Mr. TyDi
+
+tarfn=mrtydi-v1.1-$lang.tar.gz
+corpus=mrtydi-v1.1-$lang/collection/docs.jsonl
+index_dir=mrtydi-mdpr-dindex-msmarco/$lang
+
+wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn
+tar -xvf $tarfn
+gzip -cvf $corpus.gz > $corpus
+
+shard_num=1
+encoder=castorini/mdpr-tied-pft-nq
+
+for shard_id in $(seq 0 $((shard_num - 1))) ; do
+ index_dir=mdpr-dindex/$lang-$shard_id
+ mkdir -p $index_dir
+ python -m pyserini.encode input --corpus $corpus \
+ --fields title text \
+ --delimiter "\n\n" \
+ --shard-id $shard_id \
+ --shard-num $shard_num \
+ output --embeddings $index_dir \
+ --to-faiss \
+ encoder --encoder $encoder \
+ --fields title text \
+ --batch 128 \
+ --encoder-class 'auto' \
+ --fp16
+done
+```
+
+Note that the delimiter option is only supported after [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104).
+
+
+Here's a sample retrieval command (on the test set):
+
+```bash
+set_name=test
+python -m pyserini.search.faiss \
+ --encoder castorini/mdpr-tied-pft-nq \
+ --topics mrtydi-v1.1-${lang}-${set_name} \
+ --index ${index_dir} \
+ --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt \
+ --batch-size 36 \
+ --threads 12 \
+ --encoder-class 'auto'
+```
diff --git a/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3ae5b26c5aa9df37841358aab566e38b55f98eca
--- /dev/null
+++ b/pyserini/resources/index-metadata/faiss.mrtydi-v1.1.20220524.7b099d5.mdpr-tied-pft-msmarco-ft-all.README.md
@@ -0,0 +1,54 @@
+# mrtydi-v1.1-arabic (pre-fine-tuned on MS MARCO then fine-tuned on all Mr. TyDi languages)
+
+Faiss flat index for Mr.TyDi v1.1 (Arabic), using mDPR pre-fine-tuned on MS MARCO and then fine-tuned on all Mr. TyDi languages.
+
+This index was generated on 2022/05/24 at commit [7b099d5](https://github.com/crystina-z/pyserini/commit/7b099d534901d1f0161982605cd40d039ddb701d) on `basilisk` with the following command:
+
+```bash
+lang=arabic # any language in Mr. TyDi
+
+tarfn=mrtydi-v1.1-$lang.tar.gz
+corpus=mrtydi-v1.1-$lang/collection/docs.jsonl
+index_dir=mrtydi-mdpr-dindex-msmarco/$lang
+
+wget https://git.uwaterloo.ca/jimmylin/mr.tydi/-/raw/master/data/$tarfn
+tar -xvf $tarfn
+gzip -cvf $corpus.gz > $corpus
+
+shard_num=1
+encoder=castorini/mdpr-tied-pft-msmarco-ft-all
+
+for shard_id in $(seq 0 $((shard_num - 1))) ; do
+ index_dir=mdpr-dindex/$lang-$shard_id
+ mkdir -p $index_dir
+ python -m pyserini.encode input --corpus $corpus \
+ --fields title text \
+ --delimiter "\n\n" \
+ --shard-id $shard_id \
+ --shard-num $shard_num \
+ output --embeddings $index_dir \
+ --to-faiss \
+ encoder --encoder $encoder \
+ --fields title text \
+ --batch 128 \
+ --encoder-class 'auto' \
+ --fp16
+done
+```
+
+Note that the delimiter option is only supported after [Pyserini #1000](https://github.com/castorini/pyserini/pull/1000/commits/5021e12d1d2e1bc3d4015955bcf77076c5798ce6#diff-45356c3f5e9cd223bb23d7efea3f7ed834abbcd32f604eb7fdd138e364273241L104).
+
+
+Here's a sample retrieval command (on the test set):
+
+```bash
+set_name=test
+python -m pyserini.search.faiss \
+ --encoder castorini/mdpr-tied-pft-msmarco-ft-all \
+ --topics mrtydi-v1.1-${lang}-${set_name} \
+ --index ${index_dir} \
+ --output runs/run.mrtydi-v1.1-$lang.${set_name}.txt \
+ --batch-size 36 \
+ --threads 12 \
+ --encoder-class 'auto'
+```
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd7fe0374595aaced509bb1fcaf66c6602910c0d
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt
@@ -0,0 +1,15 @@
+This index was generated on 2020/11/17 at commit f87c945fd1c1e4174468194c72e3c05688dc45dd Mon Nov 16 16:17:20 2020 -0500
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-doc \
+ -index index-msmarco-doc-20201117-f87c94 -threads 1 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-msmarco-doc-20201117-f87c94.tar.gz MD5 checksum = ac747860e7a37aed37cc30ed3990f273
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db57732f8afbf3f3e88b5b1d33eec15bb8c406ad
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/26 at
+
++ docTTTTTquery commit d2704c025c2bf6db652b4b27f49c4e59714ba898 (2020/11/24).
++ anserini commit 1b4d0a29879a867ca5d1f003f924acc3279455ba (2020/11/25).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -threads 1 \
+ -input msmarco-doc-expanded -index index-msmarco-doc-expanded-per-doc-20201126-1b4d0a -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-expanded-per-doc-20201126-1b4d0a.tar.gz MD5 checksum = f7056191842ab77a01829cff68004782
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..29362ba57057b07c9facdece128d8b1ab8540cb1
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/26 at
+
++ docTTTTTquery commit d2704c025c2bf6db652b4b27f49c4e59714ba898 (2020/11/24).
++ anserini commit 1b4d0a29879a867ca5d1f003f924acc3279455ba (2020/11/25).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -threads 1 \
+ -input msmarco-doc-expanded-passage -index index-msmarco-doc-expanded-per-passage-20201126-1b4d0a -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-expanded-per-passage-20201126-1b4d0a.tar.gz MD5 checksum = 54ea30c64515edf3c3741291b785be53
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6f250a5de3139b1ead215a7dee37c80dc94aee43
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt
@@ -0,0 +1,19 @@
+This index was generated on 2020/12/04 at
+
++ docTTTTTquery commit 5be1af130b4657ea117781f761c4e5d15c77cb42 (2020/12/01).
++ anserini commit f50dcceb6cd0ec3403c1e77066aa51bb3275d24e (2020/12/04).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -threads 1 \
+ -input msmarco-doc-passage -index index-msmarco-doc-per-passage-20201204-f50dcc -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-msmarco-doc-per-passage-20201204-f50dcc.tar.gz MD5 checksum = 797367406a7542b649cefa6b41cf4c33
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-ltr-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-ltr-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..65aec72512cdb11473d59a693a10cef00486145d
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-ltr-readme.txt
@@ -0,0 +1,12 @@
+This index was generated on 2021/10/31 at commit 33e4151e6d58f5b8ea0ef0768dc5308ec48b1aae 2021-10-31 16:53:36 +0800
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-ltr-document/ltr_msmarco_pass_doc_jsonl \
+ -index index-msmarco-doc-per-passage-ltr-20211031-33e4151 -threads 21 -storeRaw -optimize -storePositions -storeDocvectors -pretokenized
+
+Note that the -pretokenized option is used to keep the preprocessed tokenization.
+This index is built with spaCy 3.0.6.
+The max length is 3 and the stride is 1.
+
+index-msmarco-passage-ltr-20210519-e25e33f MD5 checksum = bd60e89041b4ebbabc4bf0cfac608a87
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..565915c8b7b71a5484a920e1fd7d61fa6ed86b60
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/12/04 at
+
++ docTTTTTquery commit 5be1af130b4657ea117781f761c4e5d15c77cb42 (2020/12/01).
++ anserini commit f50dcceb6cd0ec3403c1e77066aa51bb3275d24e (2020/12/04).
+ +with the following command: + +sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input msmarco-doc-passage -index index-msmarco-doc-per-passage-slim-20201204-f50dcc -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +index-msmarco-doc-per-passage-slim-20201204-f50dcc.tar.gz MD5 checksum = 77c2409943a8c9faffabf57cb6adca69 diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..7e79f60ca78240b3cc07d0e0fe0cfcfab76fb1db --- /dev/null +++ b/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt @@ -0,0 +1,10 @@ +This index was generated on 2020/12/02 at commit ab6e280b06a7a6476d001a5eb2319c191010c0e1 (2020/12/01) +with the following command: + +sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \ + -generator DefaultLuceneDocumentGenerator -input collections/msmarco-doc \ + -index index-msmarco-doc-slim-20201202-ab6e28 -threads 1 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +index-msmarco-doc-slim-20201202-ab6e28.tar.gz MD5 checksum = c56e752f7992bf6149761097641d515a diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..a3a08f586af6a12e26bf35ca643b8df471402313 --- /dev/null +++ b/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt @@ -0,0 +1,15 @@ +This index was generated on 2020/11/17 at commit f87c945fd1c1e4174468194c72e3c05688dc45dd Mon Nov 16 16:17:20 2020 -0500 +with the following command: + +sh target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -input collections/msmarco-passage/collection_jsonl \ + -index index-msmarco-passage-20201117-f87c94 -threads 9 -storeRaw -optimize + +Note that to reduce index size: + ++ positions are not indexed (so no phrase queries) ++ document vectors are not stored (so no query expansion) + +However, the raw passages are stored, so they can be fetched and fed to further downstream reranking components. 
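+
+For example, a stored raw passage can be fetched through Pyserini's Python API and handed to a reranker. The sketch
+below is illustrative only: the prebuilt index name 'msmarco-v1-passage' and the docid are assumptions, and a local
+path to this index works equally well.
+
+ from pyserini.search.lucene import LuceneSearcher
+
+ searcher = LuceneSearcher.from_prebuilt_index('msmarco-v1-passage')  # or LuceneSearcher('/path/to/this/index')
+ doc = searcher.doc('7187158')  # hypothetical passage id
+ print(doc.raw())               # raw JSON stored at indexing time via -storeRaw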
+
+index-msmarco-passage-20201117-f87c94.tar.gz MD5 checksum = 1efad4f1ae6a77e235042eff4be1612d
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..23fa65442855f366c02dd94532e38cba6d0ea215
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/21 at
+
++ docTTTTTquery commit 701ea0a72beeb8db46aa409352a72ba52cd2c36b Tue Nov 17 07:13:27 2020 -0500
++ anserini commit e127fbea6f5515d60eb7c325cd866657dbf13cc6 Sat Nov 21 07:58:03 2020 -0500
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection \
+ -collection JsonCollection -generator DefaultLuceneDocumentGenerator \
+ -input msmarco-passage-expanded -index index-msmarco-passage-expanded-20201121-e127fb -threads 9 -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-passage-expanded-20201121-e127fb.tar.gz MD5 checksum = e5762e9e065b6fe5000f9c18da778565
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4a5e758a89f7c02bce46111805426255c1da8a88
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt
@@ -0,0 +1,11 @@
+This index was generated on 2021/05/19 at commit e25e33f4a06e9c1ab4d795908cae4474fa019643 2021-05-17 21:48:48 -0400
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-ltr-passage/ltr_collection_jsonl \
+ -index index-msmarco-passage-ltr-20210519-e25e33f -threads 9 -storeRaw -optimize -storePositions -storeDocvectors -pretokenized
+
+Note that the -pretokenized option is used to keep the preprocessed tokenization.
+This index is built with spaCy 3.0.6.
+
+index-msmarco-passage-ltr-20210519-e25e33f MD5 checksum = a5de642c268ac1ed5892c069bdc29ae3
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..010eaab227bbd2a50082bd31623658015fad7a93
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt
@@ -0,0 +1,10 @@
+This index was generated on 2020/12/02 at commit ab6e280b06a7a6476d001a5eb2319c191010c0e1 (2020/12/01)
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-passage/collection_jsonl \
+ -index index-msmarco-passage-slim-20201202-ab6e28 -threads 9 -optimize
+
+This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.).
+ +index-msmarco-passage-slim-20201202-ab6e28.tar.gz MD5 checksum = 5e11da4cebd2e8dda2e73c589ffb0b4c diff --git a/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt b/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc45b21c72c06257c3556cb88f87ae4a2ceb5350 --- /dev/null +++ b/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt @@ -0,0 +1,7 @@ +This index was generated on 12/13/2019 with Anserini v0.7.0, with the following command: + +sh target/appassembler/bin/IndexCollection -collection TrecCollection \ + -input /tuna1/collections/newswire/disk45/ -index index-robust04-20191213 \ + -generator JsoupGenerator -threads 16 -storePositions -storeDocvectors -storeRawDocs -optimize + +index-robust04-20191213.tar.gz MD5 checksum = 15f3d001489c97849a010b0a4734d018 diff --git a/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..e449ad1048c18696b1638916a1011f0f7da255f7 --- /dev/null +++ b/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt @@ -0,0 +1,18 @@ +This index was generated on 2021/01/20 at + ++ anserini commit d1b9e67928aa60fa557113ace5d209b0c58e994c (2021/01/19). + +with the following command: + +sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 22 \ + -input wikipedia-dpr-jsonl -index index-wikipedia-dpr-20210120-d1b9e6 -storeRaw -optimize + +Note that to reduce index size: + ++ positions are not indexed (so no phrase queries) ++ document vectors are not stored (so no query expansion) + +However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components. + +index-wikipedia-dpr-20210120-d1b9e6.tar.gz MD5 checksum = c28f3a56b2dfcef25bf3bf755c264d04 diff --git a/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..9ff6af6e28a5b08851f3b1e747a8d0024102c3a4 --- /dev/null +++ b/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt @@ -0,0 +1,13 @@ +This index was generated on 2021/01/20 at + ++ anserini commit d1b9e67928aa60fa557113ace5d209b0c58e994c (2021/01/19). + +with the following command: + +sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 22 \ + -input wikipedia-dpr-jsonl -index index-wikipedia-dpr-slim-20210120-d1b9e6 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). 
+
+index-wikipedia-dpr-slim-20210120-d1b9e6.tar.gz MD5 checksum = 7d40604a824b5df37a1ae9d25ea38071
diff --git a/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8449100a55f1206cedcc1ba0ba32d44b070cef9d
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt
@@ -0,0 +1,18 @@
+This index was generated on 2021/04/22 at
+
++ anserini commit f29307a9fb162ec7bef4919a164929a673d2304e (2021/04/21).
+
+with the following command:
+
+python -m pyserini.index -collection JsonCollection -generator DefaultLuceneDocumentGenerator \
+ -threads 40 -input collections/wikipedia-kilt-doc \
+ -index indexes/index-wikipedia-kilt-doc-20210421-f29307 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-wikipedia-kilt-doc-20210421-f29307.tar.gz MD5 checksum = b8ec8feb654f7aaa86f9901dc6c804a8
diff --git a/pyserini/resources/index-metadata/lucene-index-wiki-all-6-3-tamber-20230111-40277a.README.md b/pyserini/resources/index-metadata/lucene-index-wiki-all-6-3-tamber-20230111-40277a.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e04c56f067c243bce43282d67b6734cde21adb7
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index-wiki-all-6-3-tamber-20230111-40277a.README.md
@@ -0,0 +1,24 @@
+# wiki-all-6-3-tamber Lucene index
+
+This Lucene index was generated on 2023/01/11 at Pyserini commit [`40277a`](https://github.com/castorini/pyserini/commit/40277ae007e4d28882af19d6ce1e899a0af04a68)
+with the following commands:
+
+First, make sure you have Git LFS installed so that you can clone the Hugging Face repository.
+```bash
+git lfs install
+```
+
+```bash
+git clone https://huggingface.co/datasets/castorini/odqa-wiki-corpora
+
+python -m pyserini.index.lucene \
+ --collection MrTyDiCollection \
+ --input odqa-wiki-corpora/wiki-all-6-3-tamber \
+ --index indexes/index-wiki-all-6-3-tamber-20230111-40277a \
+ --generator DefaultLuceneDocumentGenerator \
+ --threads 12 \
+ --optimize \
+ --storeRaw
+```
+
+lucene-index-wiki-all-6-3-tamber-20230111-40277a.tar.gz MD5 checksum = 018b45ee8c6278a879caa3145b2dc05d
diff --git a/pyserini/resources/index-metadata/lucene-index.atomic.20230525.a7df7f.README.md b/pyserini/resources/index-metadata/lucene-index.atomic.20230525.a7df7f.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..03901e39925b5b173355c99443c91c4d7c97cf56
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.atomic.20230525.a7df7f.README.md
@@ -0,0 +1,5 @@
+# AToMiC BM25 Indexes
+
+Lucene indexes for the AToMiC dataset (text collection v0.2.1, image collection v0.2).
+
+These indexes were generated on 2023/05/25 at Anserini commit [`a7df7f`](https://github.com/castorini/anserini/commit/a7df7fc5d527ede8f34ee60afa41dec4f6b0e93a) on Compute Canada's Cedar cluster running [this script](https://github.com/TREC-AToMiC/AToMiC/blob/f2f9b58ffd39d920c7599ba49de40a34dd1a21b8/examples/bm25_en_caption/run_bm25_baseline.py#L62) (in particular, the `create_index` function).
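+
+A query can be issued against one of these indexes with Pyserini's Python API. The following is a minimal sketch: the
+local index path and the query string are illustrative assumptions, not values taken from the script above.
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+searcher = LuceneSearcher('indexes/atomic-text-v0.2.1')  # hypothetical path to one of the AToMiC BM25 indexes
+hits = searcher.search('rocky mountain goat', k=10)
+for hit in hits:
+    print(hit.docid, round(hit.score, 4))
+```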
diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b0d21d867892fee7e630177e58a6ab528e0522a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — ArguAna + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-arguana-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..123ac08eeda746598b3acf4563e96b27df14abd3 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — ArguAna + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-arguana-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e92ae63bb34cebb3e63795c0b9fcd9f89da487bd --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — ArguAna + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..796c53d4264bebb6cfc87339adb5e943d49d10da --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — BioASQ + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-bioasq-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8de237d9da139a3d5e225adc913f2af905e62880 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — BioASQ + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-bioasq-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..cdcdecfe13c0f62dc8a7b4b653cd5574839d94ff --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — BioASQ + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..bc2750c1060a248d5574db3ba281841273bcfc48 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Climate-FEVER + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-climate-fever-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e7ae58fcf77cd215c9c319e0bca6e0a23e2f824 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Climate-FEVER + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-climate-fever-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b383b388fb4f10cd703b8659e62cdfc455b22c6c --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Climate-FEVER + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized 
-optimize \ + >& logs/log.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..33fd2fd9c1fcdd376c5534c44492ffe190aee380 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-android + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6e9c81607f3b1d2fe5103779992a90ea0ad405b7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-android + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4eb1eaf22339ae6d4e91e0922cf84517f8cf6ad2 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-android + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input 
/scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b496abd0624fd55991fac16cd27fb12dc586e36a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-english + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e95065cf28a3351c2b241a970e9638e05290201b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-english + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ea16f8f847e1e82b89f76c7ff2052a0c58ed4f0d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-english + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit 
[`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..cfa3dd88b05e728c70f1f188c7cf231ffc0ed775 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gaming + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c76ac330a398541b04c629244230f09157edff --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gaming + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..64ea65499106fee22d08a8b3d48b789e39bcfb2a --- /dev/null +++ 
b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gaming + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0140cef81d5b3c722859f6abb9db8ccb7fdb41cf --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gis + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c4e31ca24247dd0af11217feb1eacc16fedfe26 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gis + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20220501.1842ee.README.md 
b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e1562022c60ce60cb8be99bd0a3f98281533b5b2 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-gis + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ff959813ba669c27e1e750b1be340738414f21f1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-mathematica + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-mathematica-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..55970173077f57c2897050b926d2ae1f0430a952 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-mathematica + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions 
-storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-mathematica-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..7a0e2e1791aa4c096dfbd05cffb4b12b085239a9 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-mathematica + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..75b8b30df7adc47c8b56b7bb1124ce29e38f8047 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-physics + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..c263e8ceb36832605b7bb1e4f989bbbe71386cd1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-physics + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection 
BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9ec68d8b7c8267e2e05ad6d01947a2265dc9adc3 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-physics + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2509f4e1cb1ea9d64b1a1cf5fc6cf10a529eaf59 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-programmers + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..562532f768b8fc20e31519a8a96746ddd35856e3 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-programmers + +This 
**"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9ec2b5b53070039d1229bc30e273e9dfb95428d4 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-programmers + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..c963fbb48612b78abe21071b62e01463e9360557 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-stats + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20220501.1842ee.README.md new file mode 100644 index 
0000000000000000000000000000000000000000..c6e93c3d4b290fc040655e0605ad56a9d84bfefe --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-stats + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..77dee0ceaec027d3b0f06946fd82f04d630c5cb7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-stats + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4bc3e0ab4aea74b6a27380ebe3413992a677a18e --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-tex + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-flat.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f694890541777ca0b8ac764fc116d5f9b7909cf5 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-tex + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..273df443a41cdf3389ba023ca78776374439a61d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-tex + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f08109e6de338dbbacb4cd858c231d57ce22caf7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-unix + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20220501.1842ee/ \ + -generator 
DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..668c33bb4295b14f80e9bc92efa64b8424dbdc8b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-unix + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a80d04351790505fb9bbdbaf78d16ec502f886f6 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-unix + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0c6705fd1c1e66846bfc3f8e5190d1ac43ff77f0 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-webmasters + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection 
\ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2ee07650500a4391f9e45fa0f51de62cdc974b36 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-webmasters + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..fda3f5eb88b5b175946267b0c983463e159c62c0 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-webmasters + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..347a80a05cef0dd570d36978f6d596ef096dee32 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR 
(v1.0.0) — CQADupStack-wordpress + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..7ef78bd5e8091cb483fb03459b4640913a6fe4f5 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-wordpress + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e76790fdbd1a2a14238ec5d630d97601d36612af --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — CQADupStack-wordpress + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee.README.md 
b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f0d7073698c7f446391141928a374931273c20af --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — DBPedia + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..69fb579b7718d752a18744beab6c742245806d81 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — DBPedia + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f88b9e113cc0fc868c94a05d9c77de4e02c72bf2 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — DBPedia + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20220501.1842ee & +``` 
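A minimal usage sketch (not itself one of the README files in this diff): once one of the "flat" indexes above has been built locally, it can be opened directly from Python with Pyserini's `LuceneSearcher` as a quick sanity check. The index path below is copied from the DBPedia indexing command above and assumes that job has finished; the query text is purely illustrative.

```python
from pyserini.search.lucene import LuceneSearcher

# Open the locally built "flat" BEIR index; the path matches the -index
# argument of the DBPedia indexing command above (assumed to have completed).
searcher = LuceneSearcher('indexes/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20220501.1842ee/')

# Run an illustrative BM25 query and print the top hits.
hits = searcher.search('who founded the city of berlin?', k=5)
for i, hit in enumerate(hits):
    print(f'{i + 1:2} {hit.docid:25} {hit.score:.4f}')
```

The impact indexes (`splade_distil_cocodenser_medium`) encode learned term weights rather than raw term frequencies, so they are typically queried through `LuceneImpactSearcher` with a SPLADE query encoder rather than with plain BM25.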
diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4458baccb3f24ab4b325e33a7838cab68322ff79 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FEVER + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fever-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9369c49b0a87854afb0cc56bc405a1b7fe89e671 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FEVER + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fever-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..1948f7920f98a29611ca1052172cf8300dfca9a1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FEVER + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..831fdea1d0dcf110b6f3893a85bdf9ff18710b54 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FiQA-2018 + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fiqa-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..d94a2598d60129af7dfef0873a028242a589b21e --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FiQA-2018 + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fiqa-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..7a30a31cf4debf5ab817e4e63e46d3f2a2c63614 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — FiQA-2018 + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-flat.20221116.505594.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-flat.20221116.505594.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2ef11bbf6f7bdb6899b98be579388cbcf6528a43 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-flat.20221116.505594.README.md @@ -0,0 +1,237 @@ +# BEIR (v1.0.0): "flat" Lucene indexes + +These "flat" Lucene indexes were generated on 2022/11/16 at Anserini commit [`505594`](https://github.com/castorini/anserini/commit/505594b6573294a9a4c72a8feee3416f8a9bd2d9) on `tuna` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-bioasq-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nq-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fiqa-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-signal1m-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/trec-news \ + -index 
indexes/lucene-index.beir-v1.0.0-trec-news-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-news-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-robust04-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-arguana-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& 
logs/log.beir-v1.0.0-cqadupstack-mathematica-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-quora-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input 
/tuna1/collections/beir-v1.0.0/corpus/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scidocs-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fever-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-climate-fever-flat.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-flat.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scifact-flat.20221116.505594 & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..7e07a916b987ba92da71e0e5a31d84fbd83b433a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — HotpotQA + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e97acc460d9725c3acafb92433b61963e13a5f70 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — HotpotQA + +This **"multifield" Lucene index** was generated 
on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3567ef64b53c7135e1914533f09fdabf841bfcef --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — HotpotQA + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md new file mode 100644 index 0000000000000000000000000000000000000000..453718050415aec6b30aa1f3899580667e599d2f --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-multifield.20221116.505594.README.md @@ -0,0 +1,237 @@ +# BEIR (v1.0.0): "multifield" Lucene indexes + +These "multifield" Lucene indexes were generated on 2022/11/16 at Anserini commit [`505594`](https://github.com/castorini/anserini/commit/505594b6573294a9a4c72a8feee3416f8a9bd2d9) on `tuna` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-bioasq-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection 
BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nq-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fiqa-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-signal1m-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/trec-news \ + -index indexes/lucene-index.beir-v1.0.0-trec-news-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-news-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-robust04-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-arguana-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20221116.505594/ \ + 
-generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-mathematica-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input 
/tuna1/collections/beir-v1.0.0/corpus/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-quora-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scidocs-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input 
/tuna1/collections/beir-v1.0.0/corpus/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-fever-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-climate-fever-multifield.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /tuna1/collections/beir-v1.0.0/corpus/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-multifield.20221116.505594/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scifact-multifield.20221116.505594 & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f6b0c4dd2aa20282e8bf8a28e3b2c112d5b77dbd --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NFCorpus + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b66c5a2a377bc2bf7b5a148b38066163694cb5e4 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NFCorpus + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0705766a3a68097d058e9cb8e5ae2dc4f65dd5b6 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NFCorpus + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4b9bcd6b966a9212d408730f21bfe80148ecfd32 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NQ + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nq-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b256ea81d29b920138c965ba34822d35c27816a6 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NQ + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-nq-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..77352d39f3f1bbdc6bffcacf7358fb55b2d69ddc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — NQ + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a06f63dfd59b1a5cee92c5e40dedcc4fe2586a6 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Quora + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-quora-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f4a8507dde1f2716bf665f6fb3a93db06749cf31 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Quora + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-quora-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..04cdccf3318e6e12943d8418a07aa6dc2a10264c --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Quora + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..25f2f0a4fb1e228eadb9f827321bea28a86ec3df --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Robust04 + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-robust04-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0b9e79e25327c76b754ea0af611099579024788a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Robust04 + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-robust04-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d44304deb5afb9413d8f80163d5a9ceb3b11f7b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Robust04 + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2bf9d2ad8a723c03c0da71804f30144aa46cc9f3 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SCIDOCS + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scidocs-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a3cbd51c3b345bcfa97d1ea06d5e917059ed4dda --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SCIDOCS + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scidocs-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..1ba59c9233ad0e3b15650b6ae1b409575397eec9 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SCIDOCS + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4151b71c3bf7ef0bc321af236cbfa87b6ea04c33 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SciFact + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scifact-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b6f0d29faff8db6ced0211bacd137ede06dea6f4 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SciFact + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-scifact-multifield.20220501.1842ee & +``` diff --git 
a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f78d4f52b0824193dcf65db562068bea4cc04df8 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — SciFact + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3a7e897c398b8311c06098306d311bdc86a94558 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Signal-1M + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-signal1m-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..05b73aa4d025f815116432ab5a199df1d6a17cdc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Signal-1M + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-signal1m-multifield.20220501.1842ee & +``` diff 
--git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..98c672c1dd524b7c1212fdef5823c7af5bcda879 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Signal-1M + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md new file mode 100644 index 0000000000000000000000000000000000000000..21cc0eafa438acf841b8e750ce7da72cabc19b95 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-splade_distil_cocodenser_medium.20221116.505594.README.md @@ -0,0 +1,237 @@ +# BEIR (v1.0.0): SPLADE-distill CoCodenser-medium + +These Lucene impact indexes for SPLADE-distill CoCodenser-medium were generated on 2022/11/16 at Anserini commit [`505594`](https://github.com/castorini/anserini/commit/505594b6573294a9a4c72a8feee3416f8a9bd2d9) on `tuna` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/bioasq \ + -index indexes/lucene-index.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-bioasq-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/nfcorpus \ + -index indexes/lucene-index.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-nfcorpus-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ 
+ -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/nq \ + -index indexes/lucene-index.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-nq-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/hotpotqa \ + -index indexes/lucene-index.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-hotpotqa-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/fiqa \ + -index indexes/lucene-index.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-fiqa-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/signal1m \ + -index indexes/lucene-index.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-signal1m-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/trec-news \ + -index indexes/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/robust04 \ + -index indexes/lucene-index.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-robust04-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/arguana \ + -index indexes/lucene-index.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-arguana-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20221116.505594 & + +nohup 
target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-android \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-android-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-english \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-english-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-gaming \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gaming-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-gis \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-gis-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-mathematica \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-mathematica-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-physics \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-physics-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-programmers \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-programmers-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input 
/tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-stats \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-stats-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-tex \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-tex-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-unix \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-unix-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-webmasters \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-webmasters-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/cqadupstack-wordpress \ + -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-cqadupstack-wordpress-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/quora \ + -index indexes/lucene-index.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-quora-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/dbpedia-entity \ + -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-dbpedia-entity-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/scidocs \ + -index indexes/lucene-index.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& 
logs/log.beir-v1.0.0-scidocs-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/fever \ + -index indexes/lucene-index.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-fever-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/climate-fever \ + -index indexes/lucene-index.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-climate-fever-splade_distil_cocodenser_medium.20221116.505594 & + +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /tuna1/collections/beir-v1.0.0/splade_distil_cocodenser_medium/scifact \ + -index indexes/lucene-index.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20221116.505594/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-scifact-splade_distil_cocodenser_medium.20221116.505594 & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..d3a2822ccf6b9a26c9fd2e4d10c556b6fcb8e978 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-COVID + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..dc2e127f38b4c90a6a3a5fe9ca97b798092a14f2 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-COVID + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-multifield.20220501.1842ee/ \ + 
-generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..641d2fcdb39d83e2fc042b746f05d9890eac8248 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-COVID + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/trec-covid \ + -index indexes/lucene-index.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-trec-covid-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4af25537673536c1311813c06815a7bd54b3b696 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-NEWS + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/trec-news \ + -index indexes/lucene-index.beir-v1.0.0-trec-news-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-news-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..29296fa8c0a92974921b0e0cce7d5b2ac59911a5 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-NEWS + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/trec-news \ + -index 
indexes/lucene-index.beir-v1.0.0-trec-news-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-trec-news-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..62e905c208a5854b9f960da4ea4db12d7d217826 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — TREC-NEWS + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/trec-news \ + -index indexes/lucene-index.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-trec-news-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-flat.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-flat.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9888c1109b4834f90282f16fa1c8ae4ec132cad7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-flat.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Webis-Touche2020 + +This **"flat" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection BeirFlatCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-flat.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-flat.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3d0098ce7c725f493438c2e55ceff423932b6a8b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Webis-Touche2020 + +This **"multifield" Lucene index** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + 
-collection BeirMultifieldCollection \ + -input /scratch2/collections/beir-v1.0.0/corpus/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-multifield.20220501.1842ee/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -fields title -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-multifield.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20220501.1842ee.README.md b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20220501.1842ee.README.md new file mode 100644 index 0000000000000000000000000000000000000000..880814b874258e217d572dc65967e760d7ba1e4e --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20220501.1842ee.README.md @@ -0,0 +1,13 @@ +# BEIR (v1.0.0) — Webis-Touche2020 + +This Lucene impact index for **SPLADE-distill CoCodenser-medium** was generated on 2022/05/01 at Anserini commit [`1842ee`](https://github.com/castorini/anserini/commit/1842eeffcbf4d18698d401b1c5a4b1c868f32fc6) on `damiano` with the following command: + +``` +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -generator DefaultLuceneDocumentGenerator \ + -input /scratch2/collections/beir-v1.0.0/splade_distil_cocodenser_medium/webis-touche2020 \ + -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20220501.1842ee/ \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.beir-v1.0.0-webis-touche2020-splade_distil_cocodenser_medium.20220501.1842ee & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.ciral-v1.0.20230721.e850ea.README.md b/pyserini/resources/index-metadata/lucene-index.ciral-v1.0.20230721.e850ea.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ccc621f0de8edf1bf3b8aff08bc882d055b9914d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.ciral-v1.0.20230721.e850ea.README.md @@ -0,0 +1,19 @@ +# CIRAL v1.0 BM25 Indexes + +Lucene indexes for CIRAL covering all four languages. + +This index was generated on 2023/07/21 at Anserini commit [`e850ea`](https://github.com/castorini/anserini/commit/e850eaa5b0e3c0e406628cb1dbcf788ae46caf50) on `basilisk` with the following command: + +```bash +lang=ha # or yo, sw, so +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input ciral-passages-$lang/ \ + -index lucene-index.ciral-v1.0-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 \ + -language $lang \ + -pretokenized \ + -optimize \ + -storePositions -storeDocvectors -storeRaw +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-fa.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-fa.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..68d9e2c13c484da162d7c85d6dd216ad3572729b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-fa.20220719.71c120.README.md @@ -0,0 +1,14 @@ +# hc4-v1.0-fa + +Lucene index for HC4 v1.0 (Persian). 
+ +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/hc4-v1.0-fa \ + -index indexes/lucene-index.hc4-v1.0-fa.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language fa +``` diff --git a/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-ru.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-ru.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..5ed6c49f50bb420616b448cf9b7bde90a13f45e2 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-ru.20220719.71c120.README.md @@ -0,0 +1,13 @@ +# hc4-v1.0-ru + +Lucene index for HC4 v1.0 (Russian). + +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/hc4-v1.0-ru \ + -index indexes/lucene-index.hc4-v1.0-ru.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language ru +``` diff --git a/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-zh.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-zh.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..33b6541762a722bbf498f66c6b9dec75f2818071 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0-zh.20220719.71c120.README.md @@ -0,0 +1,13 @@ +# hc4-v1.0-zh + +Lucene index for HC4 v1.0 (Chinese). + +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/hc4-v1.0-zh \ + -index indexes/lucene-index.hc4-v1.0-zh.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language zh +``` diff --git a/pyserini/resources/index-metadata/lucene-index.hc4-v1.0.20221025.c4a8d0.README.md b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0.20221025.c4a8d0.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4ead6ffe5131727e51b38a08ddcf328583cb4bfc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.hc4-v1.0.20221025.c4a8d0.README.md @@ -0,0 +1,34 @@ +# HC4 v1.0 Indexes + +Lucene indexes for HC4 v1.0 (Persian, Russian, and Chinese). 
+
+These indexes were generated on 2022/10/25 at Anserini commit [`c4a8d0`](https://github.com/castorini/anserini/commit/c4a8d00e3c218ed89dca8a4e51c3b2c7d577db00) on `tuna` with the following commands:
+
+```bash
+# HC4 fa
+nohup target/appassembler/bin/IndexCollection \
+  -collection NeuClirCollection \
+  -input /tuna1/collections/multilingual/hc4-v1.0-fa \
+  -index indexes/lucene-index.hc4-v1.0-fa.20221025.c4a8d0 \
+  -generator DefaultLuceneDocumentGenerator \
+  -threads 8 -storePositions -storeDocvectors -storeRaw -language fa -optimize \
+  >& logs/log.hc4-v1.0-fa.20221025.c4a8d0 &
+
+# HC4 ru
+nohup target/appassembler/bin/IndexCollection \
+  -collection NeuClirCollection \
+  -input /tuna1/collections/multilingual/hc4-v1.0-ru \
+  -index indexes/lucene-index.hc4-v1.0-ru.20221025.c4a8d0 \
+  -generator DefaultLuceneDocumentGenerator \
+  -threads 8 -storePositions -storeDocvectors -storeRaw -language ru -optimize \
+  >& logs/log.hc4-v1.0-ru.20221025.c4a8d0 &
+
+# HC4 zh
+nohup target/appassembler/bin/IndexCollection \
+  -collection NeuClirCollection \
+  -input /tuna1/collections/multilingual/hc4-v1.0-zh \
+  -index indexes/lucene-index.hc4-v1.0-zh.20221025.c4a8d0 \
+  -generator DefaultLuceneDocumentGenerator \
+  -threads 8 -storePositions -storeDocvectors -storeRaw -language zh -optimize \
+  >& logs/log.hc4-v1.0-zh.20221025.c4a8d0 &
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.miracl-v1.0.20221004.2b2856.README.md b/pyserini/resources/index-metadata/lucene-index.miracl-v1.0.20221004.2b2856.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d95eed6ae4b69864fb050ca49124ea3bdd5e5aeb
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.miracl-v1.0.20221004.2b2856.README.md
@@ -0,0 +1,14 @@
+# miracl-v1.0
+
+Lucene index for MIRACL v1.0 (all languages).
+
+This index was generated on 2022/10/04 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command:
+```
+lang=ar # or: bn en fi fr hi id ja ko fa ru es sw te th zh
+target/appassembler/bin/IndexCollection \
+  -collection MrTyDiCollection \
+  -input MIRACL/miracl-corpus-v1.0-$lang \
+  -index lucene-index.miracl-v1.0-$lang \
+  -generator DefaultLuceneDocumentGenerator \
+  -threads 16 -storePositions -storeDocvectors -storeRaw -language $lang
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220108.6fcb89.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0b3e01f260ce45f107e06ee6bea61a4fb4b08d2c
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220108.6fcb89.README.md
@@ -0,0 +1,13 @@
+# mrtydi-v1.1-arabic
+
+Lucene index for Mr.TyDi v1.1 (Arabic).
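+
+To verify a local copy of this index, a short sketch using Pyserini's `IndexReader` (the prebuilt index name `mrtydi-v1.1-arabic` is assumed here):
+
+```python
+from pyserini.index.lucene import IndexReader
+
+# Print basic collection statistics to confirm the index downloaded and unpacked correctly.
+reader = IndexReader.from_prebuilt_index('mrtydi-v1.1-arabic')  # assumed prebuilt name
+stats = reader.stats()
+print(stats['documents'], stats['total_terms'])
+```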
+ +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-arabic/ \ + -index indexes/lucene-index.mrtydi-v1.1-arabic.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language ar +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..451c03a78332f4ff0daa99d62cc9eb3038ad243d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-arabic.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-arabic + +Lucene index for Mr.TyDi v1.1 (Arabic). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=arabic +abbr=ar + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d6565e9dcc57e1b5eabcb7ad1e590997f662f94 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-bengali + +Lucene index for Mr.TyDi v1.1 (Bengali). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-bengali/ \ + -index indexes/lucene-index.mrtydi-v1.1-bengali.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language bn +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a7a1e252af11910e9e31c33ab9682086daf43ead --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-bengali.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-bengali + +Lucene index for Mr.TyDi v1.1 (Bengali). 
+ +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=bengali +abbr=bn + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c4f02f197373f57ab0765a0b639a7951f02a877 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-english + +Lucene index for Mr.TyDi v1.1 (English). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-english/ \ + -index indexes/lucene-index.mrtydi-v1.1-english.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language en +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b664d41a371476a3f6ef3799b9503837502fe40a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-english.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-english + +Lucene index for Mr.TyDi v1.1 (English). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=english +abbr=en + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..10b161844fb9084bea40b96332f77087ffe34db7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-finnish + +Lucene index for Mr.TyDi v1.1 (Finnish). 
+ +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-finnish/ \ + -index indexes/lucene-index.mrtydi-v1.1-finnish.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language fi +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..156c594906efc4a3d50d1454167a5696c011258b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-finnish.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-finnish + +Lucene index for Mr.TyDi v1.1 (Finnish). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=finnish +abbr=fi + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..13570f2745912620cf6efd5149777186861e05bb --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-indonesian + +Lucene index for Mr.TyDi v1.1 (Indonesian). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-indonesian/ \ + -index indexes/lucene-index.mrtydi-v1.1-indonesian.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language id +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0a8b36fe6c6533daeca301e86fdc12151401278a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-indonesian.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-indonesian + +Lucene index for Mr.TyDi v1.1 (Indonesian). 
+ +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=indonesian +abbr=id + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a78a572a06d616d4ebf1cbdb79487575ae1a7935 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-japanese + +Lucene index for Mr.TyDi v1.1 (Japanese). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-japanese/ \ + -index indexes/lucene-index.mrtydi-v1.1-japanese.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language ja +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3fc37ed50ebb6ca33fb135da4161dea5eff86c1d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-japanese.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-japanese + +Lucene index for Mr.TyDi v1.1 (Japanese). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=japanese +abbr=ja + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3810f62615e23573b17b54309851945d66afdd92 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-korean + +Lucene index for Mr.TyDi v1.1 (Korean). 
+ +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-korean/ \ + -index indexes/lucene-index.mrtydi-v1.1-korean.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language ko +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ed106fd28b6c8223033beaec93b69186d2f8437b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-korean.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-korean + +Lucene index for Mr.TyDi v1.1 (Korean). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=korean +abbr=ko + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..7df1ccbf99d852cd2d5482d774e672c5dae2174a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-russian + +Lucene index for Mr.TyDi v1.1 (Russian). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-russian/ \ + -index indexes/lucene-index.mrtydi-v1.1-russian.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language ru +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ee7340f3405e871dfe181c0e6e6c4addeaeff22 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-russian.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-russian + +Lucene index for Mr.TyDi v1.1 (Russian). 
+ +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=russian +abbr=ru + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..5d7fd59cf679b0135f08f59f58dd21ef97380531 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220108.6fcb89.README.md @@ -0,0 +1,16 @@ +# mrtydi-v1.1-swahili + +Lucene index for Mr.TyDi v1.1 (Swahili). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-swahili/ \ + -index indexes/lucene-index.mrtydi-v1.1-swahili.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -pretokenized +``` + +Note that `-language sw` gives identical results (and is more semantically accurate) but since we do not have a language-specific tokenizer here, we just use the whitespace tokenizer, which is what `-pretokenized` uses. +This index was built based on Anserini regressions at the time; see [Anserini #1727](https://github.com/castorini/anserini/pull/1727). \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..02e7bb87583e06f3ee9db6aeff24548106322360 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-swahili.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-swahili + +Lucene index for Mr.TyDi v1.1 (Swahili). 
+ +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=swahili +abbr=sw + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..eedd6e194610efd46b781998f78a8091124c682d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220108.6fcb89.README.md @@ -0,0 +1,16 @@ +# mrtydi-v1.1-telugu + +Lucene index for Mr.TyDi v1.1 (Telugu). + +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-telugu/ \ + -index indexes/lucene-index.mrtydi-v1.1-telugu.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -pretokenized +``` + +Note that `-language te` gives identical results (and is more semantically accurate) but since we do not have a language-specific tokenizer here, we just use the whitespace tokenizer, which is what `-pretokenized` uses. +This index was built based on Anserini regressions at the time; see [Anserini #1727](https://github.com/castorini/anserini/pull/1727). \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2f4bcd9b05c9cbd5ad145521c89111476a1ac104 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-telugu.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-telugu + +Lucene index for Mr.TyDi v1.1 (Telugu). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=telugu +abbr=te + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220108.6fcb89.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220108.6fcb89.README.md new file mode 100644 index 0000000000000000000000000000000000000000..296e30f8aacfddbe7166db1f51a93c8806b59b5b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220108.6fcb89.README.md @@ -0,0 +1,13 @@ +# mrtydi-v1.1-thai + +Lucene index for Mr.TyDi v1.1 (Thai). 
+ +This index was generated on 2022/01/08 at Anserini commit [`6fcb89`](https://github.com/castorini/anserini/commit/6fcb896c61e2b8cf2f235def3e95dda5fe4cd2fc) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MrTyDiCollection \ + -generator DefaultLuceneDocumentGenerator -threads 1 \ + -input /store/collections/mr-tydi-corpus/mrtydi-v1.1-thai/ \ + -index indexes/lucene-index.mrtydi-v1.1-thai.20220108.6fcb89/ \ + -storePositions -storeDocvectors -storeRaw -optimize -language th +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.README.md b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.README.md new file mode 100644 index 0000000000000000000000000000000000000000..5a23bafff699f7d3c0272d116c06691ba8b515db --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.mrtydi-v1.1-thai.20220928.b5ecc5.README.md @@ -0,0 +1,17 @@ +# mrtydi-v1.1-thai + +Lucene index for Mr.TyDi v1.1 (Thai). + +This index was generated on 2022/09/28 at Anserini commit [`b5ecc5`](https://github.com/castorini/anserini/commit/b5ecc5aff79ddfc82b175f6bd3048f5039f0480f) on `orca` with the following command: + +``` +lang=thai +abbr=th + +target/appassembler/bin/IndexCollection \ + -collection MrTyDiCollection \ + -input MrTyDi/miracl-corpus-v1.0-$lang \ + -index indexes-miracl/lucene-index.mrtydi-v1.1-$lang \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -storePositions -storeDocvectors -storeRaw -language $abbr +``` \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-doc-per-passage-expansion.unicoil-d2q.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-doc-per-passage-expansion.unicoil-d2q.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..e8d2cc17fac6d057f46e43119cf1cb88ec3ede1c --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-doc-per-passage-expansion.unicoil-d2q.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-doc-per-passage-expansion-unicoil-d2q-b8/ \ + -index indexes/lucene-index.msmarco-doc-per-passage-expansion.unicoil-d2q.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). 
+ +lucene-index.msmarco-doc-per-passage-expansion.unicoil-d2q.20211012.58d286.tar.gz MD5 checksum = 44bfc848f9a77302b10a59c5b136eb95 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-passage.deepimpact.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.deepimpact.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..e356525181b2355570566101d29e3286697a7709 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.deepimpact.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-passage-deepimpact-b8/ \ + -index indexes/lucene-index.msmarco-passage.deepimpact.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +lucene-index.msmarco-passage.deepimpact.20211012.58d286.tar.gz MD5 checksum = 9938f5529fee5cdb405b8587746c9e93 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-passage.distill-splade-max.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.distill-splade-max.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a3b451a3d3300eeb9ee0be65ab818e2616851be --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.distill-splade-max.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-passage-distill-splade-max \ + -index indexes/lucene-index.msmarco-passage.distill-splade-max.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +lucene-index.msmarco-passage.distill-splade-max.20211012.58d286.tar.gz MD5 checksum = 957c0dd1b78b61aeddc8685150fd8360 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-d2q.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-d2q.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9b427dc795ba581282d4226d4d43d877ff38f25 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-d2q.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-passage-unicoil-b8/ \ + -index indexes/lucene-index.msmarco-passage.unicoil-d2q.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). 
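+
+For reference, a rough sketch of querying an impact index like this one from Python via
+Pyserini's LuceneImpactSearcher (the prebuilt index name and query-encoder checkpoint
+below are assumptions for illustration, not part of this release):
+
+from pyserini.search.lucene import LuceneImpactSearcher
+
+# The query encoder produces uniCOIL term weights, which are scored against the
+# precomputed impact weights stored in this index.
+searcher = LuceneImpactSearcher.from_prebuilt_index(
+    'msmarco-passage-unicoil-d2q',           # assumed prebuilt name
+    'castorini/unicoil-msmarco-passage')     # assumed query encoder
+hits = searcher.search('what is a lobster roll', k=10)
+print(hits[0].docid, round(hits[0].score, 2))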
+ +lucene-index.msmarco-passage.unicoil-d2q.20211012.58d286.tar.gz MD5 checksum = 4a8cb3b86a0d9085a0860c7f7bb7fe99 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-tilde.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-tilde.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..817abea4cbd7e2f91faeae7c40a6e7018d97ad96 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-passage.unicoil-tilde.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-passage-unicoil-tilde-expansion-b8/ \ + -index indexes/lucene-index.msmarco-passage.unicoil-tilde.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +lucene-index.msmarco-passage.unicoil-tilde.20211012.58d286.tar.gz MD5 checksum = cc19cfe241053f5a303f7f05a7ac40a5 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..d8b7721fe6cebe272126681a8a86d75a53344226 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v1-doc-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V1 document corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /scratch2/collections/msmarco/msmarco-doc-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..225bce6f8217691346370fc9c8f80ff92b876c9b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v1-doc-d2q-t5 + +Lucene index of the MS MARCO V1 document corpus, with doc2query-T5 expansions. 
+
+This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command:
+
+```
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 7 \
+  -input /store/collections/msmarco/msmarco-doc-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-d2q-t5.20220201.9ea315/ \
+  -optimize
+```
+
+Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text.
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..34aaad4377167adca0a4b22d273ad501143edb67
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e.README.md
@@ -0,0 +1,24 @@
+# msmarco-v1-doc-d2q-t5
+
+Lucene index of the MS MARCO V1 document corpus, with doc2query-T5 expansions.
+
+Note that there are two variants:
+
++ `msmarco-v1-doc-d2q-t5` (2.1G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text.
++ `msmarco-v1-doc-d2q-t5-docvectors` (12G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback.
+
+These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands:
+
+```
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 7 \
+  -input /tuna1/collections/msmarco/msmarco-doc-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-d2q-t5.20221004.252b5e/ \
+  -optimize >& logs/log.msmarco-v1-doc-d2q-t5.20221004.252b5e &
+
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 7 \
+  -input /tuna1/collections/msmarco/msmarco-doc-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-d2q-t5-docvectors.20221004.252b5e/ \
+  -storeDocvectors -optimize >& logs/log.msmarco-v1-doc-d2q-t5-docvectors.20221004.252b5e &
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-full.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-full.20220131.9ea315.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1d6038a93f3d84bb9ee22a63b1a177ef1d091325
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-full.20220131.9ea315.README.md
@@ -0,0 +1,21 @@
+# msmarco-v1-doc-full
+
+Lucene index of the MS MARCO V1 document corpus.
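+
+Because this "full" build stores document vectors, pseudo-relevance feedback can be turned on at search time; a minimal sketch (assuming the prebuilt index name `msmarco-v1-doc-full`):
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+# RM3 query expansion requires stored docvectors, which this build provides.
+searcher = LuceneSearcher.from_prebuilt_index('msmarco-v1-doc-full')  # assumed prebuilt name
+searcher.set_bm25(k1=0.9, b=0.4)  # illustrative parameters, not tuned values
+searcher.set_rm3(fb_terms=10, fb_docs=10, original_query_weight=0.5)
+
+hits = searcher.search('how long is a college basketball game', k=10)
+print(hits[0].docid, round(hits[0].score, 4))
+```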
+ +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /store/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc-full.20220131.9ea315/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-doc` (16G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-slim` (2.0G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-full` (28G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9c269aa53d6c226e98956a84252959adb66cee6b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v1-doc-segmented-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V1 segmented document corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 32 \ + -input /scratch2/collections/msmarco/msmarco-doc-segmented-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0cd6c61f11f03bf01a4644b5fa4cc09e7aeca52c --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v1-doc-segmented-d2q-t5 + +Lucene index of the MS MARCO V1 segmented document corpus, with doc2query-T5 expansions. 
+
+This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command:
+
+```
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 16 \
+  -input /store/collections/msmarco/msmarco-doc-segmented-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20220201.9ea315/ \
+  -optimize
+```
+
+Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text.
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..00fa81f125c2bcd58af7b5abb9c4e5a90cfbbd24
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e.README.md
@@ -0,0 +1,24 @@
+# msmarco-v1-doc-segmented-d2q-t5
+
+Lucene index of the MS MARCO V1 segmented document corpus, with doc2query-T5 expansions.
+
+Note that there are two variants:
+
++ `msmarco-v1-doc-segmented-d2q-t5` (4.1G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text.
++ `msmarco-v1-doc-segmented-d2q-t5-docvectors` (19G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback.
+
+These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands:
+
+```
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 16 \
+  -input /tuna1/collections/msmarco/msmarco-doc-segmented-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e/ \
+  -optimize >& logs/log.msmarco-v1-doc-segmented-d2q-t5.20221004.252b5e &
+
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 32 \
+  -input /tuna1/collections/msmarco/msmarco-doc-segmented-docTTTTTquery/ \
+  -index indexes/lucene-index.msmarco-v1-doc-segmented-d2q-t5-docvectors.20221004.252b5e/ \
+  -storeDocvectors -optimize >& logs/log.msmarco-v1-doc-segmented-d2q-t5-docvectors.20221004.252b5e &
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-full.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-full.20220131.9ea315.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4a178b038aed81b22e410b0d047e0f9ea3215a7f
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-full.20220131.9ea315.README.md
@@ -0,0 +1,21 @@
+# msmarco-v1-doc-segmented-full
+
+Lucene index of the MS MARCO V1 segmented document corpus.
+ +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 16 \ + -input /store/collections/msmarco/msmarco-doc-segmented/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented-full.20220131.9ea315/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-doc-segmented` (20G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-segmented-slim` (3.9G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-segmented-full` (35G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-slim.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-slim.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..649cbb587fd3506f7cede3e382a87ae73ef04229 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-slim.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-doc-segmented-slim + +Lucene index of the MS MARCO V1 segmented document corpus. + +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 16 \ + -input /store/collections/msmarco/msmarco-doc-segmented/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented-slim.20220131.9ea315/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-doc-segmented` (20G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-segmented-slim` (3.9G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-segmented-full` (35G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. 
\ No newline at end of file
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20220419.c47993.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20220419.c47993.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..35a6015224d48861db6c242b0277170381674a32
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil-noexp.20220419.c47993.README.md
@@ -0,0 +1,15 @@
+# msmarco-v1-doc-segmented-unicoil-noexp
+
+Lucene impact index of the MS MARCO V1 segmented document corpus for uniCOIL (noexp) with title prepended.
+
+This index was generated on 2022/04/19 at Pyserini commit [`c47993`](https://github.com/castorini/pyserini/commit/c47993aa2bebb8ab0a418214cfd299c0d0351c81) on `orca` with the following command:
+
+```
+python -m pyserini.index.lucene \
+  --collection JsonVectorCollection \
+  --input embeddings_msmarco-v1-doc-segmented-unicoil-noexp \
+  --index indexes/lucene-index.msmarco-v1-doc-segmented-unicoil-noexp \
+  --generator DefaultLuceneDocumentGenerator \
+  --threads 12 \
+  --impact --pretokenized --optimize
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil.20220219.6a7080.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1a13ac701612a01ecf6fe489d75ab62a5d3bbd12
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented-unicoil.20220219.6a7080.README.md
@@ -0,0 +1,14 @@
+# msmarco-v1-doc-segmented-unicoil
+
+Lucene impact index of the MS MARCO V1 segmented document corpus for uniCOIL.
+
+This index was generated on 2022/02/19 at Anserini commit [`6a7080`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command:
+
+```
+target/appassembler/bin/IndexCollection \
+  -collection JsonVectorCollection \
+  -input /store/collections/msmarco/msmarco-doc-segmented-unicoil \
+  -index indexes/lucene-index.msmarco-v1-doc-segmented-unicoil.20220219.6a7080/ \
+  -generator DefaultLuceneDocumentGenerator \
+  -threads 16 -impact -pretokenized -optimize
+```
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20220131.9ea315.README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e257c9861c34b545ec73a977cfdcdf0b7fc9f34
--- /dev/null
+++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20220131.9ea315.README.md
@@ -0,0 +1,21 @@
+# msmarco-v1-doc-segmented
+
+Lucene index of the MS MARCO V1 segmented document corpus.
+
+This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command:
+
+```
+target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 16 \
+  -input /store/collections/msmarco/msmarco-doc-segmented/ \
+  -index indexes/lucene-index.msmarco-v1-doc-segmented.20220131.9ea315/ \
+  -storeRaw -optimize
+```
+
+Note that there are three variants of this index:
+
++ `msmarco-v1-doc-segmented` (20G uncompressed): the "default" version, which stores term frequencies and the raw text.
This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-segmented-slim` (3.9G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-segmented-full` (35G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md new file mode 100644 index 0000000000000000000000000000000000000000..5766d69d57134ee7646f36fa920d001c9e0897d7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-segmented.20221004.252b5e.README.md @@ -0,0 +1,31 @@ +# msmarco-v1-doc-segmented + +Lucene index of the MS MARCO V1 segmented document corpus. + +Note that there are three variants: + ++ `msmarco-v1-doc-segmented` (19G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-segmented-slim` (3.9G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-segmented-full` (33G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. 
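+
+The practical difference between the variants shows up when fetching stored content; a sketch (prebuilt index names are assumed to follow the identifiers above, and the docid is illustrative):
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+# The "default" and "full" builds store the raw text; the "slim" build does not,
+# so fetching the raw document there is expected to come back empty.
+for name in ['msmarco-v1-doc-segmented', 'msmarco-v1-doc-segmented-slim']:
+    searcher = LuceneSearcher.from_prebuilt_index(name)
+    doc = searcher.doc('D1555982#0')  # illustrative segment docid
+    print(name, doc is not None and doc.raw() is not None)
+```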
+ +These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 16 \ + -input /tuna1/collections/msmarco/msmarco-doc-segmented/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented.20221004.252b5e/ \ + -storeRaw -optimize >& logs/log.msmarco-v1-doc-segmented.20221004.252b5e & + +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 16 \ + -input /tuna1/collections/msmarco/msmarco-doc-segmented/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented-slim.20221004.252b5e/ \ + -optimize >& logs/log.msmarco-v1-doc-segmented-slim.20221004.252b5e & + +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 16 \ + -input /tuna1/collections/msmarco/msmarco-doc-segmented/ \ + -index indexes/lucene-index.msmarco-v1-doc-segmented-full.20221004.252b5e/ \ + -storePositions -storeDocvectors -storeRaw -optimize >& logs/log.msmarco-v1-doc-segmented-full.20221004.252b5e & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-slim.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-slim.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..114aedff6734473458913da8030ec5dc44227b49 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc-slim.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-doc-slim + +Lucene index of the MS MARCO V1 document corpus. + +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /store/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc-slim.20220131.9ea315/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-doc` (16G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-slim` (2.0G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-full` (28G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ef63fc70345d5e84bcd5b423e841d6ea189f6397 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-doc + +Lucene index of the MS MARCO V1 document corpus. 
+ +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /store/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc.20220131.9ea315/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-doc` (16G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-slim` (2.0G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-full` (28G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20221004.252b5e.README.md new file mode 100644 index 0000000000000000000000000000000000000000..1719bab44f55c1b437fad8e7034c16acc8f86319 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-doc.20221004.252b5e.README.md @@ -0,0 +1,31 @@ +# msmarco-v1-doc + +Lucene index of the MS MARCO V1 document corpus. + +Note that there are three variants: + ++ `msmarco-v1-doc` (16G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-doc-slim` (2.0G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-doc-full` (28G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. 
+ +These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /tuna1/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc.20221004.252b5e/ \ + -storeRaw -optimize >& logs/log.msmarco-v1-doc.20221004.252b5e & + +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /tuna1/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc-slim.20221004.252b5e/ \ + -optimize >& logs/log.msmarco-v1-doc-slim.20221004.252b5e & + +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 7 \ + -input /tuna1/collections/msmarco/msmarco-doc/ \ + -index indexes/lucene-index.msmarco-v1-doc-full.20221004.252b5e/ \ + -storePositions -storeDocvectors -storeRaw -optimize >& logs/log.msmarco-v1-doc-full.20221004.252b5e & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b04b8f9e5f217d6f83a7ca5858fa8ff395724436 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v1-passage-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V1 passage corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco-passage-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d6a6e2c1849d231a2e6a911115e53d9a0b17627 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v1-passage-d2q-t5 + +Lucene index of the MS MARCO V1 passage corpus, with doc2query-T5 expansions. 
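+
+Since doc2query-T5 expansion changes the term statistics of the corpus, BM25 parameters are usually retuned for retrieval over this index. A minimal sketch (the prebuilt index name is assumed, and the k1/b values are illustrative placeholders rather than official tuned settings):
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+searcher = LuceneSearcher.from_prebuilt_index('msmarco-v1-passage-d2q-t5')
+searcher.set_bm25(k1=2.18, b=0.86)  # illustrative values; tune for your setup
+hits = searcher.search('how do antibiotics work', k=10)
+```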
+ +This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco-passage-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-passage-d2q-t5.20220201.9ea315/ \ + -optimize +``` + +Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.README.md new file mode 100644 index 0000000000000000000000000000000000000000..013ec39c0aa10da5c60bc2463c55197abe3af735 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e.README.md @@ -0,0 +1,24 @@ +# msmarco-v1-passage-d2q-t5 + +Lucene index of the MS MARCO V1 passage corpus, with doc2query-T5 expansions. + +Note that there are two variants: + ++ `msmarco-v1-passage-d2q-t5` (972M uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text. ++ `msmarco-v1-passage-d2q-t5-docvectors` (5.0G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback. + +These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands: + +``` +nohup target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /tuna1/collections/msmarco/msmarco-passage-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-passage-d2q-t5.20221004.252b5e/ \ + -optimize >& logs/log.msmarco-v1-passage-d2q-t5.20221004.252b5e & + +nohup target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /tuna1/collections/msmarco/msmarco-passage-docTTTTTquery/ \ + -index indexes/lucene-index.msmarco-v1-passage-d2q-t5-docvectors.20221004.252b5e/ \ + -storeDocvectors -optimize >& logs/log.msmarco-v1-passage-d2q-t5-docvectors.20221004.252b5e & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-full.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-full.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..f508b2f9acd1f1d6911ef91a118f1a3665162ed1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-full.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-passage-full + +Lucene index of the MS MARCO V1 passage corpus.
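+
+Because the "full" variant stores term positions and document vectors, per-document term statistics can be inspected from Python. A sketch, assuming the index is registered as the prebuilt index `msmarco-v1-passage-full` and using an arbitrary passage id:
+
+```python
+from pyserini.index.lucene import IndexReader
+
+reader = IndexReader.from_prebuilt_index('msmarco-v1-passage-full')
+print(reader.stats())  # document count, unique terms, total postings, etc.
+
+# Requires stored docvectors: returns {analyzed term: term frequency}.
+tf = reader.get_document_vector('7187158')
+print(sorted(tf.items(), key=lambda kv: -kv[1])[:10])
+```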
+ +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /store/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage-full.20220131.9ea315/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-passage` (2.5G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-passage-slim` (616M uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-passage-full` (4.3G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slim.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slim.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..808d3b06fb3d29c553c337ab4bde4a5768dcf11f --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slim.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-passage-slim + +Lucene index of the MS MARCO V1 passage corpus. + +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /store/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage-slim.20220131.9ea315/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-passage` (2.5G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-passage-slim` (616M uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-passage-full` (4.3G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. 
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr-pp.20230220.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr-pp.20230220.md new file mode 100644 index 0000000000000000000000000000000000000000..f0889b95214a708f78a383caf93a3d11af4a1274 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr-pp.20230220.md @@ -0,0 +1,13 @@ +This index was generated on 2023/02/20 with the following command: + +``` +python -m pyserini.index.lucene \ + --collection JsonVectorCollection \ + --input collections/slimr_qtopk20_ptopk20_hardneg7_nobalanced_hardneg_distilled \ + --index lucene-index.msmarco-v1-passage-slimr-pp.20230220 \ + --generator DefaultLuceneDocumentGenerator \ + --threads 48 \ + --impact --pretokenized +``` + +lucene-index.msmarco-v1-passage-slimr-pp.20230220.tar.gz MD5 checksum = 17b2edd909bcda4980a93fb0ab87e72b diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr.20230220.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr.20230220.md new file mode 100644 index 0000000000000000000000000000000000000000..db8cf12b250edd8ca68856ecbfc065dca0e088de --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-slimr.20230220.md @@ -0,0 +1,13 @@ +This index was generated on 2023/02/20 with the following command: + +``` +python -m pyserini.index.lucene \ + --collection JsonVectorCollection \ + --input collections/slimr_qtopk20_ptopk20_hardneg7_nobalanced \ + --index lucene-index.msmarco-v1-passage-slimr.20230220 \ + --generator DefaultLuceneDocumentGenerator \ + --threads 48 \ + --impact --pretokenized +``` + +lucene-index.msmarco-v1-passage-slimr.20230220.tar.gz MD5 checksum = 79e566fee4f376096e12a33cf67c8012 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md new file mode 100644 index 0000000000000000000000000000000000000000..057f8190d60343b216a9b5e11e0ca55cfa3d6e17 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-splade-pp.20230524.a59610.README.md @@ -0,0 +1,63 @@ +# SPLADE++ Indexes for MS MARCO V1 Passage + +These are Lucene impact indexes for MS MARCO V1 Passage using the SPLADE++ models. +There are two separate model variants (SPLADE++ CoCondenser-EnsembleDistil and SPLADE++ CoCondenser-SelfDistil), with three index types each: + ++ `msmarco-v1-passage-splade-pp-ed` (2.3G uncompressed): SPLADE++ CoCondenser-EnsembleDistil, minimal TF index. ++ `msmarco-v1-passage-splade-pp-ed-docvectors` (61G uncompressed): with docvectors stored. ++ `msmarco-v1-passage-splade-pp-ed-text` (12G uncompressed): with text stored. ++ `msmarco-v1-passage-splade-pp-sd` (2.6G uncompressed): SPLADE++ CoCondenser-SelfDistil, minimal TF index. ++ `msmarco-v1-passage-splade-pp-sd-docvectors` (67G uncompressed): with docvectors stored. ++ `msmarco-v1-passage-splade-pp-sd-text` (13G uncompressed): with text stored.
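+
+A minimal retrieval sketch for these impact indexes (assuming the prebuilt index name `msmarco-v1-passage-splade-pp-ed` and query-side encoding with the corresponding SPLADE++ model from Hugging Face):
+
+```python
+from pyserini.search.lucene import LuceneImpactSearcher
+
+searcher = LuceneImpactSearcher.from_prebuilt_index(
+    'msmarco-v1-passage-splade-pp-ed',
+    'naver/splade-cocondenser-ensembledistil')  # query encoder (assumed)
+hits = searcher.search('what is a lobster roll?')
+
+for i, hit in enumerate(hits[:10]):
+    print(f'{i + 1:2} {hit.docid:7} {hit.score:.2f}')
+```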
+ +These indexes were generated on 2023/05/24 at Anserini commit [`a59610`](https://github.com/castorini/anserini/commit/a59610795cf612f9f16264c4f9267c8d05f3a2e9) on `tuna` with the following commands: + +```bash +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-ed \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-ed.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-ed.20230524.a59610 & + +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-ed \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-ed-docvectors.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -storeDocvectors -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-ed-docvectors.20230524.a59610 & + +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-ed \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-ed-text.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -storeRaw -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-ed-text.20230524.a59610 & + +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-sd \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-sd.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-sd.20230524.a59610 & + +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-sd \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-sd-docvectors.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -storeDocvectors -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-sd-docvectors.20230524.a59610 & + +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /mnt/collections/msmarco/msmarco-passage-splade-pp-sd \ + -index indexes/lucene-index.msmarco-v1-passage-splade-pp-sd-text.20230524.a59610/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -storeRaw -optimize \ + >& logs/log.msmarco-v1-passage-splade-pp-sd-text.20230524.a59610 & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil-noexp.20220322.2f4058.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil-noexp.20220322.2f4058.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b6c372cb9f64cd556ec13942f4c99dc9726f8a8 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil-noexp.20220322.2f4058.README.md @@ -0,0 +1,15 @@ +# msmarco-v1-passage-unicoil-noexp + +Lucene impact index of the MS MARCO V1 passage corpus for uniCOIL (noexp).
+ +This index was generated on 2022/03/22 at Anserini commit [`2f4058`](https://github.com/castorini/anserini/commit/2f4058fbac852ec483c43e9e43ce9864db5a0027) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco-passage-unicoil-noexp/ \ + -index indexes/lucene-index.msmarco-v1-passage-unicoil-noexp.20220322.2f4058/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 \ + -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil.20220219.6a7080.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e1481ccbe6fcf32d5f7bee7ad044b983d098a82 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage-unicoil.20220219.6a7080.README.md @@ -0,0 +1,14 @@ +# msmarco-v1-passage-unicoil + +Lucene impact index of the MS MARCO V1 passage corpus for uniCOIL. + +This index was generated on 2022/02/19 at Anserini commit [`6a7080`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco-passage-unicoil \ + -index indexes/lucene-index.msmarco-v1-passage-unicoil.20220219.6a7080/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 16 -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20220131.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20220131.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..d1ce4b1f20940432f5e6cf49b9269619efa7a158 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20220131.9ea315.README.md @@ -0,0 +1,21 @@ +# msmarco-v1-passage + +Lucene index of the MS MARCO V1 passage corpus. + +This index was generated on 2022/01/31 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /store/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage.20220131.9ea315/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v1-passage` (2.5G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-passage-slim` (616M uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-passage-full` (4.3G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version.
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20221004.252b5e.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20221004.252b5e.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6de41d6e0dca0ed64655f99ea5062cc33c5fca25 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v1-passage.20221004.252b5e.README.md @@ -0,0 +1,31 @@ +# msmarco-v1-passage + +Lucene index of the MS MARCO V1 passage corpus. + +Note that there are three variants: + ++ `msmarco-v1-passage` (2.6G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v1-passage-slim` (627M uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v1-passage-full` (4.3G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +These indexes were generated on 2022/10/04 at Anserini commit [`252b5e`](https://github.com/castorini/anserini/commit/252b5e2087dd7b3b994d41a444d4ae0044519819) on `tuna` with the following commands: + +``` +nohup target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /tuna1/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage.20221004.252b5e/ \ + -storeRaw -optimize >& logs/log.msmarco-v1-passage.20221004.252b5e & + +nohup target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /tuna1/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage-slim.20221004.252b5e/ \ + -optimize >& logs/log.msmarco-v1-passage-slim.20221004.252b5e & + +nohup target/appassembler/bin/IndexCollection -collection JsonCollection \ + -generator DefaultLuceneDocumentGenerator -threads 9 \ + -input /tuna1/collections/msmarco/passage/ \ + -index indexes/lucene-index.msmarco-v1-passage-full.20221004.252b5e/ \ + -storePositions -storeDocvectors -storeRaw -optimize >& logs/log.msmarco-v1-passage-full.20221004.252b5e & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..405a8aedf99d5083b24755d54458482466419262 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v2-doc-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V2 document corpus, with doc2query-T5 expansions. 
+ +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a4f532dcacf819c565fdae2f5379637e7e541e03 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-d2q-t5 + +Lucene index of the MS MARCO V2 document corpus, with doc2query-T5 expansions. + +This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /store/collections/msmarco/msmarco_v2_doc_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-d2q-t5.20220201.9ea315/ \ + -optimize +``` + +Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9a12701b0254659aeb4fc93da3e9b6fb58c3d1d3 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.README.md @@ -0,0 +1,26 @@ +# msmarco-v2-doc-d2q-t5 + +Lucene index of the MS MARCO V2 document corpus, with doc2query-T5 expansions. + +Note that there are two variants: + ++ `msmarco-v2-doc-d2q-t5` (9.0G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text. ++ `msmarco-v2-doc-d2q-t5-docvectors` (59G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback.
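+
+A sketch of why the `-docvectors` variant matters (the prebuilt index name below is an assumption): pseudo-relevance feedback such as RM3 needs stored document vectors, so it is run against that variant rather than the minimal one.
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+searcher = LuceneSearcher.from_prebuilt_index('msmarco-v2-doc-d2q-t5-docvectors')
+searcher.set_rm3(fb_terms=10, fb_docs=10, original_query_weight=0.5)
+hits = searcher.search('how to tie a bowline knot', k=10)
+```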
+ +These indexes were generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-d2q-t5.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-doc-d2q-t5.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-d2q-t5-docvectors.20220808.4d6d2a/ \ + -storeDocvectors -optimize \ + >& logs/log.msmarco-v2-doc-d2q-t5-docvectors.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-full.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-full.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..b5d513b58d95ef5981b3933183e0294838d1802e --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-full.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc-full + +Lucene index of the MS MARCO V2 document corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc-full.20220111.06fb4f/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc` (73G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-slim` (8.2G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-full` (132G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version.
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..5813b9669b1b2b3dfd0cfa2e701a1b3f60e0aab1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-v2-doc-seg-unicoil-noexp-0shot-b8 \ + -index indexes/lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +lucene-index.msmarco-v2-doc-per-passage.unicoil-noexp-0shot.20211012.58d286.tar.gz MD5 checksum = 1980db886d969c3393e4da20190eaa8f diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9cab351b491ac339bed7677f9f47c07e0be02978 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v2-doc-segmented-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V2 segmented document corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4bb89c4eaf41fd72e6066e9dedfc380ef8aa8fab --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-segmented-d2q-t5 + +Lucene index of the MS MARCO V2 segmented document corpus, with doc2query-T5 expansions. 
+ +This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220201.9ea315/ \ + -optimize +``` + +Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..631e85c8242dafbaa0fbd8a22a6978e5dae3c4dc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.README.md @@ -0,0 +1,26 @@ +# msmarco-v2-doc-segmented-d2q-t5 + +Lucene index of the MS MARCO V2 segmented document corpus, with doc2query-T5 expansions. + +Note that there are two variants: + ++ `msmarco-v2-doc-segmented-d2q-t5` (29G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text. ++ `msmarco-v2-doc-segmented-d2q-t5-docvectors` (130G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback. + +These indexes were generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-doc-segmented-d2q-t5.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 24 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220808.4d6d2a/ \ + -storeDocvectors -optimize \ + >& logs/log.msmarco-v2-doc-segmented-d2q-t5-docvectors.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-full.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-full.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ed75605287e1398147ed1b8d42b854caa10e3485 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-full.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc-segmented-full + +Lucene index of the MS MARCO V2 segmented document corpus.
+ +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-full.20220111.06fb4f/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc-segmented` (128G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-segmented-slim` (25G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-segmented-full` (217G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-slim.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-slim.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..0f7ea6da2cf22048d1069bc4ca1389a5b422e0ff --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-slim.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc-segmented-slim + +Lucene index of the MS MARCO V2 segmented document corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-slim.20220111.06fb4f/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc-segmented` (128G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-segmented-slim` (25G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-segmented-full` (217G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. 
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2.20220419.c47993.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2.20220419.c47993.README.md new file mode 100644 index 0000000000000000000000000000000000000000..04e16e185b0020c4eabef825e1148862db3bd0d1 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2.20220419.c47993.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-segmented-unicoil-0shot-v2 + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL with title prepended. + +This index was generated on 2022/04/19 at Pyserini commit [`c47993`](https://github.com/castorini/pyserini/commit/c47993aa2bebb8ab0a418214cfd299c0d0351c81) on `orca` with the following command: + +``` +python -m pyserini.index.lucene \ + --collection JsonVectorCollection \ + --input embeddings_msmarco-v2-doc-segmented-unicoil-0shot-v2 \ + --index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2 \ + --generator DefaultLuceneDocumentGenerator \ + --threads 12 \ + --impact --pretokenized --optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220219.6a7080.README.md new file mode 100644 index 0000000000000000000000000000000000000000..89cee4cbb9dfbc875f6f3c2c45b91e2ce4d8640b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220219.6a7080.README.md @@ -0,0 +1,14 @@ +# msmarco-v2-doc-segmented-unicoil-0shot + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL. + +This index was generated on 2022/02/19 at Anserini commit [`6a7080`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220219.6a7080/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..09d20888e080e26390914aaf5a0e68887b03e8dc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-segmented-unicoil-0shot + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL with title prepended.
+ +This index was generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following command: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot_v2 \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.20220419.c47993.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.20220419.c47993.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e3f573d32dd2da65a5a242508224d683ec55d23b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.20220419.c47993.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL (noexp) with title prepended. + +This index was generated on 2022/04/19 at Pyserini commit [`c47993`](https://github.com/castorini/pyserini/commit/c47993aa2bebb8ab0a418214cfd299c0d0351c81) on `orca` with the following command: + +``` +python -m pyserini.index.lucene \ + --collection JsonVectorCollection \ + --input embeddings_msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 \ + --index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 \ + --generator DefaultLuceneDocumentGenerator \ + --threads 12 \ + --impact --pretokenized --optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220219.6a7080.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6253e9ef7149a992183319541be25414ff9773d7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220219.6a7080.README.md @@ -0,0 +1,14 @@ +# msmarco-v2-doc-segmented-unicoil-noexp-0shot + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL (noexp).
+ +This index was generated on 2022/02/19 at Anserini commit [`6a7080`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220219.6a7080/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..2727c93143c25c327efb3bacb9ee4caf004bb249 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-doc-segmented-unicoil-noexp-0shot + +Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL (noexp) with title prepended. + +This index was generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following command: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v2-doc-segmented-unicoil-noexp-0shot.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..04835ddbbe6e2b9d0f9c0db6ca312a3c9bec434e --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc-segmented + +Lucene index of the MS MARCO V2 segmented document corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented.20220111.06fb4f/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc-segmented` (128G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-segmented-slim` (25G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index.
++ `msmarco-v2-doc-segmented-full` (217G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..c64d860b9985c8073afc34258e313cf51ce1ad94 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a.README.md @@ -0,0 +1,34 @@ +# msmarco-v2-doc-segmented + +Lucene index of the MS MARCO V2 segmented document corpus. + +Note that there are three variants of this index: + ++ `msmarco-v2-doc-segmented` (132G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-segmented-slim` (26G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-segmented-full` (233G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +These indexes were generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented.20220808.4d6d2a/ \ + -storeRaw -optimize \ + >& logs/log.msmarco-v2-doc-segmented.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-slim.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-doc-segmented-slim.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc_segmented/ \ + -index indexes/lucene-index.msmarco-v2-doc-segmented-full.20220808.4d6d2a/ \ + -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.msmarco-v2-doc-segmented-full.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-slim.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-slim.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a99f257ebf45f311c2df6a7358674a1c4104f180 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc-slim.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc-slim + +Lucene index of the MS MARCO V2 document corpus.
+ +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc-slim.20220111.06fb4f/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc` (73G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-slim` (8.2G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-full` (132G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e9164f02d5386adeffe75b7a42bfb09d10069a7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-doc + +Lucene index of the MS MARCO V2 document corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc.20220111.06fb4f/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-doc` (73G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-slim` (8.2G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-full` (132G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..77c05dcafb1799f362d9753b9ce9d642ee70f19d --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-doc.20220808.4d6d2a.README.md @@ -0,0 +1,34 @@ +# msmarco-v2-doc + +Lucene index of the MS MARCO V2 document corpus. 
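+
+For bulk experiments over this corpus, queries are usually issued in batches; a minimal sketch (assuming the prebuilt index name `msmarco-v2-doc`; queries and ids are placeholders):
+
+```python
+from pyserini.search.lucene import LuceneSearcher
+
+searcher = LuceneSearcher.from_prebuilt_index('msmarco-v2-doc')
+queries = ['hybrid car mileage', 'symptoms of strep throat']
+qids = ['q1', 'q2']
+# Multithreaded batch retrieval; returns {qid: list of hits}.
+results = searcher.batch_search(queries, qids, k=10, threads=4)
+print(results['q1'][0].docid, results['q1'][0].score)
+```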
+ +Note that there are three variants of this index: + ++ `msmarco-v2-doc` (73G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-doc-slim` (8.0G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-doc-full` (132G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +These indexes were generated on 2022/08/08 at Anserini commit [`4d6d2a`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc.20220808.4d6d2a/ \ + -storeRaw -optimize \ + >& logs/log.msmarco-v2-doc.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc-slim.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-doc-slim.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2DocCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_doc/ \ + -index indexes/lucene-index.msmarco-v2-doc-full.20220808.4d6d2a/ \ + -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.msmarco-v2-doc-full.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..19538faf1e93bc36b0d066bbc4be3502707efac7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# msmarco-v2-passage-augmented-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V2 augmented passage corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220525.30c997/ \ + -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored.
diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..27fe35125aa6ecf3bf206f8b5ffcfd9164c9adbd --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-passage-augmented-d2q-t5 + +Lucene index of the MS MARCO V2 augmented passage corpus, with doc2query-T5 expansions. + +This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage_augmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220201.9ea315/ \ + -optimize +``` + +Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..782e3fe140660d83ae5ed7e7b084d48c83db8b20 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.README.md @@ -0,0 +1,26 @@ +# msmarco-v2-passage-augmented-d2q-t5 + +Lucene index of the MS MARCO V2 augmented passage corpus, with doc2query-T5 expansions. + +Note that there are two variants: + ++ `msmarco-v2-passage-augmented-d2q-t5` (26G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text. ++ `msmarco-v2-passage-augmented-d2q-t5-docvectors` (111G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback.
+ +These indexes were generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-passage-augmented-d2q-t5.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220808.4d6d2a/ \ + -storeDocvectors -optimize \ + >& logs/log.msmarco-v2-passage-augmented-d2q-t5-docvectors.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-full.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-full.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..ee627583a0691de7dd70ebeac6d0488d1b7e36cc --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-full.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage-augmented-full + +Lucene index of the MS MARCO V2 augmented passage corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-full.20220111.06fb4f/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage-augmented` (82G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-augmented-slim` (18G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-augmented-full` (142G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. 
\ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-slim.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-slim.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..61e5d0090e304f689c7418c30f6407aa033077cf --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented-slim.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage-augmented-slim + +Lucene index of the MS MARCO V2 augmented passage corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-slim.20220111.06fb4f/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage-augmented` (82G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-augmented-slim` (18G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-augmented-full` (142G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..3aff53355dc5a138a0012515d3bc516f398ff5af --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage-augmented + +Lucene index of the MS MARCO V2 augmented passage corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented.20220111.06fb4f/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage-augmented` (82G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-augmented-slim` (18G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. 
++ `msmarco-v2-passage-augmented-full` (142G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6be4bee58b5d5d4ae0a459d634584d035c2ed94b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a.README.md @@ -0,0 +1,34 @@ +# msmarco-v2-passage-augmented + +Lucene index of the MS MARCO V2 augmented passage corpus. + +Note that there are three variants of this index: + ++ `msmarco-v2-passage-augmented` (93G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-augmented-slim` (20G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-augmented-full` (157G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +These indexes were generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented.20220808.4d6d2a/ \ + -storeRaw -optimize \ + >& logs/log.msmarco-v2-passage-augmented.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-slim.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-passage-augmented-slim.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_augmented/ \ + -index indexes/lucene-index.msmarco-v2-passage-augmented-full.20220808.4d6d2a/ \ + -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.msmarco-v2-passage-augmented-full.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220525.30c997.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220525.30c997.README.md new file mode 100644 index 0000000000000000000000000000000000000000..59fd7e47baf841cf1dc6ccbd5eb93634630c8740 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220525.30c997.README.md @@ -0,0 +1,16 @@ +# 
msmarco-v2-passage-d2q-t5-docvectors + +Lucene index (+docvectors) of the MS MARCO V2 passage corpus, with doc2query-T5 expansions. + +This index was generated on 2022/05/25 at Anserini commit [`30c997`](https://github.com/castorini/anserini/commit/30c9974f495a06c94d576d0e9c2c5861515e0e19) on `damiano` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ +  -generator DefaultLuceneDocumentGenerator -threads 18 \ +  -input /scratch2/collections/msmarco/msmarco_v2_passage_d2q-t5/ \ +  -index indexes/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220525.30c997/ \ +  -storeDocvectors -optimize +``` + +Note that this index stores term frequencies along with the docvectors: bag-of-words queries and relevance feedback are supported, but not phrase queries. +The raw text is not stored. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220201.9ea315.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220201.9ea315.README.md new file mode 100644 index 0000000000000000000000000000000000000000..37f9b289a37cbc346067860dcee02a3fe919e2c5 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220201.9ea315.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-passage-d2q-t5 + +Lucene index of the MS MARCO V2 passage corpus, with doc2query-T5 expansions. + +This index was generated on 2022/02/01 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/9ea3159adeeffd84e10e197af4c36febb5b74c7b) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ +  -generator DefaultLuceneDocumentGenerator -threads 18 \ +  -input /store/collections/msmarco/msmarco_v2_passage_d2q-t5/ \ +  -index indexes/lucene-index.msmarco-v2-passage-d2q-t5.20220201.9ea315/ \ +  -optimize +``` + +Note that this index stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. In addition, there is no way to fetch the raw text. diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..23c350f0b81ca6776ce77551c437b6bc40a94c08 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.README.md @@ -0,0 +1,26 @@ +# msmarco-v2-passage-d2q-t5 + +Lucene index of the MS MARCO V2 passage corpus, with doc2query-T5 expansions. + +Note that there are two variants: + ++ `msmarco-v2-passage-d2q-t5` (19G uncompressed): stores term frequencies only, which supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text. ++ `msmarco-v2-passage-d2q-t5-docvectors` (71G uncompressed): stores term frequencies and the docvectors, which enables pseudo-relevance feedback. 
+ +These indexes were generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-d2q-t5.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-passage-d2q-t5.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_d2q-t5/ \ + -index indexes/lucene-index.msmarco-v2-passage-d2q-t5-docvectors.20220808.4d6d2a/ \ + -storeDocvectors -optimize \ + >& logs/log.msmarco-v2-passage-d2q-t5-docvectors.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-full.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-full.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8fd87fdff81a9f0dc3326e890790f514906092c4 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-full.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage-full + +Lucene index of the MS MARCO V2 passage corpus. + +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage-full.20220111.06fb4f/ \ + -storePositions -storeDocvectors -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage` (45G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-slim` (11G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-full` (69G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "full" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-slim.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-slim.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..e3f5e1b578987061a5547305f4aae5f2ac34ef83 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-slim.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage-slim + +Lucene index of the MS MARCO V2 passage corpus. 
+ +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage-slim.20220111.06fb4f/ \ + -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage` (45G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-slim` (11G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-full` (69G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "slim" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220219.6a7080.README.md new file mode 100644 index 0000000000000000000000000000000000000000..af3c94f3796f4c7c9dd34bb26ec39db9323caf7c --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220219.6a7080.README.md @@ -0,0 +1,14 @@ +# msmarco-v2-passage-unicoil-0shot + +Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL. + +This index was generated on 2022/02/19 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco_v2_passage_unicoil_0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-0shot.20220219.6a7080/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c837f8a06cf7f9f431c0d08b98d8752f8551ed4 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-passage-unicoil-0shot + +Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL. 
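Unlike the BM25 indexes above, an impact index stores precomputed uniCOIL term weights, so the query has to be encoded into the same weighted-term space at search time. A rough sketch of how such an index might be queried from Python follows; the prebuilt-index key and the `castorini/unicoil-msmarco-passage` query-encoder checkpoint are assumptions, not something this README specifies.

```python
# Illustrative sketch; the prebuilt-index key and query-encoder name are assumptions.
from pyserini.search.lucene import LuceneImpactSearcher

searcher = LuceneImpactSearcher.from_prebuilt_index(
    'msmarco-v2-passage-unicoil-0shot',     # impact index built with -impact -pretokenized
    'castorini/unicoil-msmarco-passage')    # encodes queries into uniCOIL term weights
hits = searcher.search('what is a lobster roll', k=10)
for hit in hits[:3]:
    print(hit.docid, hit.score)
```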
+ +This index was generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following command: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_unicoil_0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v2-passage-unicoil-0shot.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220219.6a7080.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220219.6a7080.README.md new file mode 100644 index 0000000000000000000000000000000000000000..a7959c822e1b3db3f60c818c54fc2f518fa598fa --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220219.6a7080.README.md @@ -0,0 +1,14 @@ +# msmarco-v2-passage-unicoil-noexp-0shot + +Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL (noexp). + +This index was generated on 2022/02/19 at Anserini commit [`9ea315`](https://github.com/castorini/anserini/commit/6a708047f71528f7d516c0dd45485204a36e6b1d) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /store/collections/msmarco/msmarco_v2_passage_unicoil_noexp_0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220219.6a7080/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..97dd1db97231532bcb9aff56ede3f73789bd381b --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.README.md @@ -0,0 +1,15 @@ +# msmarco-v2-passage-unicoil-noexp-0shot + +Lucene impact index of the MS MARCO V2 passage corpus for uniCOIL (noexp). + +This index was generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following command: + +```bash +nohup target/appassembler/bin/IndexCollection \ + -collection JsonVectorCollection \ + -input /scratch2/collections/msmarco/msmarco_v2_passage_unicoil_noexp_0shot \ + -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a/ \ + -generator DefaultLuceneDocumentGenerator \ + -threads 18 -impact -pretokenized -optimize \ + >& logs/log.msmarco-v2-passage-unicoil-noexp-0shot.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220111.06fb4f.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220111.06fb4f.README.md new file mode 100644 index 0000000000000000000000000000000000000000..9cbb2e23e868f75a140cb60dda4cf12fdc9236d8 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220111.06fb4f.README.md @@ -0,0 +1,21 @@ +# msmarco-v2-passage + +Lucene index of the MS MARCO V2 passage corpus. 
+ +This index was generated on 2022/01/11 at Anserini commit [`06fb4f`](https://github.com/castorini/anserini/commit/06fb4f9947ff2167c276d8893287453af7680786) on `orca` with the following command: + +``` +target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /store/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage.20220111.06fb4f/ \ + -storeRaw -optimize +``` + +Note that there are three variants of this index: + ++ `msmarco-v2-passage` (45G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-slim` (11G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-full` (69G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. + +This is the "default" version. \ No newline at end of file diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md new file mode 100644 index 0000000000000000000000000000000000000000..4ea429f61c9a94bd5cd7cbeb325455e7b9ffc214 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.20220808.4d6d2a.README.md @@ -0,0 +1,34 @@ +# msmarco-v2-passage + +Lucene index of the MS MARCO V2 passage corpus. + +Note that there are three variants of this index: + ++ `msmarco-v2-passage` (48G uncompressed): the "default" version, which stores term frequencies and the raw text. This supports bag-of-words queries, but no phrase queries and no relevance feedback. ++ `msmarco-v2-passage-slim` (13G uncompressed): the "slim" version, which stores term frequencies only. This supports bag-of-words queries, but no phrase queries and no relevance feedback. There is no way to fetch the raw text from this index. ++ `msmarco-v2-passage-full` (72G uncompressed): the "full" version, which stores term frequencies, term positions, document vectors, and the raw text. This supports bag-of-words queries, phrase queries, and relevance feedback. 
+ +These indexes were generated on 2022/08/08 at Anserini commit [`fbe35e`](https://github.com/castorini/anserini/commit/4d6d2a5a367424131331df2a8e9e00e6a9c68856) on `damiano` with the following commands: + +```bash +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage.20220808.4d6d2a/ \ + -storeRaw -optimize \ + >& logs/log.msmarco-v2-passage.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage-slim.20220808.4d6d2a/ \ + -optimize \ + >& logs/log.msmarco-v2-passage-slim.20220808.4d6d2a.txt & + +nohup target/appassembler/bin/IndexCollection -collection MsMarcoV2PassageCollection \ + -generator DefaultLuceneDocumentGenerator -threads 18 \ + -input /scratch2/collections/msmarco/msmarco_v2_passage/ \ + -index indexes/lucene-index.msmarco-v2-passage-full.20220808.4d6d2a/ \ + -storePositions -storeDocvectors -storeRaw -optimize \ + >& logs/log.msmarco-v2-passage-full.20220808.4d6d2a.txt & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..fea735fec452f7c8f1edea2ebb9ce79b5699aaa7 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-noexp-0shot.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-v2-passage-unicoil-noexp-0shot-b8 \ + -index indexes/lucene-index.msmarco-v2-passage.unicoil-noexp-0shot.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). + +lucene-index.msmarco-v2-passage.unicoil-noexp-0shot.20211012.58d286.tar.gz MD5 checksum = 8886a8d9599838bc6d8d61464da61086 diff --git a/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-tilde.20211012.58d286.readme.txt b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-tilde.20211012.58d286.readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..5e772713ce6ff6135182ca0984bf05b7ef67bb53 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.msmarco-v2-passage.unicoil-tilde.20211012.58d286.readme.txt @@ -0,0 +1,12 @@ +This index was generated on 2021/10/12 at commit 58d286c3f9fe845e261c271f2a0f514462844d97 (2021/10/05) +with the following command: + +python -m pyserini.index -collection JsonVectorCollection \ + -input collections/msmarco-v2-passage-unicoil-tilde-expansion-b8/ \ + -index indexes/lucene-index.msmarco-v2-passage.unicoil-tilde.20211012.58d286 \ + -generator DefaultLuceneDocumentGenerator -impact -pretokenized \ + -threads 36 -optimize + +This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.). 
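Beyond the MD5 checksum recorded below, a quick way to sanity-check a downloaded copy of one of these minimal indexes is to open it and print its collection statistics. A hedged sketch, assuming the prebuilt-index key `msmarco-v2-passage-unicoil-noexp-0shot` exists and that `IndexReader` lives under `pyserini.index.lucene` in this version:

```python
# Illustrative sketch; the prebuilt-index key and the import path are assumptions.
from pyserini.index.lucene import IndexReader

reader = IndexReader.from_prebuilt_index('msmarco-v2-passage-unicoil-noexp-0shot')
print(reader.stats())  # document and term counts of the downloaded index
```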
+ +lucene-index.msmarco-v2-passage.unicoil-tilde.20211012.58d286.tar.gz MD5 checksum = 562f9534eefe04ab8c07beb304074d41 diff --git a/pyserini/resources/index-metadata/lucene-index.neuclir22-en.20221025.c4a8d0.README.md b/pyserini/resources/index-metadata/lucene-index.neuclir22-en.20221025.c4a8d0.README.md new file mode 100644 index 0000000000000000000000000000000000000000..1936a3fc7cb1fbe552525e0d4a4ff572e801bf54 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.neuclir22-en.20221025.c4a8d0.README.md @@ -0,0 +1,34 @@ +# NeuCLIR 2022 Indexes (English) + +Lucene indexes for the NeuCLIR 2022 corpora (official English translations from Persian, Russian, and Chinese). + +These indexes were generated on 2022/10/25 at Anserini commit [`c4a8d0`](https://github.com/castorini/anserini/commit/c4a8d00e3c218ed89dca8a4e51c3b2c7d577db00) on `tuna` with the following commands: + +```bash +# NeuCLIR22 fa -> en +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-fa-en \ +  -index indexes/lucene-index.neuclir22-fa-en.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -optimize \ +  >& logs/log.neuclir22-fa-en.20221025.c4a8d0 & + +# NeuCLIR22 ru -> en +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-ru-en \ +  -index indexes/lucene-index.neuclir22-ru-en.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -optimize \ +  >& logs/log.neuclir22-ru-en.20221025.c4a8d0 & + +# NeuCLIR22 zh -> en +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-zh-en \ +  -index indexes/lucene-index.neuclir22-zh-en.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -optimize \ +  >& logs/log.neuclir22-zh-en.20221025.c4a8d0 & +``` diff --git a/pyserini/resources/index-metadata/lucene-index.neuclir22-fa.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.neuclir22-fa.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..8811a43297f7a06ddebb610d2463ae60f265b93a --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.neuclir22-fa.20220719.71c120.README.md @@ -0,0 +1,14 @@ +# neuclir22-fa + +Lucene index for Neuclir22 (Persian). 
+ +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/neuclir22-fa \ + -index indexes/lucene-index.neuclir22-fa.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language fa +``` diff --git a/pyserini/resources/index-metadata/lucene-index.neuclir22-ru.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.neuclir22-ru.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..81445ff3336a7629ce702e5cda912c19031f5eb5 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.neuclir22-ru.20220719.71c120.README.md @@ -0,0 +1,14 @@ +# neuclir22-ru + +Lucene index for Neuclir22 (Russian). + +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/neuclir22-ru \ + -index indexes/lucene-index.neuclir22-ru.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language ru +``` diff --git a/pyserini/resources/index-metadata/lucene-index.neuclir22-zh.20220719.71c120.README.md b/pyserini/resources/index-metadata/lucene-index.neuclir22-zh.20220719.71c120.README.md new file mode 100644 index 0000000000000000000000000000000000000000..908ec4242bc70a2933e0aba9dceb85514abc0520 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.neuclir22-zh.20220719.71c120.README.md @@ -0,0 +1,14 @@ +# neuclir22-zh + +Lucene index for Neuclir22 (Chinese). + +This index was generated on 2022/07/19 at Anserini commit [`71c120`](https://github.com/castorini/anserini/commit/71c1200d36ce17615cf4da510ac4ef2d2f0121f6) on `orca` with the following command: + + +``` +target/appassembler/bin/IndexCollection -collection NeuClirCollection \ + -generator DefaultLuceneDocumentGenerator -threads 8 \ + -input /store/collections/multilingual/neuclir22-zh \ + -index indexes/lucene-index.neuclir22-zh.20220719.71c120 \ + -storePositions -storeDocvectors -storeRaw -optimize -language zh +``` diff --git a/pyserini/resources/index-metadata/lucene-index.neuclir22.20221025.c4a8d0.README.md b/pyserini/resources/index-metadata/lucene-index.neuclir22.20221025.c4a8d0.README.md new file mode 100644 index 0000000000000000000000000000000000000000..892efb9d39ea3e48b10876170f355c87cad30423 --- /dev/null +++ b/pyserini/resources/index-metadata/lucene-index.neuclir22.20221025.c4a8d0.README.md @@ -0,0 +1,34 @@ +# NeuCLIR 2022 Indexes + +Lucene indexes for the NeuCLIR 2022 corpora (Persian, Russian, and Chinese). 
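Because each of these monolingual indexes was built with a language-specific analyzer (the `-language fa`, `-language ru`, and `-language zh` flags above), queries should be analyzed the same way at search time so that query terms match the indexed forms. A minimal sketch against the Persian index path used above; the local path and the query string are illustrative only.

```python
# Illustrative sketch; assumes the index was built locally at the path from the README above.
from pyserini.search.lucene import LuceneSearcher

searcher = LuceneSearcher('indexes/lucene-index.neuclir22-fa.20220719.71c120')
searcher.set_language('fa')                  # mirror the -language fa flag used at indexing time
hits = searcher.search('واکسن کرونا', k=10)  # illustrative Persian query
for hit in hits[:3]:
    print(hit.docid, hit.score)
```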
+ +These indexes were generated on 2022/10/25 at Anserini commit [`c4a8d0`](https://github.com/castorini/anserini/commit/c4a8d00e3c218ed89dca8a4e51c3b2c7d577db00) on `tuna` with the following commands: + +```bash +# NeuCLIR22 fa +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-fa \ +  -index indexes/lucene-index.neuclir22-fa.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -language fa -optimize \ +  >& logs/log.neuclir22-fa.20221025.c4a8d0 & + +# NeuCLIR22 ru +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-ru \ +  -index indexes/lucene-index.neuclir22-ru.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -language ru -optimize \ +  >& logs/log.neuclir22-ru.20221025.c4a8d0 & + +# NeuCLIR22 zh +nohup target/appassembler/bin/IndexCollection \ +  -collection NeuClirCollection \ +  -input /tuna1/collections/multilingual/neuclir22-zh \ +  -index indexes/lucene-index.neuclir22-zh.20221025.c4a8d0 \ +  -generator DefaultLuceneDocumentGenerator \ +  -threads 8 -storePositions -storeDocvectors -storeRaw -language zh -optimize \ +  >& logs/log.neuclir22-zh.20221025.c4a8d0 & +``` diff --git a/pyserini/resources/jars/.placeholder b/pyserini/resources/jars/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pyserini/resources/jars/anserini-0.21.1-SNAPSHOT-fatjar.jar b/pyserini/resources/jars/anserini-0.21.1-SNAPSHOT-fatjar.jar new file mode 100644 index 0000000000000000000000000000000000000000..6277a16e9697f2a8f3358763d26bf6ed2cddf5d8 --- /dev/null +++ b/pyserini/resources/jars/anserini-0.21.1-SNAPSHOT-fatjar.jar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b33e0e837fe46235da136869148853d411be0adf64fe6753c4aa294746df9a88 +size 145665105 diff --git a/pyserini/resources/naturalquestion.yaml b/pyserini/resources/naturalquestion.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8050af038b762844b122a8da6eb7b9d2ed989b9 --- /dev/null +++ b/pyserini/resources/naturalquestion.yaml @@ -0,0 +1,66 @@ +conditions: +  - model_name: BM25-k1_0.9_b_0.4 +    command: +      - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics nq-test --output $output --bm25 --k1 0.9 --b 0.4 +    scores: +      - Top5: 44.82 +        Top20: 64.02 +        Top100: 79.20 +        Top500: 86.59 +        Top1000: 88.95 +  - model_name: BM25-k1_0.9_b_0.4_dpr-topics +    command: +      - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-nq-test --output $output --bm25 --k1 0.9 --b 0.4 +    scores: +      - Top5: 43.77 +        Top20: 62.99 +        Top100: 78.23 +        Top500: 85.60 +        Top1000: 88.01 +  - model_name: GarT5-RRF +    command: +      - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics nq-test-gar-t5-answers --output $output --bm25 --k1 0.9 --b 0.4 +      - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics nq-test-gar-t5-titles --output $output --bm25 --k1 0.9 --b 0.4 +      - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics nq-test-gar-t5-sentences --output $output --bm25 --k1 0.9 --b 0.4 +    scores: +      - Top5: 64.62 +        Top20: 77.17 +        Top100: 86.90 +        Top500: 
91.63 + Top1000: 92.91 + - model_name: DPR + command: + - python -m pyserini.search.faiss --threads 72 --batch-size 128 --index wikipedia-dpr-100w.dpr-single-nq --encoder facebook/dpr-question_encoder-single-nq-base --topics nq-test --output $output + scores: + - Top5: 68.61 + Top20: 80.58 + Top100: 86.68 + Top500: 90.91 + Top1000: 91.83 + - model_name: DPR-DKRR + command: + - 'python -m pyserini.search.faiss --threads 72 --batch-size 128 --index wikipedia-dpr-100w.dkrr-nq --encoder castorini/dkrr-dpr-nq-retriever --topics nq-test --output $output --query-prefix question: ' + scores: + - Top5: 73.80 + Top20: 84.27 + Top100: 89.34 + Top500: 92.24 + Top1000: 93.43 + - model_name: DPR-Hybrid + command: + - python -m pyserini.search.hybrid dense --index wikipedia-dpr-100w.dpr-single-nq --encoder facebook/dpr-question_encoder-single-nq-base sparse --index wikipedia-dpr-100w fusion --alpha 1.2 run --topics nq-test --output $output --threads 72 --batch-size 128 + scores: + - Top5: 72.52 + Top20: 83.43 + Top100: 89.03 + Top500: 92.16 + Top1000: 93.19 + - model_name: GarT5RRF-DKRR-RRF + command: + - '' + scores: + - Top5: 74.57 + Top20: 84.90 + Top100: 90.86 + Top500: 93.35 + Top1000: 94.18 diff --git a/pyserini/resources/triviaqa.yaml b/pyserini/resources/triviaqa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6d466a3045a4180532f7c75097c763a448dffba --- /dev/null +++ b/pyserini/resources/triviaqa.yaml @@ -0,0 +1,66 @@ +conditions: + - model_name: BM25-k1_0.9_b_0.4 + command: + - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-trivia-test --output $output --bm25 --k1 0.9 --b 0.4 + scores: + - Top5: 66.29 + Top20: 76.41 + Top100: 83.14 + Top500: 87.35 + Top1000: 88.50 + - model_name: BM25-k1_0.9_b_0.4_dpr-topics + command: + - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-trivia-test --output $output --bm25 --k1 0.9 --b 0.4 + scores: + - Top5: 66.29 + Top20: 76.41 + Top100: 83.14 + Top500: 87.35 + Top1000: 88.50 + - model_name: GarT5-RRF + command: + - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-trivia-test-gar-t5-answers --output $output --bm25 --k1 0.9 --b 0.4 + - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-trivia-test-gar-t5-titles --output $output --bm25 --k1 0.9 --b 0.4 + - python -m pyserini.search.lucene --threads 72 --batch-size 128 --index wikipedia-dpr-100w --topics dpr-trivia-test-gar-t5-sentences --output $output --bm25 --k1 0.9 --b 0.4 + scores: + - Top5: 72.82 + Top20: 80.66 + Top100: 85.95 + Top500: 89.07 + Top1000: 90.06 + - model_name: DPR + command: + - python -m pyserini.search.faiss --threads 72 --batch-size 128 --index wikipedia-dpr-100w.dpr-multi --encoder facebook/dpr-question_encoder-multiset-base --topics dpr-trivia-test --output $output + scores: + - Top5: 69.80 + Top20: 78.87 + Top100: 84.79 + Top500: 88.19 + Top1000: 89.30 + - model_name: DPR-DKRR + command: + - 'python -m pyserini.search.faiss --threads 72 --batch-size 128 --index wikipedia-dpr-100w.dkrr-tqa --encoder castorini/dkrr-dpr-tqa-retriever --topics dpr-trivia-test --output $output --query-prefix question: ' + scores: + - Top5: 77.23 + Top20: 83.74 + Top100: 87.78 + Top500: 89.87 + Top1000: 90.63 + - model_name: DPR-Hybrid + command: + - python -m pyserini.search.hybrid dense --index wikipedia-dpr-100w.dpr-multi --encoder 
facebook/dpr-question_encoder-multiset-base sparse --index wikipedia-dpr-100w fusion --alpha 0.95 run --topics dpr-trivia-test --output $output --threads 72 --batch-size 128 + scores: + - Top5: 76.01 + Top20: 82.64 + Top100: 86.55 + Top500: 89.12 + Top1000: 89.90 + - model_name: GarT5RRF-DKRR-RRF + command: + - '' + scores: + - Top5: 78.63 + Top20: 85.02 + Top100: 88.41 + Top500: 90.29 + Top1000: 90.83 \ No newline at end of file diff --git a/pyserini/search/__init__.py b/pyserini/search/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8679e46c62311bd8aee6a125579c024e1391a677 --- /dev/null +++ b/pyserini/search/__init__.py @@ -0,0 +1,61 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._base import JQuery, JQueryGenerator, JDisjunctionMaxQueryGenerator, get_topics,\ + get_topics_with_reader, get_qrels_file, get_qrels +from .lucene import JLuceneSearcherResult, LuceneSimilarities, LuceneFusionSearcher, LuceneSearcher +from .lucene import JImpactSearcherResult, LuceneImpactSearcher +from ._deprecated import SimpleSearcher, ImpactSearcher, SimpleFusionSearcher + +from .faiss import DenseSearchResult, PRFDenseSearchResult, FaissSearcher, BinaryDenseSearcher, QueryEncoder, \ + DprQueryEncoder, BprQueryEncoder, DkrrDprQueryEncoder, TctColBertQueryEncoder, AnceQueryEncoder, AggretrieverQueryEncoder, AutoQueryEncoder +from .faiss import AnceEncoder +from .faiss import DenseVectorAveragePrf, DenseVectorRocchioPrf, DenseVectorAncePrf + + +__all__ = ['JQuery', + 'LuceneSimilarities', + 'LuceneFusionSearcher', + 'LuceneSearcher', + 'JLuceneSearcherResult', + 'LuceneImpactSearcher', + 'JImpactSearcherResult', + 'JDisjunctionMaxQueryGenerator', + 'JQueryGenerator', + 'get_topics', + 'get_topics_with_reader', + 'get_qrels_file', + 'get_qrels', + 'SimpleSearcher', + 'ImpactSearcher', + 'SimpleFusionSearcher', + 'DenseSearchResult', + 'PRFDenseSearchResult', + 'FaissSearcher', + 'BinaryDenseSearcher', + 'QueryEncoder', + 'DprQueryEncoder', + 'BprQueryEncoder', + 'DkrrDprQueryEncoder', + 'TctColBertQueryEncoder', + 'AnceEncoder', + 'AnceQueryEncoder', + 'AggretrieverQueryEncoder', + 'AutoQueryEncoder', + 'DenseVectorAveragePrf', + 'DenseVectorRocchioPrf', + 'DenseVectorAncePrf'] + diff --git a/pyserini/search/__main__.py b/pyserini/search/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..57e4826ce2700d8cdee229f50e02fc0fbdc57507 --- /dev/null +++ b/pyserini/search/__main__.py @@ -0,0 +1,25 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import os + +print('WARNING: directly calling pyserini.search is deprecated, please use pyserini.search.lucene instead') +args = " ".join(sys.argv[1:]) +os.system(f'python -m pyserini.search.lucene {args}') + + + diff --git a/pyserini/search/__pycache__/__init__.cpython-310.pyc b/pyserini/search/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69017ff61d166faee6586b3834558069376da184 Binary files /dev/null and b/pyserini/search/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/search/__pycache__/_base.cpython-310.pyc b/pyserini/search/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12fc2097c810320962ff2f6c746c098cba7304ca Binary files /dev/null and b/pyserini/search/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/search/__pycache__/_deprecated.cpython-310.pyc b/pyserini/search/__pycache__/_deprecated.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7371a7bfe552215e41a3e8a81cef92ce5acc10e9 Binary files /dev/null and b/pyserini/search/__pycache__/_deprecated.cpython-310.pyc differ diff --git a/pyserini/search/_base.py b/pyserini/search/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..6e8cac4e2c2c1676885a0baa6aac672127c8eddc --- /dev/null +++ b/pyserini/search/_base.py @@ -0,0 +1,560 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python search interface to Anserini. The main entry point is the ``LuceneSearcher`` +class, which wraps the Java class with the same name in Anserini. 
+""" + +import logging +import os + +from pyserini.util import get_cache_home +from pyserini.pyclass import autoclass + +logger = logging.getLogger(__name__) + +# Wrappers around Lucene classes +JQuery = autoclass('org.apache.lucene.search.Query') +JPath = autoclass('java.nio.file.Path') + +# Wrappers around Anserini classes +JQrels = autoclass('io.anserini.eval.Qrels') +JRelevanceJudgments = autoclass('io.anserini.eval.RelevanceJudgments') +JTopicReader = autoclass('io.anserini.search.topicreader.TopicReader') +JTopics = autoclass('io.anserini.search.topicreader.Topics') +JQueryGenerator = autoclass('io.anserini.search.query.QueryGenerator') +JBagOfWordsQueryGenerator = autoclass('io.anserini.search.query.BagOfWordsQueryGenerator') +JDisjunctionMaxQueryGenerator = autoclass('io.anserini.search.query.DisjunctionMaxQueryGenerator') +JCovid19QueryGenerator = autoclass('io.anserini.search.query.Covid19QueryGenerator') + +topics_mapping = { + 'trec1-adhoc': JTopics.TREC1_ADHOC, + 'trec2-adhoc': JTopics.TREC2_ADHOC, + 'trec3-adhoc': JTopics.TREC3_ADHOC, + 'robust04': JTopics.ROBUST04, + 'robust05': JTopics.ROBUST05, + 'core17': JTopics.CORE17, + 'core18': JTopics.CORE18, + 'wt10g': JTopics.WT10G, + 'trec2004-terabyte': JTopics.TREC2004_TERABYTE, + 'trec2005-terabyte': JTopics.TREC2005_TERABYTE, + 'trec2006-terabyte': JTopics.TREC2006_TERABYTE, + 'trec2007-million-query': JTopics.TREC2007_MILLION_QUERY, + 'trec2008-million-query': JTopics.TREC2008_MILLION_QUERY, + 'trec2009-million-query': JTopics.TREC2009_MILLION_QUERY, + 'trec2010-web': JTopics.TREC2010_WEB, + 'trec2011-web': JTopics.TREC2011_WEB, + 'trec2012-web': JTopics.TREC2012_WEB, + 'trec2013-web': JTopics.TREC2013_WEB, + 'trec2014-web': JTopics.TREC2014_WEB, + 'mb11': JTopics.MB11, + 'mb12': JTopics.MB12, + 'mb13': JTopics.MB13, + 'mb14': JTopics.MB14, + 'car17v1.5-benchmarkY1test': JTopics.CAR17V15_BENCHMARK_Y1_TEST, + 'car17v2.0-benchmarkY1test': JTopics.CAR17V20_BENCHMARK_Y1_TEST, + 'dl19-doc': JTopics.TREC2019_DL_DOC, + 'dl19-doc-unicoil': JTopics.TREC2019_DL_DOC_UNICOIL, + 'dl19-doc-unicoil-noexp': JTopics.TREC2019_DL_DOC_UNICOIL_NOEXP, + 'dl19-passage': JTopics.TREC2019_DL_PASSAGE, + 'dl19-passage-unicoil': JTopics.TREC2019_DL_PASSAGE_UNICOIL, + 'dl19-passage-unicoil-noexp': JTopics.TREC2019_DL_PASSAGE_UNICOIL_NOEXP, + 'dl20': JTopics.TREC2020_DL, + 'dl20-unicoil': JTopics.TREC2020_DL_UNICOIL, + 'dl20-unicoil-noexp': JTopics.TREC2020_DL_UNICOIL_NOEXP, + 'dl21': JTopics.TREC2021_DL, + 'dl21-unicoil': JTopics.TREC2021_DL_UNICOIL, + 'dl21-unicoil-noexp': JTopics.TREC2021_DL_UNICOIL_NOEXP, + 'msmarco-doc-dev': JTopics.MSMARCO_DOC_DEV, + 'msmarco-doc-dev-unicoil': JTopics.MSMARCO_DOC_DEV_UNICOIL, + 'msmarco-doc-dev-unicoil-noexp': JTopics.MSMARCO_DOC_DEV_UNICOIL_NOEXP, + 'msmarco-doc-test': JTopics.MSMARCO_DOC_TEST, + 'msmarco-passage-dev-subset': JTopics.MSMARCO_PASSAGE_DEV_SUBSET, + 'msmarco-passage-dev-subset-deepimpact': JTopics.MSMARCO_PASSAGE_DEV_SUBSET_DEEPIMPACT, + 'msmarco-passage-dev-subset-unicoil': JTopics.MSMARCO_PASSAGE_DEV_SUBSET_UNICOIL, + 'msmarco-passage-dev-subset-unicoil-noexp': JTopics.MSMARCO_PASSAGE_DEV_SUBSET_UNICOIL_NOEXP, + 'msmarco-passage-dev-subset-unicoil-tilde': JTopics.MSMARCO_PASSAGE_DEV_SUBSET_UNICOIL_TILDE, + 'msmarco-passage-dev-subset-distill-splade-max': JTopics.MSMARCO_PASSAGE_DEV_SUBSET_DISTILL_SPLADE_MAX, + 'msmarco-passage-test-subset': JTopics.MSMARCO_PASSAGE_TEST_SUBSET, + 'msmarco-v2-doc-dev': JTopics.MSMARCO_V2_DOC_DEV, + 'msmarco-v2-doc-dev-unicoil': JTopics.MSMARCO_V2_DOC_DEV_UNICOIL, + 
'msmarco-v2-doc-dev-unicoil-noexp': JTopics.MSMARCO_V2_DOC_DEV_UNICOIL_NOEXP, + 'msmarco-v2-doc-dev2': JTopics.MSMARCO_V2_DOC_DEV2, + 'msmarco-v2-doc-dev2-unicoil': JTopics.MSMARCO_V2_DOC_DEV2_UNICOIL, + 'msmarco-v2-doc-dev2-unicoil-noexp': JTopics.MSMARCO_V2_DOC_DEV2_UNICOIL_NOEXP, + 'msmarco-v2-passage-dev': JTopics.MSMARCO_V2_PASSAGE_DEV, + 'msmarco-v2-passage-dev-unicoil': JTopics.MSMARCO_V2_PASSAGE_DEV_UNICOIL, + 'msmarco-v2-passage-dev-unicoil-noexp': JTopics.MSMARCO_V2_PASSAGE_DEV_UNICOIL_NOEXP, + 'msmarco-v2-passage-dev2': JTopics.MSMARCO_V2_PASSAGE_DEV2, + 'msmarco-v2-passage-dev2-unicoil': JTopics.MSMARCO_V2_PASSAGE_DEV2_UNICOIL, + 'msmarco-v2-passage-dev2-unicoil-noexp': JTopics.MSMARCO_V2_PASSAGE_DEV2_UNICOIL_NOEXP, + 'ntcir8-zh': JTopics.NTCIR8_ZH, + 'clef2006-fr': JTopics.CLEF2006_FR, + 'trec2002-ar': JTopics.TREC2002_AR, + 'fire2012-bn': JTopics.FIRE2012_BN, + 'fire2012-hi': JTopics.FIRE2012_HI, + 'fire2012-en': JTopics.FIRE2012_EN, + 'covid-round1': JTopics.COVID_ROUND1, + 'covid-round1-udel': JTopics.COVID_ROUND1_UDEL, + 'covid-round2': JTopics.COVID_ROUND2, + 'covid-round2-udel': JTopics.COVID_ROUND2_UDEL, + 'covid-round3': JTopics.COVID_ROUND3, + 'covid-round3-udel': JTopics.COVID_ROUND3_UDEL, + 'covid-round4': JTopics.COVID_ROUND4, + 'covid-round4-udel': JTopics.COVID_ROUND4_UDEL, + 'covid-round5': JTopics.COVID_ROUND5, + 'covid-round5-udel': JTopics.COVID_ROUND5_UDEL, + 'trec2018-bl': JTopics.TREC2018_BL, + 'trec2019-bl': JTopics.TREC2019_BL, + 'trec2020-bl': JTopics.TREC2020_BL, + 'epidemic-qa-expert-prelim': JTopics.EPIDEMIC_QA_EXPERT_PRELIM, + 'epidemic-qa-consumer-prelim': JTopics.EPIDEMIC_QA_CONSUMER_PRELIM, + 'dpr-nq-dev': JTopics.DPR_NQ_DEV, + 'dpr-nq-test': JTopics.DPR_NQ_TEST, + 'dpr-trivia-dev': JTopics.DPR_TRIVIA_DEV, + 'dpr-trivia-test': JTopics.DPR_TRIVIA_TEST, + 'dpr-wq-test': JTopics.DPR_WQ_TEST, + 'dpr-squad-test': JTopics.DPR_SQUAD_TEST, + 'dpr-curated-test': JTopics.DPR_CURATED_TEST, + 'dpr-trivia-test-gar-t5-answers': JTopics.DPR_TRIVIA_TEST_GART5_ANSWERS, + 'dpr-trivia-test-gar-t5-titles': JTopics.DPR_TRIVIA_TEST_GART5_TITLES, + 'dpr-trivia-test-gar-t5-sentences': JTopics.DPR_TRIVIA_TEST_GART5_SENTENCES, + 'dpr-trivia-test-gar-t5-all': JTopics.DPR_TRIVIA_TEST_GART5_ALL, + 'nq-test-gar-t5-answers': JTopics.NQ_TEST_GART5_ANSWERS, + 'nq-test-gar-t5-titles': JTopics.NQ_TEST_GART5_TITLES, + 'nq-test-gar-t5-sentences': JTopics.NQ_TEST_GART5_SENTENCES, + 'nq-test-gar-t5-all': JTopics.NQ_TEST_GART5_ALL, + 'nq-dev': JTopics.NQ_DEV, + 'nq-test': JTopics.NQ_TEST, + 'mrtydi-v1.1-arabic-train': JTopics.MRTYDI_V11_AR_TRAIN, + 'mrtydi-v1.1-arabic-dev': JTopics.MRTYDI_V11_AR_DEV, + 'mrtydi-v1.1-arabic-test': JTopics.MRTYDI_V11_AR_TEST, + 'mrtydi-v1.1-bengali-train': JTopics.MRTYDI_V11_BN_TRAIN, + 'mrtydi-v1.1-bengali-dev': JTopics.MRTYDI_V11_BN_DEV, + 'mrtydi-v1.1-bengali-test': JTopics.MRTYDI_V11_BN_TEST, + 'mrtydi-v1.1-english-train': JTopics.MRTYDI_V11_EN_TRAIN, + 'mrtydi-v1.1-english-dev': JTopics.MRTYDI_V11_EN_DEV, + 'mrtydi-v1.1-english-test': JTopics.MRTYDI_V11_EN_TEST, + 'mrtydi-v1.1-finnish-train': JTopics.MRTYDI_V11_FI_TRAIN, + 'mrtydi-v1.1-finnish-dev': JTopics.MRTYDI_V11_FI_DEV, + 'mrtydi-v1.1-finnish-test': JTopics.MRTYDI_V11_FI_TEST, + 'mrtydi-v1.1-indonesian-train': JTopics.MRTYDI_V11_ID_TRAIN, + 'mrtydi-v1.1-indonesian-dev': JTopics.MRTYDI_V11_ID_DEV, + 'mrtydi-v1.1-indonesian-test': JTopics.MRTYDI_V11_ID_TEST, + 'mrtydi-v1.1-japanese-train': JTopics.MRTYDI_V11_JA_TRAIN, + 'mrtydi-v1.1-japanese-dev': JTopics.MRTYDI_V11_JA_DEV, + 
'mrtydi-v1.1-japanese-test': JTopics.MRTYDI_V11_JA_TEST, + 'mrtydi-v1.1-korean-train': JTopics.MRTYDI_V11_KO_TRAIN, + 'mrtydi-v1.1-korean-dev': JTopics.MRTYDI_V11_KO_DEV, + 'mrtydi-v1.1-korean-test': JTopics.MRTYDI_V11_KO_TEST, + 'mrtydi-v1.1-russian-train': JTopics.MRTYDI_V11_RU_TRAIN, + 'mrtydi-v1.1-russian-dev': JTopics.MRTYDI_V11_RU_DEV, + 'mrtydi-v1.1-russian-test': JTopics.MRTYDI_V11_RU_TEST, + 'mrtydi-v1.1-swahili-train': JTopics.MRTYDI_V11_SW_TRAIN, + 'mrtydi-v1.1-swahili-dev': JTopics.MRTYDI_V11_SW_DEV, + 'mrtydi-v1.1-swahili-test': JTopics.MRTYDI_V11_SW_TEST, + 'mrtydi-v1.1-telugu-train': JTopics.MRTYDI_V11_TE_TRAIN, + 'mrtydi-v1.1-telugu-dev': JTopics.MRTYDI_V11_TE_DEV, + 'mrtydi-v1.1-telugu-test': JTopics.MRTYDI_V11_TE_TEST, + 'mrtydi-v1.1-thai-train': JTopics.MRTYDI_V11_TH_TRAIN, + 'mrtydi-v1.1-thai-dev': JTopics.MRTYDI_V11_TH_DEV, + 'mrtydi-v1.1-thai-test': JTopics.MRTYDI_V11_TH_TEST, + 'beir-v1.0.0-trec-covid-test': JTopics.BEIR_V1_0_0_TREC_COVID_TEST, + 'beir-v1.0.0-bioasq-test': JTopics.BEIR_V1_0_0_BIOASQ_TEST, + 'beir-v1.0.0-nfcorpus-test': JTopics.BEIR_V1_0_0_NFCORPUS_TEST, + 'beir-v1.0.0-nq-test': JTopics.BEIR_V1_0_0_NQ_TEST, + 'beir-v1.0.0-hotpotqa-test': JTopics.BEIR_V1_0_0_HOTPOTQA_TEST, + 'beir-v1.0.0-fiqa-test': JTopics.BEIR_V1_0_0_FIQA_TEST, + 'beir-v1.0.0-signal1m-test': JTopics.BEIR_V1_0_0_SIGNAL1M_TEST, + 'beir-v1.0.0-trec-news-test': JTopics.BEIR_V1_0_0_TREC_NEWS_TEST, + 'beir-v1.0.0-robust04-test': JTopics.BEIR_V1_0_0_ROBUST04_TEST, + 'beir-v1.0.0-arguana-test': JTopics.BEIR_V1_0_0_ARGUANA_TEST, + 'beir-v1.0.0-webis-touche2020-test': JTopics.BEIR_V1_0_0_WEBIS_TOUCHE2020_TEST, + 'beir-v1.0.0-cqadupstack-android-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_ANDROID_TEST, + 'beir-v1.0.0-cqadupstack-english-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_ENGLISH_TEST, + 'beir-v1.0.0-cqadupstack-gaming-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_GAMING_TEST, + 'beir-v1.0.0-cqadupstack-gis-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_GIS_TEST, + 'beir-v1.0.0-cqadupstack-mathematica-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_TEST, + 'beir-v1.0.0-cqadupstack-physics-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_PHYSICS_TEST, + 'beir-v1.0.0-cqadupstack-programmers-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_TEST, + 'beir-v1.0.0-cqadupstack-stats-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_STATS_TEST, + 'beir-v1.0.0-cqadupstack-tex-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_TEX_TEST, + 'beir-v1.0.0-cqadupstack-unix-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_UNIX_TEST, + 'beir-v1.0.0-cqadupstack-webmasters-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_TEST, + 'beir-v1.0.0-cqadupstack-wordpress-test': JTopics.BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_TEST, + 'beir-v1.0.0-quora-test': JTopics.BEIR_V1_0_0_QUORA_TEST, + 'beir-v1.0.0-dbpedia-entity-test': JTopics.BEIR_V1_0_0_DBPEDIA_ENTITY_TEST, + 'beir-v1.0.0-scidocs-test': JTopics.BEIR_V1_0_0_SCIDOCS_TEST, + 'beir-v1.0.0-fever-test': JTopics.BEIR_V1_0_0_FEVER_TEST, + 'beir-v1.0.0-climate-fever-test': JTopics.BEIR_V1_0_0_CLIMATE_FEVER_TEST, + 'beir-v1.0.0-scifact-test': JTopics.BEIR_V1_0_0_SCIFACT_TEST, + 'beir-v1.0.0-trec-covid-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_TREC_COVID_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-bioasq-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_BIOASQ_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-nfcorpus-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_NFCORPUS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-nq-test-splade_distil_cocodenser_medium': 
JTopics.BEIR_V1_0_0_NQ_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-hotpotqa-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_HOTPOTQA_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-fiqa-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_FIQA_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-signal1m-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_SIGNAL1M_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-trec-news-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_TREC_NEWS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-robust04-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_ROBUST04_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-arguana-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_ARGUANA_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-webis-touche2020-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_WEBIS_TOUCHE2020_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-android-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_ANDROID_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-english-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_ENGLISH_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-gaming-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_GAMING_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-gis-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_GIS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-mathematica-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-physics-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_PHYSICS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-programmers-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-stats-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_STATS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-tex-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_TEX_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-unix-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_UNIX_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-webmasters-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-cqadupstack-wordpress-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-quora-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_QUORA_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-dbpedia-entity-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_DBPEDIA_ENTITY_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-scidocs-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_SCIDOCS_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-fever-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_FEVER_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-climate-fever-test-splade_distil_cocodenser_medium': JTopics.BEIR_V1_0_0_CLIMATE_FEVER_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'beir-v1.0.0-scifact-test-splade_distil_cocodenser_medium': 
JTopics.BEIR_V1_0_0_SCIFACT_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM, + 'hc4-v1.0-fa-dev-title': JTopics.HC4_V1_0_FA_DEV_TITLE, + 'hc4-v1.0-fa-dev-desc': JTopics.HC4_V1_0_FA_DEV_DESC, + 'hc4-v1.0-fa-dev-desc-title': JTopics.HC4_V1_0_FA_DEV_DESC_TITLE, + 'hc4-v1.0-fa-test-title': JTopics.HC4_V1_0_FA_TEST_TITLE, + 'hc4-v1.0-fa-test-desc': JTopics.HC4_V1_0_FA_TEST_DESC, + 'hc4-v1.0-fa-test-desc-title': JTopics.HC4_V1_0_FA_TEST_DESC_TITLE, + 'hc4-v1.0-fa-en-test-title': JTopics.HC4_V1_0_FA_EN_TEST_TITLE, + 'hc4-v1.0-fa-en-test-desc': JTopics.HC4_V1_0_FA_EN_TEST_DESC, + 'hc4-v1.0-fa-en-test-desc-title': JTopics.HC4_V1_0_FA_EN_TEST_DESC_TITLE, + 'hc4-v1.0-ru-dev-title': JTopics.HC4_V1_0_RU_DEV_TITLE, + 'hc4-v1.0-ru-dev-desc': JTopics.HC4_V1_0_RU_DEV_DESC, + 'hc4-v1.0-ru-dev-desc-title': JTopics.HC4_V1_0_RU_DEV_DESC_TITLE, + 'hc4-v1.0-ru-test-title': JTopics.HC4_V1_0_RU_TEST_TITLE, + 'hc4-v1.0-ru-test-desc': JTopics.HC4_V1_0_RU_TEST_DESC, + 'hc4-v1.0-ru-test-desc-title': JTopics.HC4_V1_0_RU_TEST_DESC_TITLE, + 'hc4-v1.0-ru-en-test-title': JTopics.HC4_V1_0_RU_EN_TEST_TITLE, + 'hc4-v1.0-ru-en-test-desc': JTopics.HC4_V1_0_RU_EN_TEST_DESC, + 'hc4-v1.0-ru-en-test-desc-title': JTopics.HC4_V1_0_RU_EN_TEST_DESC_TITLE, + 'hc4-v1.0-zh-dev-title': JTopics.HC4_V1_0_ZH_DEV_TITLE, + 'hc4-v1.0-zh-dev-desc': JTopics.HC4_V1_0_ZH_DEV_DESC, + 'hc4-v1.0-zh-dev-desc-title': JTopics.HC4_V1_0_ZH_DEV_DESC_TITLE, + 'hc4-v1.0-zh-test-title': JTopics.HC4_V1_0_ZH_TEST_TITLE, + 'hc4-v1.0-zh-test-desc': JTopics.HC4_V1_0_ZH_TEST_DESC, + 'hc4-v1.0-zh-test-desc-title': JTopics.HC4_V1_0_ZH_TEST_DESC_TITLE, + 'hc4-v1.0-zh-en-test-title': JTopics.HC4_V1_0_ZH_EN_TEST_TITLE, + 'hc4-v1.0-zh-en-test-desc': JTopics.HC4_V1_0_ZH_EN_TEST_DESC, + 'hc4-v1.0-zh-en-test-desc-title': JTopics.HC4_V1_0_ZH_EN_TEST_DESC_TITLE, + # NeuCLIR 2022 topics + 'neuclir22-en-title': JTopics.NEUCLIR22_EN_TITLE, + 'neuclir22-en-desc': JTopics.NEUCLIR22_EN_DESC, + 'neuclir22-en-desc-title': JTopics.NEUCLIR22_EN_DESC_TITLE, + 'neuclir22-fa-ht-title': JTopics.NEUCLIR22_FA_HT_TITLE, + 'neuclir22-fa-ht-desc': JTopics.NEUCLIR22_FA_HT_DESC, + 'neuclir22-fa-ht-desc-title': JTopics.NEUCLIR22_FA_HT_DESC_TITLE, + 'neuclir22-fa-mt-title': JTopics.NEUCLIR22_FA_MT_TITLE, + 'neuclir22-fa-mt-desc': JTopics.NEUCLIR22_FA_MT_DESC, + 'neuclir22-fa-mt-desc-title': JTopics.NEUCLIR22_FA_MT_DESC_TITLE, + 'neuclir22-ru-ht-title': JTopics.NEUCLIR22_RU_HT_TITLE, + 'neuclir22-ru-ht-desc': JTopics.NEUCLIR22_RU_HT_DESC, + 'neuclir22-ru-ht-desc-title': JTopics.NEUCLIR22_RU_HT_DESC_TITLE, + 'neuclir22-ru-mt-title': JTopics.NEUCLIR22_RU_MT_TITLE, + 'neuclir22-ru-mt-desc': JTopics.NEUCLIR22_RU_MT_DESC, + 'neuclir22-ru-mt-desc-title': JTopics.NEUCLIR22_RU_MT_DESC_TITLE, + 'neuclir22-zh-ht-title': JTopics.NEUCLIR22_ZH_HT_TITLE, + 'neuclir22-zh-ht-desc': JTopics.NEUCLIR22_ZH_HT_DESC, + 'neuclir22-zh-ht-desc-title': JTopics.NEUCLIR22_ZH_HT_DESC_TITLE, + 'neuclir22-zh-mt-title': JTopics.NEUCLIR22_ZH_MT_TITLE, + 'neuclir22-zh-mt-desc': JTopics.NEUCLIR22_ZH_MT_DESC, + 'neuclir22-zh-mt-desc-title': JTopics.NEUCLIR22_ZH_MT_DESC_TITLE, + # MIRACL topics + 'miracl-v1.0-ar-dev': JTopics.MIRACL_V10_AR_DEV, + 'miracl-v1.0-bn-dev': JTopics.MIRACL_V10_BN_DEV, + 'miracl-v1.0-en-dev': JTopics.MIRACL_V10_EN_DEV, + 'miracl-v1.0-es-dev': JTopics.MIRACL_V10_ES_DEV, + 'miracl-v1.0-fa-dev': JTopics.MIRACL_V10_FA_DEV, + 'miracl-v1.0-fi-dev': JTopics.MIRACL_V10_FI_DEV, + 'miracl-v1.0-fr-dev': JTopics.MIRACL_V10_FR_DEV, + 'miracl-v1.0-hi-dev': JTopics.MIRACL_V10_HI_DEV, + 'miracl-v1.0-id-dev': JTopics.MIRACL_V10_ID_DEV, + 
'miracl-v1.0-ja-dev': JTopics.MIRACL_V10_JA_DEV, + 'miracl-v1.0-ko-dev': JTopics.MIRACL_V10_KO_DEV, + 'miracl-v1.0-ru-dev': JTopics.MIRACL_V10_RU_DEV, + 'miracl-v1.0-sw-dev': JTopics.MIRACL_V10_SW_DEV, + 'miracl-v1.0-te-dev': JTopics.MIRACL_V10_TE_DEV, + 'miracl-v1.0-th-dev': JTopics.MIRACL_V10_TH_DEV, + 'miracl-v1.0-zh-dev': JTopics.MIRACL_V10_ZH_DEV, + 'miracl-v1.0-de-dev': JTopics.MIRACL_V10_DE_DEV, + 'miracl-v1.0-yo-dev': JTopics.MIRACL_V10_YO_DEV, +} + +qrels_mapping = { + 'trec1-adhoc': JQrels.TREC1_ADHOC, + 'trec2-adhoc': JQrels.TREC2_ADHOC, + 'trec3-adhoc': JQrels.TREC3_ADHOC, + 'robust04': JQrels.ROBUST04, + 'robust05': JQrels.ROBUST05, + 'core17': JQrels.CORE17, + 'core18': JQrels.CORE18, + 'wt10g': JQrels.WT10G, + 'trec2004-terabyte': JQrels.TREC2004_TERABYTE, + 'trec2005-terabyte': JQrels.TREC2005_TERABYTE, + 'trec2006-terabyte': JQrels.TREC2006_TERABYTE, + 'trec2011-web': JQrels.TREC2011_WEB, + 'trec2012-web': JQrels.TREC2012_WEB, + 'trec2013-web': JQrels.TREC2013_WEB, + 'trec2014-web': JQrels.TREC2014_WEB, + 'mb11': JQrels.MB11, + 'mb12': JQrels.MB12, + 'mb13': JQrels.MB13, + 'mb14': JQrels.MB14, + 'car17v1.5-benchmarkY1test': JQrels.CAR17V15_BENCHMARK_Y1_TEST, + 'car17v2.0-benchmarkY1test': JQrels.CAR17V20_BENCHMARK_Y1_TEST, + 'dl19-doc': JQrels.TREC2019_DL_DOC, + 'dl19-passage': JQrels.TREC2019_DL_PASSAGE, + 'dl20-doc': JQrels.TREC2020_DL_DOC, + 'dl20-passage': JQrels.TREC2020_DL_PASSAGE, + 'dl21-doc': JQrels.TREC2021_DL_DOC, + 'dl21-passage': JQrels.TREC2021_DL_PASSAGE, + 'msmarco-doc-dev': JQrels.MSMARCO_DOC_DEV, + 'msmarco-passage-dev-subset': JQrels.MSMARCO_PASSAGE_DEV_SUBSET, + 'msmarco-v2-doc-dev': JQrels.MSMARCO_V2_DOC_DEV, + 'msmarco-v2-doc-dev2': JQrels.MSMARCO_V2_DOC_DEV2, + 'msmarco-v2-passage-dev': JQrels.MSMARCO_V2_PASSAGE_DEV, + 'msmarco-v2-passage-dev2': JQrels.MSMARCO_V2_PASSAGE_DEV2, + 'ntcir8-zh': JQrels.NTCIR8_ZH, + 'clef2006-fr': JQrels.CLEF2006_FR, + 'trec2002-ar': JQrels.TREC2002_AR, + 'fire2012-bn': JQrels.FIRE2012_BN, + 'fire2012-hi': JQrels.FIRE2012_HI, + 'fire2012-en': JQrels.FIRE2012_EN, + 'covid-complete': JQrels.COVID_COMPLETE, + 'covid-round1': JQrels.COVID_ROUND1, + 'covid-round2': JQrels.COVID_ROUND2, + 'covid-round3': JQrels.COVID_ROUND3, + 'covid-round3-cumulative': JQrels.COVID_ROUND3_CUMULATIVE, + 'covid-round4': JQrels.COVID_ROUND4, + 'covid-round4-cumulative': JQrels.COVID_ROUND4_CUMULATIVE, + 'covid-round5': JQrels.COVID_ROUND5, + 'trec2018-bl': JQrels.TREC2018_BL, + 'trec2019-bl': JQrels.TREC2019_BL, + 'trec2020-bl': JQrels.TREC2020_BL, + 'mrtydi-v1.1-arabic-train': JQrels.MRTYDI_V11_AR_TRAIN, + 'mrtydi-v1.1-arabic-dev': JQrels.MRTYDI_V11_AR_DEV, + 'mrtydi-v1.1-arabic-test': JQrels.MRTYDI_V11_AR_TEST, + 'mrtydi-v1.1-bengali-train': JQrels.MRTYDI_V11_BN_TRAIN, + 'mrtydi-v1.1-bengali-dev': JQrels.MRTYDI_V11_BN_DEV, + 'mrtydi-v1.1-bengali-test': JQrels.MRTYDI_V11_BN_TEST, + 'mrtydi-v1.1-english-train': JQrels.MRTYDI_V11_EN_TRAIN, + 'mrtydi-v1.1-english-dev': JQrels.MRTYDI_V11_EN_DEV, + 'mrtydi-v1.1-english-test': JQrels.MRTYDI_V11_EN_TEST, + 'mrtydi-v1.1-finnish-train': JQrels.MRTYDI_V11_FI_TRAIN, + 'mrtydi-v1.1-finnish-dev': JQrels.MRTYDI_V11_FI_DEV, + 'mrtydi-v1.1-finnish-test': JQrels.MRTYDI_V11_FI_TEST, + 'mrtydi-v1.1-indonesian-train': JQrels.MRTYDI_V11_ID_TRAIN, + 'mrtydi-v1.1-indonesian-dev': JQrels.MRTYDI_V11_ID_DEV, + 'mrtydi-v1.1-indonesian-test': JQrels.MRTYDI_V11_ID_TEST, + 'mrtydi-v1.1-japanese-train': JQrels.MRTYDI_V11_JA_TRAIN, + 'mrtydi-v1.1-japanese-dev': JQrels.MRTYDI_V11_JA_DEV, + 'mrtydi-v1.1-japanese-test': 
JQrels.MRTYDI_V11_JA_TEST, + 'mrtydi-v1.1-korean-train': JQrels.MRTYDI_V11_KO_TRAIN, + 'mrtydi-v1.1-korean-dev': JQrels.MRTYDI_V11_KO_DEV, + 'mrtydi-v1.1-korean-test': JQrels.MRTYDI_V11_KO_TEST, + 'mrtydi-v1.1-russian-train': JQrels.MRTYDI_V11_RU_TRAIN, + 'mrtydi-v1.1-russian-dev': JQrels.MRTYDI_V11_RU_DEV, + 'mrtydi-v1.1-russian-test': JQrels.MRTYDI_V11_RU_TEST, + 'mrtydi-v1.1-swahili-train': JQrels.MRTYDI_V11_SW_TRAIN, + 'mrtydi-v1.1-swahili-dev': JQrels.MRTYDI_V11_SW_DEV, + 'mrtydi-v1.1-swahili-test': JQrels.MRTYDI_V11_SW_TEST, + 'mrtydi-v1.1-telugu-train': JQrels.MRTYDI_V11_TE_TRAIN, + 'mrtydi-v1.1-telugu-dev': JQrels.MRTYDI_V11_TE_DEV, + 'mrtydi-v1.1-telugu-test': JQrels.MRTYDI_V11_TE_TEST, + 'mrtydi-v1.1-thai-train': JQrels.MRTYDI_V11_TH_TRAIN, + 'mrtydi-v1.1-thai-dev': JQrels.MRTYDI_V11_TH_DEV, + 'mrtydi-v1.1-thai-test': JQrels.MRTYDI_V11_TH_TEST, + 'beir-v1.0.0-trec-covid-test': JQrels.BEIR_V1_0_0_TREC_COVID_TEST, + 'beir-v1.0.0-bioasq-test': JQrels.BEIR_V1_0_0_BIOASQ_TEST, + 'beir-v1.0.0-nfcorpus-test': JQrels.BEIR_V1_0_0_NFCORPUS_TEST, + 'beir-v1.0.0-nq-test': JQrels.BEIR_V1_0_0_NQ_TEST, + 'beir-v1.0.0-hotpotqa-test': JQrels.BEIR_V1_0_0_HOTPOTQA_TEST, + 'beir-v1.0.0-fiqa-test': JQrels.BEIR_V1_0_0_FIQA_TEST, + 'beir-v1.0.0-signal1m-test': JQrels.BEIR_V1_0_0_SIGNAL1M_TEST, + 'beir-v1.0.0-trec-news-test': JQrels.BEIR_V1_0_0_TREC_NEWS_TEST, + 'beir-v1.0.0-robust04-test': JQrels.BEIR_V1_0_0_ROBUST04_TEST, + 'beir-v1.0.0-arguana-test': JQrels.BEIR_V1_0_0_ARGUANA_TEST, + 'beir-v1.0.0-webis-touche2020-test': JQrels.BEIR_V1_0_0_WEBIS_TOUCHE2020_TEST, + 'beir-v1.0.0-cqadupstack-android-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_ANDROID_TEST, + 'beir-v1.0.0-cqadupstack-english-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_ENGLISH_TEST, + 'beir-v1.0.0-cqadupstack-gaming-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_GAMING_TEST, + 'beir-v1.0.0-cqadupstack-gis-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_GIS_TEST, + 'beir-v1.0.0-cqadupstack-mathematica-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_TEST, + 'beir-v1.0.0-cqadupstack-physics-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_PHYSICS_TEST, + 'beir-v1.0.0-cqadupstack-programmers-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_TEST, + 'beir-v1.0.0-cqadupstack-stats-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_STATS_TEST, + 'beir-v1.0.0-cqadupstack-tex-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_TEX_TEST, + 'beir-v1.0.0-cqadupstack-unix-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_UNIX_TEST, + 'beir-v1.0.0-cqadupstack-webmasters-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_TEST, + 'beir-v1.0.0-cqadupstack-wordpress-test': JQrels.BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_TEST, + 'beir-v1.0.0-quora-test': JQrels.BEIR_V1_0_0_QUORA_TEST, + 'beir-v1.0.0-dbpedia-entity-test': JQrels.BEIR_V1_0_0_DBPEDIA_ENTITY_TEST, + 'beir-v1.0.0-scidocs-test': JQrels.BEIR_V1_0_0_SCIDOCS_TEST, + 'beir-v1.0.0-fever-test': JQrels.BEIR_V1_0_0_FEVER_TEST, + 'beir-v1.0.0-climate-fever-test': JQrels.BEIR_V1_0_0_CLIMATE_FEVER_TEST, + 'beir-v1.0.0-scifact-test': JQrels.BEIR_V1_0_0_SCIFACT_TEST, + 'hc4-v1.0-fa-dev': JQrels.HC4_V1_0_FA_DEV, + 'hc4-v1.0-fa-test': JQrels.HC4_V1_0_FA_TEST, + 'hc4-v1.0-ru-dev': JQrels.HC4_V1_0_RU_DEV, + 'hc4-v1.0-ru-test': JQrels.HC4_V1_0_RU_TEST, + 'hc4-v1.0-zh-dev': JQrels.HC4_V1_0_ZH_DEV, + 'hc4-v1.0-zh-test': JQrels.HC4_V1_0_ZH_TEST, + 'hc4-neuclir22-fa-test': JQrels.HC4_NEUCLIR22_FA_TEST, + 'hc4-neuclir22-ru-test': JQrels.HC4_NEUCLIR22_RU_TEST, + 'hc4-neuclir22-zh-test': JQrels.HC4_NEUCLIR22_ZH_TEST, + 'miracl-v1.0-ar-dev': JQrels.MIRACL_V10_AR_DEV, + 'miracl-v1.0-bn-dev': 
JQrels.MIRACL_V10_BN_DEV, + 'miracl-v1.0-en-dev': JQrels.MIRACL_V10_EN_DEV, + 'miracl-v1.0-es-dev': JQrels.MIRACL_V10_ES_DEV, + 'miracl-v1.0-fa-dev': JQrels.MIRACL_V10_FA_DEV, + 'miracl-v1.0-fi-dev': JQrels.MIRACL_V10_FI_DEV, + 'miracl-v1.0-fr-dev': JQrels.MIRACL_V10_FR_DEV, + 'miracl-v1.0-hi-dev': JQrels.MIRACL_V10_HI_DEV, + 'miracl-v1.0-id-dev': JQrels.MIRACL_V10_ID_DEV, + 'miracl-v1.0-ja-dev': JQrels.MIRACL_V10_JA_DEV, + 'miracl-v1.0-ko-dev': JQrels.MIRACL_V10_KO_DEV, + 'miracl-v1.0-ru-dev': JQrels.MIRACL_V10_RU_DEV, + 'miracl-v1.0-sw-dev': JQrels.MIRACL_V10_SW_DEV, + 'miracl-v1.0-te-dev': JQrels.MIRACL_V10_TE_DEV, + 'miracl-v1.0-th-dev': JQrels.MIRACL_V10_TH_DEV, + 'miracl-v1.0-zh-dev': JQrels.MIRACL_V10_ZH_DEV, + 'miracl-v1.0-de-dev': JQrels.MIRACL_V10_DE_DEV, + 'miracl-v1.0-yo-dev': JQrels.MIRACL_V10_YO_DEV, +} + + +def get_topics(collection_name): + """ + Parameters + ---------- + collection_name : str + collection_name + + Returns + ------- + result : dictionary + Topics as a dictionary + """ + if collection_name not in topics_mapping: + raise ValueError(f'Topic {collection_name} Not Found') + + topics = JTopicReader.getTopicsWithStringIds(topics_mapping[collection_name]) + + t = {} + for topic in topics.keySet().toArray(): + + if topic.isdigit(): + # parse the keys into integers + topic_key = int(topic) + else: + topic_key = topic + + t[topic_key] = {} + for key in topics.get(topic).keySet().toArray(): + t[topic_key][key] = topics.get(topic).get(key) + return t + + +def get_topics_with_reader(reader_class, file): + # Yes, this is an insanely ridiculous method name. + topics = JTopicReader.getTopicsWithStringIdsFromFileWithTopicReaderClass(reader_class, file) + if topics is None: + raise ValueError(f'Unable to initialize TopicReader {reader_class} with file {file}!') + + t = {} + for topic in topics.keySet().toArray(): + + if topic.isdigit(): + # parse the keys into integers + topic_key = int(topic) + else: + topic_key = topic + + t[topic_key] = {} + for key in topics.get(topic).keySet().toArray(): + t[topic_key][key] = topics.get(topic).get(key) + return t + + +def get_qrels_file(collection_name): + """ + Parameters + ---------- + collection_name : str + collection_name + + Returns + ------- + path : str + path of the qrels file + """ + if collection_name in qrels_mapping: + qrels = qrels_mapping[collection_name] + target_path = JRelevanceJudgments.getQrelsPath(JPath.of(qrels.path)).toString() + if os.path.exists(target_path): + return target_path + target_dir = os.path.split(target_path)[0] + if not os.path.exists(target_dir): + os.makedirs(target_dir) + with open(target_path, 'w') as file: + qrels_content = JRelevanceJudgments.getQrelsResource(JPath.of(target_path)) + file.write(qrels_content) + return target_path + + raise FileNotFoundError(f'no qrels file for {collection_name}') + + +def get_qrels(collection_name): + """ + Parameters + ---------- + collection_name : str + collection_name + + Returns + ------- + result : dictionary + qrels as a dictionary + """ + file_path = get_qrels_file(collection_name) + qrels = {} + with open(file_path, 'r') as f: + for line in f: + qid, _, docid, judgement = line.rstrip().split() + + if qid.isdigit(): + qrels_key = int(qid) + else: + qrels_key = qid + + if docid.isdigit(): + doc_key = int(docid) + else: + doc_key = docid + + if qrels_key in qrels: + qrels[qrels_key][doc_key] = judgement + else: + qrels[qrels_key] = {doc_key: judgement} + return qrels diff --git a/pyserini/search/_deprecated.py b/pyserini/search/_deprecated.py new file 
mode 100644 index 0000000000000000000000000000000000000000..0d877b086659df95c4e6d493765ec5d9ed2cd0c7 --- /dev/null +++ b/pyserini/search/_deprecated.py @@ -0,0 +1,38 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyserini.search.lucene import LuceneImpactSearcher, LuceneSearcher, LuceneFusionSearcher + + +class SimpleSearcher(LuceneSearcher): + def __new__(cls, *args, **kwargs): + print('SimpleSearcher class has been deprecated, ' + 'please use LuceneSearcher from pyserini.search.lucene instead') + return super().__new__(cls) + + +class ImpactSearcher(LuceneImpactSearcher): + def __new__(cls, *args, **kwargs): + print('ImpactSearcher class has been deprecated, ' + 'please use LuceneImpactSearcher from pyserini.search.lucene instead') + return super().__new__(cls) + + +class SimpleFusionSearcher(LuceneFusionSearcher): + def __new__(cls, *args, **kwargs): + print('SimpleFusionSearcher class has been deprecated, ' + 'please use LuceneFusionSearcher from pyserini.search.lucene instead') + return super().__new__(cls) diff --git a/pyserini/search/faiss/__init__.py b/pyserini/search/faiss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8cb896ef1b21731a66851902e9ee80641a807517 --- /dev/null +++ b/pyserini/search/faiss/__init__.py @@ -0,0 +1,25 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
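For reference, a minimal usage sketch of the topic and qrels helpers defined above. It assumes `get_topics` and `get_qrels` are re-exported at the `pyserini.search` package level (as elsewhere in the library) and uses `beir-v1.0.0-scifact-test`, a key that appears in both mappings; treat it as an illustration, not part of this patch.

    from pyserini.search import get_topics, get_qrels  # assumed package-level re-exports

    topics = get_topics('beir-v1.0.0-scifact-test')    # {qid: {field: value, ...}, ...}
    qrels = get_qrels('beir-v1.0.0-scifact-test')      # {qid: {docid: judgement, ...}, ...}

    for qid in list(topics)[:3]:
        # Field names depend on the topic reader; 'title' is the usual query field.
        print(qid, topics[qid].get('title'), len(qrels.get(qid, {})))
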
+# + +from ._searcher import DenseSearchResult, PRFDenseSearchResult, FaissSearcher, BinaryDenseSearcher, QueryEncoder, \ + DprQueryEncoder, BprQueryEncoder, DkrrDprQueryEncoder, TctColBertQueryEncoder, AnceQueryEncoder, AggretrieverQueryEncoder, AutoQueryEncoder + +from ._model import AnceEncoder +from._prf import DenseVectorAveragePrf, DenseVectorRocchioPrf, DenseVectorAncePrf + +__all__ = ['DenseSearchResult', 'PRFDenseSearchResult', 'FaissSearcher', 'BinaryDenseSearcher', 'QueryEncoder', + 'DprQueryEncoder', 'BprQueryEncoder', 'DkrrDprQueryEncoder', 'TctColBertQueryEncoder', 'AnceEncoder', + 'AnceQueryEncoder', 'AggretrieverQueryEncoder', 'AutoQueryEncoder', 'DenseVectorAveragePrf', 'DenseVectorRocchioPrf', 'DenseVectorAncePrf'] diff --git a/pyserini/search/faiss/__main__.py b/pyserini/search/faiss/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..88d5090bbe3de58b28f184617c9791367a60409d --- /dev/null +++ b/pyserini/search/faiss/__main__.py @@ -0,0 +1,296 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os +from typing import OrderedDict + +from tqdm import tqdm + +from pyserini.search import FaissSearcher, BinaryDenseSearcher, TctColBertQueryEncoder, QueryEncoder, \ + DprQueryEncoder, BprQueryEncoder, DkrrDprQueryEncoder, AnceQueryEncoder, AggretrieverQueryEncoder, AutoQueryEncoder, DenseVectorAveragePrf, \ + DenseVectorRocchioPrf, DenseVectorAncePrf + +from pyserini.encode import PcaEncoder +from pyserini.query_iterator import get_query_iterator, TopicsFormat +from pyserini.output_writer import get_output_writer, OutputFormat +from pyserini.search.lucene import LuceneSearcher + +# from ._prf import DenseVectorAveragePrf, DenseVectorRocchioPrf + +# Fixes this error: "OMP: Error #15: Initializing libomp.a, but found libomp.dylib already initialized." +# https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial +os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + + +def define_dsearch_args(parser): + parser.add_argument('--index', type=str, metavar='path to index or index name', required=True, + help="Path to Faiss index or name of prebuilt index.") + parser.add_argument('--encoder-class', type=str, metavar='which query encoder class to use. `default` would infer from the args.encoder', + required=False, + choices=["dkrr", "dpr", "bpr", "tct_colbert", "ance", "sentence", "contriever", "auto", "aggretriever"], + default=None, + help='which query encoder class to use. 
`default` would infer from the args.encoder') + parser.add_argument('--encoder', type=str, metavar='path to query encoder checkpoint or encoder name', + required=False, + help="Path to query encoder pytorch checkpoint or hgf encoder model name") + parser.add_argument('--tokenizer', type=str, metavar='name or path', + required=False, + help="Path to a hgf tokenizer name or path") + parser.add_argument('--encoded-queries', type=str, metavar='path to query encoded queries dir or queries name', + required=False, + help="Path to query encoder pytorch checkpoint or hgf encoder model name") + parser.add_argument('--pca-model', type=str, metavar='path', required=False, + default=None, help="Path to a faiss pca model") + parser.add_argument('--device', type=str, metavar='device to run query encoder', required=False, default='cpu', + help="Device to run query encoder, cpu or [cuda:0, cuda:1, ...]") + parser.add_argument('--query-prefix', type=str, metavar='str', required=False, default=None, + help="Query prefix if exists.") + parser.add_argument('--searcher', type=str, metavar='str', required=False, default='simple', + help="dense searcher type") + parser.add_argument('--prf-depth', type=int, metavar='num of passages used for PRF', required=False, default=0, + help="Specify how many passages are used for PRF, 0: Simple retrieval with no PRF, > 0: perform PRF") + parser.add_argument('--prf-method', type=str, metavar='avg or rocchio', required=False, default='avg', + help="Choose PRF methods, avg or rocchio") + parser.add_argument('--rocchio-alpha', type=float, metavar='alpha parameter for rocchio', required=False, + default=0.9, + help="The alpha parameter to control the contribution from the query vector") + parser.add_argument('--rocchio-beta', type=float, metavar='beta parameter for rocchio', required=False, default=0.1, + help="The beta parameter to control the contribution from the average vector of the positive PRF passages") + parser.add_argument('--rocchio-gamma', type=float, metavar='gamma parameter for rocchio', required=False, default=0.1, + help="The gamma parameter to control the contribution from the average vector of the negative PRF passages") + parser.add_argument('--rocchio-topk', type=int, metavar='topk passages as positive for rocchio', required=False, default=3, + help="Set topk passages as positive PRF passages for rocchio") + parser.add_argument('--rocchio-bottomk', type=int, metavar='bottomk passages as negative for rocchio', required=False, default=0, + help="Set bottomk passages as negative PRF passages for rocchio, 0: do not use negatives prf passages.") + parser.add_argument('--sparse-index', type=str, metavar='sparse lucene index containing contents', required=False, + help='The path to sparse index containing the passage contents') + parser.add_argument('--ance-prf-encoder', type=str, metavar='query encoder path for ANCE-PRF', required=False, + help='The path or name to ANCE-PRF model checkpoint') + parser.add_argument('--ef-search', type=int, metavar='efSearch for HNSW index', required=False, default=None, + help="Set efSearch for HNSW index") + + +def init_query_encoder(encoder, encoder_class, tokenizer_name, topics_name, encoded_queries, device, prefix): + encoded_queries_map = { + 'msmarco-passage-dev-subset': 'tct_colbert-msmarco-passage-dev-subset', + 'dpr-nq-dev': 'dpr_multi-nq-dev', + 'dpr-nq-test': 'dpr_multi-nq-test', + 'dpr-trivia-dev': 'dpr_multi-trivia-dev', + 'dpr-trivia-test': 'dpr_multi-trivia-test', + 'dpr-wq-test': 'dpr_multi-wq-test', + 
'dpr-squad-test': 'dpr_multi-squad-test', + 'dpr-curated-test': 'dpr_multi-curated-test' + } + encoder_class_map = { + "dkrr": DkrrDprQueryEncoder, + "dpr": DprQueryEncoder, + "bpr": BprQueryEncoder, + "tct_colbert": TctColBertQueryEncoder, + "ance": AnceQueryEncoder, + "sentence": AutoQueryEncoder, + "contriever": AutoQueryEncoder, + "aggretriever": AggretrieverQueryEncoder, + "auto": AutoQueryEncoder, + } + + if encoder: + _encoder_class = encoder_class + + # determine encoder_class + if encoder_class is not None: + encoder_class = encoder_class_map[encoder_class] + else: + # if any class keyword was matched in the given encoder name, + # use that encoder class + for class_keyword in encoder_class_map: + if class_keyword in encoder.lower(): + encoder_class = encoder_class_map[class_keyword] + break + + # if none of the class keyword was matched, + # use the AutoQueryEncoder + if encoder_class is None: + encoder_class = AutoQueryEncoder + + # prepare arguments to encoder class + kwargs = dict(encoder_dir=encoder, tokenizer_name=tokenizer_name, device=device, prefix=prefix) + if (_encoder_class == "sentence") or ("sentence" in encoder): + kwargs.update(dict(pooling='mean', l2_norm=True)) + if (_encoder_class == "contriever") or ("contriever" in encoder): + kwargs.update(dict(pooling='mean', l2_norm=False)) + + return encoder_class(**kwargs) + + if encoded_queries: + if os.path.exists(encoded_queries): + if 'bpr' in encoded_queries: + return BprQueryEncoder(encoded_query_dir=encoded_queries) + else: + return QueryEncoder(encoded_queries) + else: + if 'bpr' in encoded_queries: + return BprQueryEncoder.load_encoded_queries(encoded_queries) + else: + return QueryEncoder.load_encoded_queries(encoded_queries) + + if topics_name in encoded_queries_map: + return QueryEncoder.load_encoded_queries(encoded_queries_map[topics_name]) + raise ValueError(f'No encoded queries for topic {topics_name}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Search a Faiss index.') + parser.add_argument('--topics', type=str, metavar='topic_name', required=True, + help="Name of topics. Available: msmarco-passage-dev-subset.") + parser.add_argument('--hits', type=int, metavar='num', required=False, default=1000, help="Number of hits.") + parser.add_argument('--binary-hits', type=int, metavar='num', required=False, default=1000, + help="Number of binary hits.") + parser.add_argument("--rerank", action="store_true", help='whethere rerank bpr sparse results.') + parser.add_argument('--topics-format', type=str, metavar='format', default=TopicsFormat.DEFAULT.value, + help=f"Format of topics. Available: {[x.value for x in list(TopicsFormat)]}") + parser.add_argument('--output-format', type=str, metavar='format', default=OutputFormat.TREC.value, + help=f"Format of output. 
Available: {[x.value for x in list(OutputFormat)]}") + parser.add_argument('--output', type=str, metavar='path', required=True, help="Path to output file.") + parser.add_argument('--max-passage', action='store_true', + default=False, help="Select only max passage from document.") + parser.add_argument('--max-passage-hits', type=int, metavar='num', required=False, default=100, + help="Final number of hits when selecting only max passage.") + parser.add_argument('--max-passage-delimiter', type=str, metavar='str', required=False, default='#', + help="Delimiter between docid and passage id.") + parser.add_argument('--batch-size', type=int, metavar='num', required=False, default=1, + help="search batch of queries in parallel") + parser.add_argument('--threads', type=int, metavar='num', required=False, default=1, + help="maximum threads to use during search") + # For some test collections, a query is doc from the corpus (e.g., arguana in BEIR). + # We want to remove the query from the results. This is equivalent to -removeQuery in Java. + parser.add_argument('--remove-query', action='store_true', default=False, help="Remove query from results list.") + define_dsearch_args(parser) + args = parser.parse_args() + + query_iterator = get_query_iterator(args.topics, TopicsFormat(args.topics_format)) + topics = query_iterator.topics + + query_encoder = init_query_encoder( + args.encoder, args.encoder_class, args.tokenizer, args.topics, args.encoded_queries, args.device, args.query_prefix) + if args.pca_model: + query_encoder = PcaEncoder(query_encoder, args.pca_model) + kwargs = {} + if os.path.exists(args.index): + # create searcher from index directory + if args.searcher.lower() == 'bpr': + kwargs = dict(binary_k=args.binary_hits, rerank=args.rerank) + searcher = BinaryDenseSearcher(args.index, query_encoder) + else: + searcher = FaissSearcher(args.index, query_encoder) + else: + # create searcher from prebuilt index name + if args.searcher.lower() == 'bpr': + kwargs = dict(binary_k=args.binary_hits, rerank=args.rerank) + searcher = BinaryDenseSearcher.from_prebuilt_index(args.index, query_encoder) + else: + searcher = FaissSearcher.from_prebuilt_index(args.index, query_encoder) + + if args.ef_search: + searcher.set_hnsw_ef_search(args.ef_search) + + if not searcher: + exit() + + # Check PRF Flag + if args.prf_depth > 0 and type(searcher) == FaissSearcher: + PRF_FLAG = True + if args.prf_method.lower() == 'avg': + prfRule = DenseVectorAveragePrf() + elif args.prf_method.lower() == 'rocchio': + prfRule = DenseVectorRocchioPrf(args.rocchio_alpha, args.rocchio_beta, args.rocchio_gamma, + args.rocchio_topk, args.rocchio_bottomk) + # ANCE-PRF is using a new query encoder, so the input to DenseVectorAncePrf is different + elif args.prf_method.lower() == 'ance-prf' and type(query_encoder) == AnceQueryEncoder: + if os.path.exists(args.sparse_index): + sparse_searcher = LuceneSearcher(args.sparse_index) + else: + sparse_searcher = LuceneSearcher.from_prebuilt_index(args.sparse_index) + prf_query_encoder = AnceQueryEncoder(encoder_dir=args.ance_prf_encoder, tokenizer_name=args.tokenizer, + device=args.device) + prfRule = DenseVectorAncePrf(prf_query_encoder, sparse_searcher) + print(f'Running FaissSearcher with {args.prf_method.upper()} PRF...') + else: + PRF_FLAG = False + + # build output path + output_path = args.output + + print(f'Running {args.topics} topics, saving to {output_path}...') + tag = 'Faiss' + + output_writer = get_output_writer(output_path, OutputFormat(args.output_format), 'w', + 
max_hits=args.hits, tag=tag, topics=topics, + use_max_passage=args.max_passage, + max_passage_delimiter=args.max_passage_delimiter, + max_passage_hits=args.max_passage_hits) + + with output_writer: + batch_topics = list() + batch_topic_ids = list() + for index, (topic_id, text) in enumerate(tqdm(query_iterator, total=len(topics.keys()))): + if args.batch_size <= 1 and args.threads <= 1: + if PRF_FLAG: + emb_q, prf_candidates = searcher.search(text, k=args.prf_depth, return_vector=True, **kwargs) + # ANCE-PRF input is different, do not need query embeddings + if args.prf_method.lower() == 'ance-prf': + prf_emb_q = prfRule.get_prf_q_emb(text, prf_candidates) + else: + prf_emb_q = prfRule.get_prf_q_emb(emb_q[0], prf_candidates) + prf_emb_q = np.expand_dims(prf_emb_q, axis=0).astype('float32') + hits = searcher.search(prf_emb_q, k=args.hits, **kwargs) + else: + hits = searcher.search(text, args.hits, **kwargs) + results = [(topic_id, hits)] + else: + batch_topic_ids.append(str(topic_id)) + batch_topics.append(text) + if (index + 1) % args.batch_size == 0 or \ + index == len(topics.keys()) - 1: + if PRF_FLAG: + q_embs, prf_candidates = searcher.batch_search(batch_topics, batch_topic_ids, + k=args.prf_depth, return_vector=True, **kwargs) + # ANCE-PRF input is different, do not need query embeddings + if args.prf_method.lower() == 'ance-prf': + prf_embs_q = prfRule.get_batch_prf_q_emb(batch_topics, batch_topic_ids, prf_candidates) + else: + prf_embs_q = prfRule.get_batch_prf_q_emb(batch_topic_ids, q_embs, prf_candidates) + results = searcher.batch_search(prf_embs_q, batch_topic_ids, k=args.hits, threads=args.threads, + **kwargs) + results = [(id_, results[id_]) for id_ in batch_topic_ids] + else: + results = searcher.batch_search(batch_topics, batch_topic_ids, args.hits, threads=args.threads, + **kwargs) + results = [(id_, results[id_]) for id_ in batch_topic_ids] + batch_topic_ids.clear() + batch_topics.clear() + else: + continue + + for topic, hits in results: + # For some test collections, a query is doc from the corpus (e.g., arguana in BEIR). + # We want to remove the query from the results. 
+ if args.remove_query: + hits = [hit for hit in hits if hit.docid != topic] + + output_writer.write(topic, hits) + + results.clear() diff --git a/pyserini/search/faiss/__pycache__/__init__.cpython-310.pyc b/pyserini/search/faiss/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d0df54fa19276f8521076ae012d07e91ddb54455 Binary files /dev/null and b/pyserini/search/faiss/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/search/faiss/__pycache__/_model.cpython-310.pyc b/pyserini/search/faiss/__pycache__/_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9e82f879056b9defcaef21096ca87656a8c6afc Binary files /dev/null and b/pyserini/search/faiss/__pycache__/_model.cpython-310.pyc differ diff --git a/pyserini/search/faiss/__pycache__/_prf.cpython-310.pyc b/pyserini/search/faiss/__pycache__/_prf.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1591e715df74068927e0eb972926956ecfc8be97 Binary files /dev/null and b/pyserini/search/faiss/__pycache__/_prf.cpython-310.pyc differ diff --git a/pyserini/search/faiss/__pycache__/_searcher.cpython-310.pyc b/pyserini/search/faiss/__pycache__/_searcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25494531bdaa7b3b165bdc7651277050b5190be5 Binary files /dev/null and b/pyserini/search/faiss/__pycache__/_searcher.cpython-310.pyc differ diff --git a/pyserini/search/faiss/_model.py b/pyserini/search/faiss/_model.py new file mode 100644 index 0000000000000000000000000000000000000000..3c6e3e5fc15aab9d11d5cdfa2a1678dd040443d7 --- /dev/null +++ b/pyserini/search/faiss/_model.py @@ -0,0 +1,77 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
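As a rough sketch of what the CLI driver above wires together, the same flow can be run programmatically with the classes it imports from `pyserini.search`. The encoder checkpoint and prebuilt index names below are illustrative placeholders, not values taken from this diff.

    from pyserini.search import FaissSearcher, TctColBertQueryEncoder  # same imports the CLI uses

    # Hypothetical checkpoint / prebuilt-index names; substitute whatever your setup provides.
    encoder = TctColBertQueryEncoder('castorini/tct_colbert-v2-hnp-msmarco')
    searcher = FaissSearcher.from_prebuilt_index('msmarco-passage-tct_colbert-v2-hnp-bf', encoder)

    hits = searcher.search('what is a lobster roll?', k=10)
    for i, hit in enumerate(hits, start=1):
        print(f'{i:2} {hit.docid:15} {hit.score:.5f}')
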
+# + +from typing import Optional + +from transformers import PreTrainedModel, RobertaConfig, RobertaModel +from transformers.file_utils import is_torch_available, requires_backends + +if is_torch_available(): + import torch + + +class AnceEncoder(PreTrainedModel): + config_class = RobertaConfig + base_model_prefix = 'ance_encoder' + load_tf_weights = None + _keys_to_ignore_on_load_missing = [r'position_ids'] + _keys_to_ignore_on_load_unexpected = [r'pooler', r'classifier'] + + def __init__(self, config: RobertaConfig): + requires_backends(self, 'torch') + super().__init__(config) + self.config = config + self.roberta = RobertaModel(config) + self.embeddingHead = torch.nn.Linear(config.hidden_size, 768) + self.norm = torch.nn.LayerNorm(768) + self.init_weights() + + # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights + def _init_weights(self, module): + """ Initialize the weights """ + if isinstance(module, (torch.nn.Linear, torch.nn.Embedding)): + # Slightly different from the TF version which uses truncated_normal for initialization + # cf https://github.com/pytorch/pytorch/pull/5617 + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, torch.nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + def init_weights(self): + self.roberta.init_weights() + self.embeddingHead.apply(self._init_weights) + self.norm.apply(self._init_weights) + + def forward( + self, + input_ids: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + ): + input_shape = input_ids.size() + device = input_ids.device + if attention_mask is None: + attention_mask = ( + torch.ones(input_shape, device=device) + if input_ids is None + else (input_ids != self.roberta.config.pad_token_id) + ) + outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask) + sequence_output = outputs.last_hidden_state + pooled_output = sequence_output[:, 0, :] + pooled_output = self.norm(self.embeddingHead(pooled_output)) + return pooled_output diff --git a/pyserini/search/faiss/_prf.py b/pyserini/search/faiss/_prf.py new file mode 100644 index 0000000000000000000000000000000000000000..68167318d6ae252568b9423ab3cc84b9b72346e6 --- /dev/null +++ b/pyserini/search/faiss/_prf.py @@ -0,0 +1,209 @@ +import numpy as np +from typing import List, Dict +from pyserini.search.faiss import PRFDenseSearchResult, AnceQueryEncoder +from pyserini.search.lucene import LuceneSearcher +import json + + +class DenseVectorPrf: + def __init__(self): + pass + + def get_prf_q_emb(self, **kwargs): + pass + + def get_batch_prf_q_emb(self, **kwargs): + pass + + +class DenseVectorAveragePrf(DenseVectorPrf): + + def get_prf_q_emb(self, emb_qs: np.ndarray = None, prf_candidates: List[PRFDenseSearchResult] = None): + """Perform Average PRF with Dense Vectors + + Parameters + ---------- + emb_qs : np.ndarray + Query embedding + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. 
+ + Returns + ------- + np.ndarray + return new query embeddings + """ + all_candidate_embs = [item.vectors for item in prf_candidates] + new_emb_qs = np.mean(np.vstack((emb_qs, all_candidate_embs)), axis=0) + return new_emb_qs + + def get_batch_prf_q_emb(self, topic_ids: List[str] = None, emb_qs: np.ndarray = None, + prf_candidates: Dict[str, List[PRFDenseSearchResult]] = None): + """Perform Average PRF with Dense Vectors + + Parameters + ---------- + topic_ids : List[str] + List of topic ids. + emb_qs : np.ndarray + Query embeddings + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. + + Returns + ------- + np.ndarray + return new query embeddings + """ + + qids = list() + new_emb_qs = list() + for index, topic_id in enumerate(topic_ids): + qids.append(topic_id) + new_emb_qs.append(self.get_prf_q_emb(emb_qs[index], prf_candidates[topic_id])) + new_emb_qs = np.array(new_emb_qs).astype('float32') + return new_emb_qs + + +class DenseVectorRocchioPrf(DenseVectorPrf): + def __init__(self, alpha: float, beta: float, gamma: float, topk: int, bottomk: int): + """ + Parameters + ---------- + alpha : float + Rocchio parameter, controls the weight assigned to the original query embedding. + beta : float + Rocchio parameter, controls the weight assigned to the positive document embeddings. + gamma : float + Rocchio parameter, controls the weight assigned to the negative document embeddings. + topk : int + Rocchio parameter, set topk documents as positive document feedbacks. + bottomk : int + Rocchio parameter, set bottomk documents as negative document feedbacks. + """ + DenseVectorPrf.__init__(self) + self.alpha = alpha + self.beta = beta + self.gamma = gamma + self.topk = topk + self.bottomk = bottomk + + def get_prf_q_emb(self, emb_qs: np.ndarray = None, prf_candidates: List[PRFDenseSearchResult] = None): + """Perform Rocchio PRF with Dense Vectors + + Parameters + ---------- + emb_qs : np.ndarray + query embedding + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. + + Returns + ------- + np.ndarray + return new query embeddings + """ + + all_candidate_embs = [item.vectors for item in prf_candidates] + weighted_query_embs = self.alpha * emb_qs + weighted_mean_pos_doc_embs = self.beta * np.mean(all_candidate_embs[:self.topk], axis=0) + new_emb_q = weighted_query_embs + weighted_mean_pos_doc_embs + if self.bottomk > 0: + weighted_mean_neg_doc_embs = self.gamma * np.mean(all_candidate_embs[-self.bottomk:], axis=0) + new_emb_q -= weighted_mean_neg_doc_embs + return new_emb_q + + def get_batch_prf_q_emb(self, topic_ids: List[str] = None, emb_qs: np.ndarray = None, + prf_candidates: Dict[str, List[PRFDenseSearchResult]] = None): + """Perform Rocchio PRF with Dense Vectors + + Parameters + ---------- + topic_ids : List[str] + List of topic ids. + emb_qs : np.ndarray + Query embeddings + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. 
+ + Returns + ------- + np.ndarray + return new query embeddings + """ + qids = list() + new_emb_qs = list() + for index, topic_id in enumerate(topic_ids): + qids.append(topic_id) + new_emb_qs.append(self.get_prf_q_emb(emb_qs[index], prf_candidates[topic_id])) + new_emb_qs = np.array(new_emb_qs).astype('float32') + return new_emb_qs + + +class DenseVectorAncePrf(DenseVectorPrf): + def __init__(self, encoder: AnceQueryEncoder, sparse_searcher: LuceneSearcher): + """ + Parameters + ---------- + encoder : AnceQueryEncoder + The new ANCE query encoder for ANCE-PRF. + sparse_searcher : LuceneSearcher + The sparse searcher using lucene index, for retrieving doc contents. + """ + DenseVectorPrf.__init__(self) + self.encoder = encoder + self.sparse_searcher = sparse_searcher + + def get_prf_q_emb(self, query: str = None, prf_candidates: List[PRFDenseSearchResult] = None): + """Perform single ANCE-PRF with Dense Vectors + + Parameters + ---------- + query : str + query text + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. + + Returns + ------- + np.ndarray + return new query embeddings + """ + passage_texts = [query] + for item in prf_candidates: + raw_text = json.loads(self.sparse_searcher.doc(item.docid).raw()) + passage_texts.append(raw_text['contents']) + full_text = f'{self.encoder.tokenizer.cls_token}{self.encoder.tokenizer.sep_token.join(passage_texts)}{self.encoder.tokenizer.sep_token}' + emb_q = self.encoder.prf_encode(full_text) + emb_q = emb_q.reshape((1, len(emb_q))) + return emb_q + + def get_batch_prf_q_emb(self, topics: List[str], topic_ids: List[str], + prf_candidates: Dict[str, List[PRFDenseSearchResult]]) -> np.ndarray: + """Perform batch ANCE-PRF with Dense Vectors + + Parameters + ---------- + topics : List[str] + List of query texts. + topic_ids: List[str] + List of topic ids. + prf_candidates : List[PRFDenseSearchResult] + List of PRFDenseSearchResult, contains document embeddings. + + Returns + ------- + np.ndarray + return new query embeddings + """ + prf_passage_texts = list() + for index, query in enumerate(topics): + passage_texts = [query] + prf_candidate = prf_candidates[topic_ids[index]] + for item in prf_candidate: + raw_text = json.loads(self.sparse_searcher.doc(item.docid).raw()) + passage_texts.append(raw_text['contents']) + full_text = f'{self.encoder.tokenizer.cls_token}{self.encoder.tokenizer.sep_token.join(passage_texts)}{self.encoder.tokenizer.sep_token}' + prf_passage_texts.append(full_text) + emb_q = self.encoder.prf_batch_encode(prf_passage_texts) + return emb_q diff --git a/pyserini/search/faiss/_searcher.py b/pyserini/search/faiss/_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..828fc5ad25d462024586d93f0d3130e0b2499b79 --- /dev/null +++ b/pyserini/search/faiss/_searcher.py @@ -0,0 +1,696 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
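To make the Rocchio arithmetic in `_prf.py` concrete, here is a small self-contained sketch with toy vectors. It assumes `DenseVectorRocchioPrf` and `PRFDenseSearchResult` are importable from `pyserini.search.faiss`, as the package `__init__` above exports them; the dimensions and scores are made up for illustration.

    import numpy as np
    from pyserini.search.faiss import DenseVectorRocchioPrf, PRFDenseSearchResult

    rng = np.random.default_rng(0)
    emb_q = rng.random(8, dtype=np.float32)  # toy 8-dimensional query embedding

    # Four fake feedback documents with vectors attached, mimicking what
    # FaissSearcher.search(..., return_vector=True) hands back.
    cands = [PRFDenseSearchResult(docid=f'doc{i}', score=1.0 - 0.1 * i,
                                  vectors=rng.random(8, dtype=np.float32))
             for i in range(4)]

    # q' = alpha * q + beta * mean(top-k doc vectors) - gamma * mean(bottom-k doc vectors)
    prf = DenseVectorRocchioPrf(alpha=0.9, beta=0.1, gamma=0.1, topk=3, bottomk=1)
    new_emb_q = prf.get_prf_q_emb(emb_q, cands)
    print(new_emb_q.shape)  # (8,)
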
+# + +""" +This module provides Pyserini's dense search interface to FAISS index. +The main entry point is the ``FaissSearcher`` class. +""" + +import os +from dataclasses import dataclass +from typing import Dict, List, Union, Optional, Tuple + +import numpy as np +import pandas as pd + +from transformers import (AutoModel, AutoTokenizer, BertModel, BertTokenizer, BertTokenizerFast, + DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RobertaTokenizer) +from transformers.file_utils import is_faiss_available, requires_backends + +from pyserini.util import (download_encoded_queries, download_prebuilt_index, + get_dense_indexes_info, get_sparse_index) +from pyserini.search.lucene import LuceneSearcher +from pyserini.index import Document + +from ._model import AnceEncoder +import torch + +from ...encode import PcaEncoder +from ...encode._aggretriever import BERTAggretrieverEncoder, DistlBERTAggretrieverEncoder + +if is_faiss_available(): + import faiss + + +class QueryEncoder: + def __init__(self, encoded_query_dir: str = None): + self.has_model = False + self.has_encoded_query = False + if encoded_query_dir: + self.embedding = self._load_embeddings(encoded_query_dir) + self.has_encoded_query = True + + def encode(self, query: str): + return self.embedding[query] + + @classmethod + def load_encoded_queries(cls, encoded_query_name: str): + """Build a query encoder from a pre-encoded query; download the encoded queries if necessary. + + Parameters + ---------- + encoded_query_name : str + pre encoded query name. + + Returns + ------- + QueryEncoder + Encoder built from the pre encoded queries. + """ + print(f'Attempting to initialize pre-encoded queries {encoded_query_name}.') + try: + query_dir = download_encoded_queries(encoded_query_name) + except ValueError as e: + print(str(e)) + return None + + print(f'Initializing {encoded_query_name}...') + return cls(encoded_query_dir=query_dir) + + @staticmethod + def _load_embeddings(encoded_query_dir): + df = pd.read_pickle(os.path.join(encoded_query_dir, 'embedding.pkl')) + return dict(zip(df['text'].tolist(), df['embedding'].tolist())) + + +class AggretrieverQueryEncoder(QueryEncoder): + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', **kwargs): + if encoder_dir: + self.device = device + if 'distilbert' in encoder_dir.lower(): + self.model = DistlBERTAggretrieverEncoder.from_pretrained(encoder_dir) + else: + self.model = BERTAggretrieverEncoder.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or encoder_dir) + self.has_model = True + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. 
Please provide at least one') + + def encode(self, query: str, max_length: int=32): + if self.has_model: + inputs = self.tokenizer( + query, + max_length=max_length, + padding="longest", + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + outputs = self.model(**inputs) + embeddings = outputs.detach().cpu().numpy() + return embeddings.flatten() + else: + return super().encode(query) + + +class TctColBertQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', **kwargs): + super().__init__(encoded_query_dir) + if encoder_dir: + self.device = device + self.model = BertModel.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name or encoder_dir) + self.has_model = True + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. Please provide at least one') + + def encode(self, query: str): + if self.has_model: + max_length = 36 # hardcode for now + inputs = self.tokenizer( + '[CLS] [Q] ' + query + '[MASK]' * max_length, + max_length=max_length, + truncation=True, + add_special_tokens=False, + return_tensors='pt' + ) + inputs.to(self.device) + outputs = self.model(**inputs) + embeddings = outputs.last_hidden_state.detach().cpu().numpy() + return np.average(embeddings[:, 4:, :], axis=-2).flatten() + else: + return super().encode(query) + + +class DprQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', **kwargs): + super().__init__(encoded_query_dir) + if encoder_dir: + self.device = device + self.model = DPRQuestionEncoder.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(tokenizer_name or encoder_dir) + self.has_model = True + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. Please provide at least one') + + def encode(self, query: str): + if self.has_model: + input_ids = self.tokenizer(query, return_tensors='pt') + input_ids.to(self.device) + embeddings = self.model(input_ids["input_ids"]).pooler_output.detach().cpu().numpy() + return embeddings.flatten() + else: + return super().encode(query) + + +class BprQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', **kwargs): + self.has_model = False + self.has_encoded_query = False + if encoded_query_dir: + self.embedding = self._load_embeddings(encoded_query_dir) + self.has_encoded_query = True + + if encoder_dir: + self.device = device + self.model = DPRQuestionEncoder.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(tokenizer_name or encoder_dir) + self.has_model = True + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. 
Please provide at least one') + + def encode(self, query: str): + if self.has_model: + input_ids = self.tokenizer(query, return_tensors='pt') + input_ids.to(self.device) + embeddings = self.model(input_ids["input_ids"]).pooler_output.detach().cpu() + dense_embeddings = embeddings.numpy() + sparse_embeddings = self.convert_to_binary_code(embeddings).numpy() + return {'dense': dense_embeddings.flatten(), 'sparse': sparse_embeddings.flatten()} + else: + return super().encode(query) + + def convert_to_binary_code(self, input_repr: torch.Tensor): + return input_repr.new_ones(input_repr.size()).masked_fill_(input_repr < 0, -1.0) + + @staticmethod + def _load_embeddings(encoded_query_dir): + df = pd.read_pickle(os.path.join(encoded_query_dir, 'embedding.pkl')) + ret = {} + for text, dense, sparse in zip(df['text'].tolist(), df['dense_embedding'].tolist(), + df['sparse_embedding'].tolist()): + ret[text] = {'dense': dense, 'sparse': sparse} + return ret + + +class DkrrDprQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, encoded_query_dir: str = None, device: str = 'cpu', + prefix: str = "question:", **kwargs): + super().__init__(encoded_query_dir) + self.device = device + self.model = BertModel.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") + self.has_model = True + self.prefix = prefix + + @staticmethod + def _mean_pooling(model_output, attention_mask): + model_output = model_output[0].masked_fill(1 - attention_mask[:, :, None], 0.) + model_output = torch.sum(model_output, dim=1) / torch.clamp(torch.sum(attention_mask, dim=1), min=1e-9)[:, None] + return model_output.flatten() + + def encode(self, query: str): + if self.has_model: + if self.prefix: + query = f'{self.prefix} {query}' + inputs = self.tokenizer(query, return_tensors='pt', max_length=40, padding="max_length") + inputs.to(self.device) + outputs = self.model(input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"]) + embeddings = self._mean_pooling(outputs, inputs['attention_mask']).detach().cpu().numpy() + return embeddings.flatten() + else: + return super().encode(query) + + +class AnceQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', **kwargs): + super().__init__(encoded_query_dir) + if encoder_dir: + self.device = device + self.model = AnceEncoder.from_pretrained(encoder_dir) + self.model.to(self.device) + self.tokenizer = RobertaTokenizer.from_pretrained(tokenizer_name or encoder_dir) + self.has_model = True + self.tokenizer.do_lower_case = True + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. 
Please provide at least one') + + def encode(self, query: str): + if self.has_model: + inputs = self.tokenizer( + [query], + max_length=64, + padding='longest', + truncation=True, + add_special_tokens=True, + return_tensors='pt' + ) + inputs.to(self.device) + embeddings = self.model(inputs["input_ids"]).detach().cpu().numpy() + return embeddings.flatten() + else: + return super().encode(query) + + def prf_encode(self, query: str): + if self.has_model: + inputs = self.tokenizer( + [query], + max_length=512, + padding='longest', + truncation=True, + add_special_tokens=False, + return_tensors='pt' + ) + inputs.to(self.device) + embeddings = self.model(inputs["input_ids"]).detach().cpu().numpy() + return embeddings.flatten() + else: + return super().encode(query) + + def prf_batch_encode(self, query: List[str]): + inputs = self.tokenizer( + query, + max_length=512, + padding='longest', + truncation=True, + add_special_tokens=False, + return_tensors='pt' + ) + inputs.to(self.device) + embeddings = self.model(inputs["input_ids"]).detach().cpu().numpy() + return embeddings + + +class AutoQueryEncoder(QueryEncoder): + + def __init__(self, encoder_dir: str = None, tokenizer_name: str = None, + encoded_query_dir: str = None, device: str = 'cpu', + pooling: str = 'cls', l2_norm: bool = False, **kwargs): + super().__init__(encoded_query_dir) + if encoder_dir: + self.device = device + self.model = AutoModel.from_pretrained(encoder_dir) + self.model.to(self.device) + try: + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or encoder_dir) + except: + self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name or encoder_dir, use_fast=False) + self.has_model = True + self.pooling = pooling + self.l2_norm = l2_norm + if (not self.has_model) and (not self.has_encoded_query): + raise Exception('Neither query encoder model nor encoded queries provided. Please provide at least one') + + @staticmethod + def _mean_pooling(model_output, attention_mask): + token_embeddings = model_output[0] # First element of model_output contains all token embeddings + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) + sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9) + return sum_embeddings / sum_mask + + def encode(self, query: str): + if self.has_model: + inputs = self.tokenizer( + query, + add_special_tokens=True, + return_tensors='pt', + truncation='only_first', + padding='longest', + return_token_type_ids=False, + ) + + inputs.to(self.device) + outputs = self.model(**inputs) + if self.pooling == "mean": + embeddings = self._mean_pooling(outputs, inputs['attention_mask']).detach().cpu().numpy() + else: + embeddings = outputs[0][:, 0, :].detach().cpu().numpy() + if self.l2_norm: + faiss.normalize_L2(embeddings) + return embeddings.flatten() + else: + return super().encode(query) + + +@dataclass +class DenseSearchResult: + docid: str + score: float + + +@dataclass +class PRFDenseSearchResult: + docid: str + score: float + vectors: [float] + + +class FaissSearcher: + """Simple Searcher for dense representation + + Parameters + ---------- + index_dir : str + Path to faiss index directory. 
+ """ + + def __init__(self, index_dir: str, query_encoder: Union[QueryEncoder, str], + prebuilt_index_name: Optional[str] = None): + requires_backends(self, "faiss") + if isinstance(query_encoder, QueryEncoder) or isinstance(query_encoder, PcaEncoder): + self.query_encoder = query_encoder + else: + self.query_encoder = self._init_encoder_from_str(query_encoder) + self.index, self.docids = self.load_index(index_dir) + self.dimension = self.index.d + self.num_docs = self.index.ntotal + + assert self.docids is None or self.num_docs == len(self.docids) + if prebuilt_index_name: + sparse_index = get_sparse_index(prebuilt_index_name) + self.ssearcher = LuceneSearcher.from_prebuilt_index(sparse_index) + + @classmethod + def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: QueryEncoder): + """Build a searcher from a pre-built index; download the index if necessary. + + Parameters + ---------- + query_encoder: QueryEncoder + the query encoder, which has `encode` method that convert query text to embedding + prebuilt_index_name : str + Prebuilt index name. + + Returns + ------- + FaissSearcher + Searcher built from the prebuilt faiss index. + """ + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: + index_dir = download_prebuilt_index(prebuilt_index_name) + except ValueError as e: + print(str(e)) + return None + + print(f'Initializing {prebuilt_index_name}...') + return cls(index_dir, query_encoder, prebuilt_index_name) + + @staticmethod + def list_prebuilt_indexes(): + """Display information about available prebuilt indexes.""" + get_dense_indexes_info() + + def search(self, query: Union[str, np.ndarray], k: int = 10, threads: int = 1, return_vector: bool = False) \ + -> Union[List[DenseSearchResult], Tuple[np.ndarray, List[PRFDenseSearchResult]]]: + """Search the collection. + + Parameters + ---------- + query : Union[str, np.ndarray] + query text or query embeddings + k : int + Number of hits to return. + threads : int + Maximum number of threads to use for intra-query search. + return_vector : bool + Return the results with vectors + Returns + ------- + Union[List[DenseSearchResult], Tuple[np.ndarray, List[PRFDenseSearchResult]]] + Either returns a list of search results. + Or returns the query vector with the list of PRF dense search results with vectors. + """ + if isinstance(query, str): + emb_q = self.query_encoder.encode(query) + assert len(emb_q) == self.dimension + emb_q = emb_q.reshape((1, len(emb_q))) + else: + emb_q = query + faiss.omp_set_num_threads(threads) + if return_vector: + distances, indexes, vectors = self.index.search_and_reconstruct(emb_q, k) + vectors = vectors[0] + distances = distances.flat + indexes = indexes.flat + return emb_q, [PRFDenseSearchResult(self.docids[idx], score, vector) + for score, idx, vector in zip(distances, indexes, vectors) if idx != -1] + else: + distances, indexes = self.index.search(emb_q, k) + distances = distances.flat + indexes = indexes.flat + return [DenseSearchResult(self.docids[idx], score) + for score, idx in zip(distances, indexes) if idx != -1] + + def batch_search(self, queries: Union[List[str], np.ndarray], q_ids: List[str], k: int = 10, + threads: int = 1, return_vector: bool = False) \ + -> Union[Dict[str, List[DenseSearchResult]], Tuple[np.ndarray, Dict[str, List[PRFDenseSearchResult]]]]: + """ + + Parameters + ---------- + queries : Union[List[str], np.ndarray] + List of query texts or list of query embeddings + q_ids : List[str] + List of corresponding query ids. 
+ k : int + Number of hits to return. + threads : int + Maximum number of threads to use. + return_vector : bool + Return the results with vectors + + Returns + ------- + Union[Dict[str, List[DenseSearchResult]], Tuple[np.ndarray, Dict[str, List[PRFDenseSearchResult]]]] + Either returns a dictionary holding the search results, with the query ids as keys and the + corresponding lists of search results as the values. + Or returns a tuple with ndarray of query vectors and a dictionary of PRF Dense Search Results with vectors + """ + if isinstance(queries, np.ndarray): + q_embs = queries + else: + q_embs = np.array([self.query_encoder.encode(q) for q in queries]) + n, m = q_embs.shape + assert m == self.dimension + faiss.omp_set_num_threads(threads) + if return_vector: + D, I, V = self.index.search_and_reconstruct(q_embs, k) + return q_embs, {key: [PRFDenseSearchResult(self.docids[idx], score, vector) + for score, idx, vector in zip(distances, indexes, vectors) if idx != -1] + for key, distances, indexes, vectors in zip(q_ids, D, I, V)} + else: + D, I = self.index.search(q_embs, k) + return {key: [DenseSearchResult(self.docids[idx], score) + for score, idx in zip(distances, indexes) if idx != -1] + for key, distances, indexes in zip(q_ids, D, I)} + + def load_index(self, index_dir: str): + index_path = os.path.join(index_dir, 'index') + docid_path = os.path.join(index_dir, 'docid') + index = faiss.read_index(index_path) + docids = self.load_docids(docid_path) + return index, docids + + def doc(self, docid: Union[str, int]) -> Optional[Document]: + """Return the :class:`Document` corresponding to ``docid``. Since dense indexes don't store documents + but sparse indexes do, route over to corresponding sparse index (according to prebuilt_index_info.py) + and use its doc API + + Parameters + ---------- + docid : Union[str, int] + Overloaded ``docid``: either an external collection ``docid`` (``str``) or an internal Lucene ``docid`` + (``int``). + + Returns + ------- + Document + :class:`Document` corresponding to the ``docid``. + """ + return self.ssearcher.doc(docid) if self.ssearcher else None + + @staticmethod + def _init_encoder_from_str(encoder): + encoder_lower = encoder.lower() + if 'dpr' in encoder_lower: + return DprQueryEncoder(encoder_dir=encoder) + elif 'tct_colbert' in encoder_lower: + return TctColBertQueryEncoder(encoder_dir=encoder) + elif 'ance' in encoder_lower: + return AnceQueryEncoder(encoder_dir=encoder) + elif 'sentence' in encoder_lower: + return AutoQueryEncoder(encoder_dir=encoder, pooling='mean', l2_norm=True) + else: + return AutoQueryEncoder(encoder_dir=encoder) + + @staticmethod + def load_docids(docid_path: str) -> List[str]: + id_f = open(docid_path, 'r') + docids = [line.rstrip() for line in id_f.readlines()] + id_f.close() + return docids + + def set_hnsw_ef_search(self, ef_search: int): + self.index.hnsw.efSearch = ef_search + + +class BinaryDenseSearcher(FaissSearcher): + """Simple Searcher for binary-dense representation + + Parameters + ---------- + index_dir : str + Path to faiss index directory. + """ + + def __init__(self, index_dir: str, query_encoder: Union[QueryEncoder, str], + prebuilt_index_name: Optional[str] = None): + super().__init__(index_dir, query_encoder, prebuilt_index_name) + + def search(self, query: str, k: int = 10, binary_k: int = 100, rerank: bool = True, threads: int = 1) \ + -> List[DenseSearchResult]: + """Search the collection. 
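+
+        When ``rerank`` is true, first retrieves ``binary_k`` candidates from the binary index
+        (Hamming distance over the packed binary codes) and then rescores them with the dense
+        query embedding; otherwise the binary index is searched for ``k`` hits directly.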
+ + Parameters + ---------- + query : str + query text + k : int + Number of hits to return at second stage. + binary_k : int + Number of hits to return at first stage. + rerank: bool + Whether to use dense repr to rerank the binary ranking results. + threads : int + Maximum number of threads to use for intra-query search. + Returns + ------- + List[DenseSearchResult] + List of search results. + """ + ret = self.query_encoder.encode(query) + dense_emb_q = ret['dense'] + sparse_emb_q = ret['sparse'] + assert len(dense_emb_q) == self.dimension + assert len(sparse_emb_q) == self.dimension + + dense_emb_q = dense_emb_q.reshape((1, len(dense_emb_q))) + sparse_emb_q = sparse_emb_q.reshape((1, len(sparse_emb_q))) + faiss.omp_set_num_threads(threads) + distances, indexes = self.binary_dense_search(k, binary_k, rerank, dense_emb_q, sparse_emb_q) + distances = distances.flat + indexes = indexes.flat + return [DenseSearchResult(str(idx), score) + for score, idx in zip(distances, indexes) if idx != -1] + + def batch_search(self, queries: List[str], q_ids: List[str], k: int = 10, binary_k: int = 100, + rerank: bool = True, threads: int = 1) -> Dict[str, List[DenseSearchResult]]: + """ + + Parameters + ---------- + queries : List[str] + List of query texts + q_ids : List[str] + List of corresponding query ids. + k : int + Number of hits to return. + binary_k : int + Number of hits to return at first stage. + rerank: bool + Whether to use dense repr to rerank the binary ranking results. + threads : int + Maximum number of threads to use. + + Returns + ------- + Dict[str, List[DenseSearchResult]] + Dictionary holding the search results, with the query ids as keys and the corresponding lists of search + results as the values. + """ + dense_q_embs = [] + sparse_q_embs = [] + for q in queries: + ret = self.query_encoder.encode(q) + dense_q_embs.append(ret['dense']) + sparse_q_embs.append(ret['sparse']) + dense_q_embs = np.array(dense_q_embs) + sparse_q_embs = np.array(sparse_q_embs) + n, m = dense_q_embs.shape + assert m == self.dimension + faiss.omp_set_num_threads(threads) + D, I = self.binary_dense_search(k, binary_k, rerank, dense_q_embs, sparse_q_embs) + return {key: [DenseSearchResult(str(idx), score) + for score, idx in zip(distances, indexes) if idx != -1] + for key, distances, indexes in zip(q_ids, D, I)} + + def binary_dense_search(self, k, binary_k, rerank, dense_emb_q, sparse_emb_q): + num_queries = dense_emb_q.shape[0] + sparse_emb_q = np.packbits(np.where(sparse_emb_q > 0, 1, 0)).reshape(num_queries, -1) + + if not rerank: + distances, indexes = self.index.search(sparse_emb_q, k) + else: + raw_index = self.index.index + _, indexes = raw_index.search(sparse_emb_q, binary_k) + sparse_emb_p = np.vstack( + [np.unpackbits(raw_index.reconstruct(int(id_))) for id_ in indexes.reshape(-1)] + ) + sparse_emb_p = sparse_emb_p.reshape( + dense_emb_q.shape[0], binary_k, dense_emb_q.shape[1] + ) + sparse_emb_p = sparse_emb_p.astype(np.float32) + sparse_emb_p = sparse_emb_p * 2 - 1 + distances = np.einsum("ijk,ik->ij", sparse_emb_p, dense_emb_q) + sorted_indices = np.argsort(-distances, axis=1) + + indexes = indexes[np.arange(num_queries)[:, None], sorted_indices] + indexes = np.array([self.index.id_map.at(int(id_)) for id_ in indexes.reshape(-1)], dtype=np.int) + indexes = indexes.reshape(num_queries, -1)[:, :k] + distances = distances[np.arange(num_queries)[:, None], sorted_indices][:, :k] + return distances, indexes + + def load_index(self, index_dir: str): + index_path = os.path.join(index_dir, 'index') + 
index = faiss.read_index_binary(index_path) + return index, None + + @staticmethod + def _init_encoder_from_str(encoder): + encoder = encoder.lower() + if 'bpr' in encoder: + return BprQueryEncoder(encoder_dir=encoder) + else: + raise NotImplementedError diff --git a/pyserini/search/hybrid/__init__.py b/pyserini/search/hybrid/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..61ca8b0c8ed5d15cf1189c6468158999d869bdf5 --- /dev/null +++ b/pyserini/search/hybrid/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._searcher import HybridSearcher + +__all__ = ['HybridSearcher'] \ No newline at end of file diff --git a/pyserini/search/hybrid/__main__.py b/pyserini/search/hybrid/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d19a0c3c2afefdc286a5aa4d775d80bcac8bf4a --- /dev/null +++ b/pyserini/search/hybrid/__main__.py @@ -0,0 +1,185 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import json +import os +import sys + +from tqdm import tqdm + +from pyserini.search.faiss import FaissSearcher +from pyserini.query_iterator import get_query_iterator, TopicsFormat +from pyserini.output_writer import get_output_writer, OutputFormat +from pyserini.search.lucene import LuceneImpactSearcher, LuceneSearcher +from pyserini.search.hybrid import HybridSearcher + +from pyserini.search.faiss.__main__ import define_dsearch_args, init_query_encoder +from pyserini.search.lucene.__main__ import define_search_args, set_bm25_parameters + +# Fixes this error: "OMP: Error #15: Initializing libomp.a, but found libomp.dylib already initialized." 
+# https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial +os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + + +def define_fusion_args(parser): + parser.add_argument('--alpha', type=float, metavar='num', required=False, default=0.1, + help="alpha for hybrid search") + parser.add_argument('--hits', type=int, required=False, default=1000, help='number of hits from dense and sparse') + parser.add_argument('--normalization', action='store_true', required=False, help='hybrid score with normalization') + parser.add_argument('--weight-on-dense', action='store_true', required=False, help='weight on dense part') + + +def parse_args(parser, commands): + # Divide argv by commands + split_argv = [[]] + for c in sys.argv[1:]: + if c in commands.choices: + split_argv.append([c]) + else: + split_argv[-1].append(c) + # Initialize namespace + args = argparse.Namespace() + for c in commands.choices: + setattr(args, c, None) + # Parse each command + parser.parse_args(split_argv[0], namespace=args) # Without command + for argv in split_argv[1:]: # Commands + n = argparse.Namespace() + setattr(args, argv[0], n) + parser.parse_args(argv, namespace=n) + return args + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Conduct a hybrid search on dense+sparse indexes.') + + commands = parser.add_subparsers(title='sub-commands') + + dense_parser = commands.add_parser('dense') + define_dsearch_args(dense_parser) + + sparse_parser = commands.add_parser('sparse') + define_search_args(sparse_parser) + + fusion_parser = commands.add_parser('fusion') + define_fusion_args(fusion_parser) + + run_parser = commands.add_parser('run') + run_parser.add_argument('--topics', type=str, metavar='topic_name', required=False, + help="Name of topics. Available: msmarco-passage-dev-subset.") + run_parser.add_argument('--hits', type=int, metavar='num', required=False, default=1000, help="Number of hits.") + run_parser.add_argument('--topics-format', type=str, metavar='format', default=TopicsFormat.DEFAULT.value, + help=f"Format of topics. Available: {[x.value for x in list(TopicsFormat)]}") + run_parser.add_argument('--output-format', type=str, metavar='format', default=OutputFormat.TREC.value, + help=f"Format of output. 
Available: {[x.value for x in list(OutputFormat)]}") + run_parser.add_argument('--output', type=str, metavar='path', required=False, help="Path to output file.") + run_parser.add_argument('--max-passage', action='store_true', + default=False, help="Select only max passage from document.") + run_parser.add_argument('--max-passage-hits', type=int, metavar='num', required=False, default=100, + help="Final number of hits when selecting only max passage.") + run_parser.add_argument('--max-passage-delimiter', type=str, metavar='str', required=False, default='#', + help="Delimiter between docid and passage id.") + run_parser.add_argument('--batch-size', type=int, metavar='num', required=False, + default=1, help="Specify batch size to search the collection concurrently.") + run_parser.add_argument('--threads', type=int, metavar='num', required=False, + default=1, help="Maximum number of threads to use.") + + args = parse_args(parser, commands) + + query_iterator = get_query_iterator(args.run.topics, TopicsFormat(args.run.topics_format)) + topics = query_iterator.topics + + query_encoder = init_query_encoder(args.dense.encoder, + args.dense.encoder_class, + args.dense.tokenizer, + args.run.topics, + args.dense.encoded_queries, + args.dense.device, + args.dense.query_prefix) + + if os.path.exists(args.dense.index): + # create searcher from index directory + dsearcher = FaissSearcher(args.dense.index, query_encoder) + else: + # create searcher from prebuilt index name + dsearcher = FaissSearcher.from_prebuilt_index(args.dense.index, query_encoder) + + if not dsearcher: + exit() + + if os.path.exists(args.sparse.index): + # create searcher from index directory + if args.sparse.impact: + ssearcher = LuceneImpactSearcher(args.sparse.index, args.sparse.encoder, args.sparse.min_idf) + else: + ssearcher = LuceneSearcher(args.sparse.index) + else: + # create searcher from prebuilt index name + if args.sparse.impact: + ssearcher = LuceneImpactSearcher.from_prebuilt_index(args.sparse.index, args.sparse.encoder, args.sparse.min_idf) + else: + ssearcher = LuceneSearcher.from_prebuilt_index(args.sparse.index) + + if not ssearcher: + exit() + + set_bm25_parameters(ssearcher, args.sparse.index, args.sparse.k1, args.sparse.b) + + if args.sparse.language != 'en': + ssearcher.set_language(args.sparse.language) + + hsearcher = HybridSearcher(dsearcher, ssearcher) + if not hsearcher: + exit() + + # build output path + output_path = args.run.output + + print(f'Running {args.run.topics} topics, saving to {output_path}...') + tag = 'hybrid' + + output_writer = get_output_writer(output_path, OutputFormat(args.run.output_format), 'w', + max_hits=args.run.hits, tag=tag, topics=topics, + use_max_passage=args.run.max_passage, + max_passage_delimiter=args.run.max_passage_delimiter, + max_passage_hits=args.run.max_passage_hits) + + with output_writer: + batch_topics = list() + batch_topic_ids = list() + for index, (topic_id, text) in enumerate(tqdm(query_iterator, total=len(topics.keys()))): + if args.run.batch_size <= 1 and args.run.threads <= 1: + hits = hsearcher.search(text, args.fusion.hits, args.run.hits, args.fusion.alpha, args.fusion.normalization, args.fusion.weight_on_dense) + results = [(topic_id, hits)] + else: + batch_topic_ids.append(str(topic_id)) + batch_topics.append(text) + if (index + 1) % args.run.batch_size == 0 or \ + index == len(topics.keys()) - 1: + results = hsearcher.batch_search( + batch_topics, batch_topic_ids, args.fusion.hits, args.run.hits, args.run.threads, + args.fusion.alpha, 
args.fusion.normalization, args.fusion.weight_on_dense) + results = [(id_, results[id_]) for id_ in batch_topic_ids] + batch_topic_ids.clear() + batch_topics.clear() + else: + continue + + for topic, hits in results: + output_writer.write(topic, hits) + + results.clear() diff --git a/pyserini/search/hybrid/_searcher.py b/pyserini/search/hybrid/_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..0817f6c85ddf4b5b5bf554421dd3155fdefb8621 --- /dev/null +++ b/pyserini/search/hybrid/_searcher.py @@ -0,0 +1,81 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's hybrid searcher by Dense + Sparse +""" + +from typing import List, Dict +from pyserini.search.lucene import LuceneSearcher +from pyserini.search.faiss import FaissSearcher, DenseSearchResult + + +class HybridSearcher: + """Hybrid Searcher for dense + sparse + + Parameters + ---------- + dense_searcher : FaissSearcher + sparse_searcher : LuceneSearcher + """ + + def __init__(self, dense_searcher, sparse_searcher): + self.dense_searcher = dense_searcher + self.sparse_searcher = sparse_searcher + + def search(self, query: str, k0: int = 10, k: int = 10, alpha: float = 0.1, normalization: bool = False, weight_on_dense: bool = False) -> List[DenseSearchResult]: + dense_hits = self.dense_searcher.search(query, k0) + sparse_hits = self.sparse_searcher.search(query, k0) + return self._hybrid_results(dense_hits, sparse_hits, alpha, k, normalization, weight_on_dense) + + def batch_search(self, queries: List[str], q_ids: List[str], k0: int = 10, k: int = 10, threads: int = 1, + alpha: float = 0.1, normalization: bool = False, weight_on_dense: bool = False) \ + -> Dict[str, List[DenseSearchResult]]: + dense_result = self.dense_searcher.batch_search(queries, q_ids, k0, threads) + sparse_result = self.sparse_searcher.batch_search(queries, q_ids, k0, threads) + hybrid_result = { + key: self._hybrid_results(dense_result[key], sparse_result[key], alpha, k, normalization, weight_on_dense) + for key in dense_result + } + return hybrid_result + + @staticmethod + def _hybrid_results(dense_results, sparse_results, alpha, k, normalization=False, weight_on_dense=False): + dense_hits = {hit.docid: hit.score for hit in dense_results} + sparse_hits = {hit.docid: hit.score for hit in sparse_results} + hybrid_result = [] + min_dense_score = min(dense_hits.values()) if len(dense_hits) > 0 else 0 + max_dense_score = max(dense_hits.values()) if len(dense_hits) > 0 else 1 + min_sparse_score = min(sparse_hits.values()) if len(sparse_hits) > 0 else 0 + max_sparse_score = max(sparse_hits.values()) if len(sparse_hits) > 0 else 1 + for doc in set(dense_hits.keys()) | set(sparse_hits.keys()): + if doc not in dense_hits: + sparse_score = sparse_hits[doc] + dense_score = min_dense_score + elif doc not in sparse_hits: + sparse_score = min_sparse_score + dense_score = dense_hits[doc] + else: + sparse_score = sparse_hits[doc] + 
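+                # Below, each score is optionally min-max rescaled to roughly [-0.5, 0.5], then the
+                # two are interpolated as alpha * sparse + dense (or sparse + alpha * dense when
+                # weight_on_dense is set).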
dense_score = dense_hits[doc] + if normalization: + sparse_score = (sparse_score - (min_sparse_score + max_sparse_score) / 2) \ + / (max_sparse_score - min_sparse_score) + dense_score = (dense_score - (min_dense_score + max_dense_score) / 2) \ + / (max_dense_score - min_dense_score) + score = alpha * sparse_score + dense_score if not weight_on_dense else sparse_score + alpha * dense_score + hybrid_result.append(DenseSearchResult(doc, score)) + return sorted(hybrid_result, key=lambda x: x.score, reverse=True)[:k] diff --git a/pyserini/search/lucene/__init__.py b/pyserini/search/lucene/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..22063fc37248c5cfc310c5fc4bd74c020b02c264 --- /dev/null +++ b/pyserini/search/lucene/__init__.py @@ -0,0 +1,29 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._geo_searcher import LuceneGeoSearcher +from ._impact_searcher import JImpactSearcherResult, LuceneImpactSearcher, SlimSearcher +from ._searcher import JLuceneSearcherResult, LuceneSimilarities, \ + LuceneFusionSearcher, LuceneSearcher + +__all__ = ['JImpactSearcherResult', + 'JLuceneSearcherResult', + 'LuceneFusionSearcher', + 'LuceneGeoSearcher', + 'LuceneImpactSearcher', + 'LuceneSearcher', + 'SlimSearcher', + 'LuceneSimilarities'] diff --git a/pyserini/search/lucene/__main__.py b/pyserini/search/lucene/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..85dac82c7f532fafcdb3da23558b449e0aeecdf4 --- /dev/null +++ b/pyserini/search/lucene/__main__.py @@ -0,0 +1,346 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os + +from tqdm import tqdm +from transformers import AutoTokenizer + +from pyserini.analysis import JDefaultEnglishAnalyzer, JWhiteSpaceAnalyzer +from pyserini.output_writer import OutputFormat, get_output_writer +from pyserini.pyclass import autoclass +from pyserini.query_iterator import get_query_iterator, TopicsFormat +from pyserini.search import JDisjunctionMaxQueryGenerator +from . 
import LuceneImpactSearcher, LuceneSearcher, SlimSearcher +from .reranker import ClassifierType, PseudoRelevanceClassifierReranker + + +def set_bm25_parameters(searcher, index, k1=None, b=None): + if k1 is not None or b is not None: + if k1 is None or b is None: + print('Must set *both* k1 and b for BM25!') + exit() + print(f'Setting BM25 parameters: k1={k1}, b={b}') + searcher.set_bm25(k1, b) + else: + # Automatically set bm25 parameters based on known index... + if index == 'msmarco-passage' or index == 'msmarco-passage-slim' or index == 'msmarco-v1-passage' or \ + index == 'msmarco-v1-passage-slim' or index == 'msmarco-v1-passage-full': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-passage.md + print('MS MARCO passage: setting k1=0.82, b=0.68') + searcher.set_bm25(0.82, 0.68) + elif index == 'msmarco-passage-expanded' or \ + index == 'msmarco-v1-passage-d2q-t5' or \ + index == 'msmarco-v1-passage-d2q-t5-docvectors': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-passage-docTTTTTquery.md + print('MS MARCO passage w/ doc2query-T5 expansion: setting k1=2.18, b=0.86') + searcher.set_bm25(2.18, 0.86) + elif index == 'msmarco-doc' or index == 'msmarco-doc-slim' or index == 'msmarco-v1-doc' or \ + index == 'msmarco-v1-doc-slim' or index == 'msmarco-v1-doc-full': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-doc.md + print('MS MARCO doc: setting k1=4.46, b=0.82') + searcher.set_bm25(4.46, 0.82) + elif index == 'msmarco-doc-per-passage' or index == 'msmarco-doc-per-passage-slim' or \ + index == 'msmarco-v1-doc-segmented' or index == 'msmarco-v1-doc-segmented-slim' or \ + index == 'msmarco-v1-doc-segmented-full': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-doc-segmented.md + print('MS MARCO doc, per passage: setting k1=2.16, b=0.61') + searcher.set_bm25(2.16, 0.61) + elif index == 'msmarco-doc-expanded-per-doc' or \ + index == 'msmarco-v1-doc-d2q-t5' or \ + index == 'msmarco-v1-doc-d2q-t5-docvectors': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-doc-docTTTTTquery.md + print('MS MARCO doc w/ doc2query-T5 (per doc) expansion: setting k1=4.68, b=0.87') + searcher.set_bm25(4.68, 0.87) + elif index == 'msmarco-doc-expanded-per-passage' or \ + index == 'msmarco-v1-doc-segmented-d2q-t5' or \ + index == 'msmarco-v1-doc-segmented-d2q-t5-docvectors': + # See https://github.com/castorini/anserini/blob/master/docs/regressions-msmarco-doc-segmented-docTTTTTquery.md + print('MS MARCO doc w/ doc2query-T5 (per passage) expansion: setting k1=2.56, b=0.59') + searcher.set_bm25(2.56, 0.59) + + +def define_search_args(parser): + parser.add_argument('--index', type=str, metavar='path to index or index name', required=True, + help="Path to Lucene index or name of prebuilt index.") + parser.add_argument('--encoded-corpus', type=str, default=None, help="path to stored sparse vectors") + + parser.add_argument('--impact', action='store_true', help="Use Impact.") + parser.add_argument('--encoder', type=str, default=None, help="encoder name") + parser.add_argument('--onnx-encoder', type=str, default=None, help="onnx encoder name") + parser.add_argument('--min-idf', type=int, default=0, help="minimum idf") + + parser.add_argument('--bm25', action='store_true', default=True, help="Use BM25 (default).") + parser.add_argument('--k1', type=float, help='BM25 k1 parameter.') + parser.add_argument('--b', type=float, help='BM25 b parameter.') + + 
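+    # Example invocation (a sketch; the index and topics names must be available locally or as
+    # prebuilt resources):
+    #   python -m pyserini.search.lucene --index msmarco-v1-passage \
+    #     --topics msmarco-passage-dev-subset --output run.msmarco-passage.txt --bm25 --k1 0.82 --b 0.68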
parser.add_argument('--rm3', action='store_true', help="Use RM3") + parser.add_argument('--rocchio', action='store_true', help="Use Rocchio") + parser.add_argument('--rocchio-use-negative', action='store_true', help="Use nonrelevant labels in Rocchio") + parser.add_argument('--qld', action='store_true', help="Use QLD") + + parser.add_argument('--language', type=str, help='language code for BM25, e.g. zh for Chinese', default='en') + parser.add_argument('--pretokenized', action='store_true', help="Boolean switch to accept pre-tokenized topics") + + parser.add_argument('--prcl', type=ClassifierType, nargs='+', default=[], + help='Specify the classifier PseudoRelevanceClassifierReranker uses.') + parser.add_argument('--prcl.vectorizer', dest='vectorizer', type=str, + help='Type of vectorizer. Available: TfidfVectorizer, BM25Vectorizer.') + parser.add_argument('--prcl.r', dest='r', type=int, default=10, + help='Number of positive labels in pseudo relevance feedback.') + parser.add_argument('--prcl.n', dest='n', type=int, default=100, + help='Number of negative labels in pseudo relevance feedback.') + parser.add_argument('--prcl.alpha', dest='alpha', type=float, default=0.5, + help='Alpha value for interpolation in pseudo relevance feedback.') + + parser.add_argument('--fields', metavar="key=value", nargs='+', + help='Fields to search with assigned float weights.') + parser.add_argument('--dismax', action='store_true', default=False, + help='Use disjunction max queries when searching multiple fields.') + parser.add_argument('--dismax.tiebreaker', dest='tiebreaker', type=float, default=0.0, + help='The tiebreaker weight to use in disjunction max queries.') + + parser.add_argument('--stopwords', type=str, help='Path to file with customstopwords.') + + +if __name__ == "__main__": + JLuceneSearcher = autoclass('io.anserini.search.SimpleSearcher') + parser = argparse.ArgumentParser(description='Search a Lucene index.') + define_search_args(parser) + parser.add_argument('--topics', type=str, metavar='topic_name', required=True, + help="Name of topics. Available: robust04, robust05, core17, core18.") + parser.add_argument('--hits', type=int, metavar='num', + required=False, default=1000, help="Number of hits.") + parser.add_argument('--topics-format', type=str, metavar='format', default=TopicsFormat.DEFAULT.value, + help=f"Format of topics. Available: {[x.value for x in list(TopicsFormat)]}") + parser.add_argument('--output-format', type=str, metavar='format', default=OutputFormat.TREC.value, + help=f"Format of output. 
Available: {[x.value for x in list(OutputFormat)]}") + parser.add_argument('--output', type=str, metavar='path', + help="Path to output file.") + parser.add_argument('--max-passage', action='store_true', + default=False, help="Select only max passage from document.") + parser.add_argument('--max-passage-hits', type=int, metavar='num', required=False, default=100, + help="Final number of hits when selecting only max passage.") + parser.add_argument('--max-passage-delimiter', type=str, metavar='str', required=False, default='#', + help="Delimiter between docid and passage id.") + parser.add_argument('--batch-size', type=int, metavar='num', required=False, + default=1, help="Specify batch size to search the collection concurrently.") + parser.add_argument('--threads', type=int, metavar='num', required=False, + default=1, help="Maximum number of threads to use.") + parser.add_argument('--tokenizer', type=str, help='tokenizer used to preprocess topics') + parser.add_argument('--remove-duplicates', action='store_true', default=False, help="Remove duplicate docs.") + # For some test collections, a query is doc from the corpus (e.g., arguana in BEIR). + # We want to remove the query from the results. This is equivalent to -removeQuery in Java. + parser.add_argument('--remove-query', action='store_true', default=False, help="Remove query from results list.") + + args = parser.parse_args() + + query_iterator = get_query_iterator(args.topics, TopicsFormat(args.topics_format)) + topics = query_iterator.topics + + if not args.impact: + if os.path.exists(args.index): + # create searcher from index directory + searcher = LuceneSearcher(args.index) + else: + # create searcher from prebuilt index name + searcher = LuceneSearcher.from_prebuilt_index(args.index) + elif args.impact: + if args.encoder and args.onnx_encoder: + raise ValueError("Cannot specify both --encoder and --onnx-encoder") + if args.encoder: + if os.path.exists(args.index): + if args.encoded_corpus is not None: + searcher = SlimSearcher(args.encoded_corpus, args.index, args.encoder, args.min_idf) + else: + searcher = LuceneImpactSearcher(args.index, args.encoder, args.min_idf) + else: + if args.encoded_corpus is not None: + searcher = SlimSearcher.from_prebuilt_index(args.encoded_corpus, args.index, args.encoder, args.min_idf) + else: + searcher = LuceneImpactSearcher.from_prebuilt_index(args.index, args.encoder, args.min_idf) + elif args.onnx_encoder: + if os.path.exists(args.index): + if args.encoded_corpus is not None: + searcher = SlimSearcher(args.encoded_corpus, args.index, args.onnx_encoder, args.min_idf) + else: + searcher = LuceneImpactSearcher(args.index, args.onnx_encoder, args.min_idf, 'onnx') + else: + if args.encoded_corpus is not None: + searcher = SlimSearcher.from_prebuilt_index(args.encoded_corpus, args.index, args.onnx_encoder, args.min_idf) + else: + searcher = LuceneImpactSearcher.from_prebuilt_index(args.index, args.onnx_encoder, args.min_idf, 'onnx') + # These are the cases where we're specifying pre-encoded queries + elif os.path.exists(args.index): + searcher = LuceneImpactSearcher(args.index, args.encoder, args.min_idf) + else: + searcher = LuceneImpactSearcher.from_prebuilt_index(args.index, args.encoder, args.min_idf) + + if args.language != 'en': + searcher.set_language(args.language) + + if not searcher: + exit() + + search_rankers = [] + + if args.qld: + search_rankers.append('qld') + searcher.set_qld() + elif args.bm25: + search_rankers.append('bm25') + set_bm25_parameters(searcher, args.index, args.k1, 
args.b) + + if args.rm3: + search_rankers.append('rm3') + searcher.set_rm3() + + if args.rocchio: + search_rankers.append('rocchio') + if args.rocchio_use_negative: + searcher.set_rocchio(gamma=0.15, use_negative=True) + else: + searcher.set_rocchio() + + fields = dict() + if args.fields: + fields = dict([pair.split('=') for pair in args.fields]) + print(f'Searching over fields: {fields}') + + query_generator = None + if args.dismax: + query_generator = JDisjunctionMaxQueryGenerator(args.tiebreaker) + print(f'Using dismax query generator with tiebreaker={args.tiebreaker}') + + if args.pretokenized: + analyzer = JWhiteSpaceAnalyzer() + searcher.set_analyzer(analyzer) + if args.tokenizer is not None: + raise ValueError(f"--tokenizer is not supported with when setting --pretokenized.") + + if args.tokenizer != None: + analyzer = JWhiteSpaceAnalyzer() + searcher.set_analyzer(analyzer) + print(f'Using whitespace analyzer because of pretokenized topics') + tokenizer = AutoTokenizer.from_pretrained(args.tokenizer) + print(f'Using {args.tokenizer} to preprocess topics') + + if args.stopwords: + analyzer = JDefaultEnglishAnalyzer.fromArguments('porter', False, args.stopwords) + searcher.set_analyzer(analyzer) + print(f'Using custom stopwords={args.stopwords}') + + # get re-ranker + use_prcl = args.prcl and len(args.prcl) > 0 and args.alpha > 0 + if use_prcl is True: + ranker = PseudoRelevanceClassifierReranker( + searcher.index_dir, args.vectorizer, args.prcl, r=args.r, n=args.n, alpha=args.alpha) + + # build output path + output_path = args.output + if output_path is None: + if use_prcl is True: + clf_rankers = [] + for t in args.prcl: + if t == ClassifierType.LR: + clf_rankers.append('lr') + elif t == ClassifierType.SVM: + clf_rankers.append('svm') + + r_str = f'prcl.r_{args.r}' + n_str = f'prcl.n_{args.n}' + a_str = f'prcl.alpha_{args.alpha}' + clf_str = 'prcl_' + '+'.join(clf_rankers) + tokens1 = ['run', args.topics, '+'.join(search_rankers)] + tokens2 = [args.vectorizer, clf_str, r_str, n_str, a_str] + output_path = '.'.join(tokens1) + '-' + '-'.join(tokens2) + ".txt" + else: + tokens = ['run', args.topics, '+'.join(search_rankers), 'txt'] + output_path = '.'.join(tokens) + + print(f'Running {args.topics} topics, saving to {output_path}...') + tag = output_path[:-4] if args.output is None else 'Anserini' + + output_writer = get_output_writer(output_path, OutputFormat(args.output_format), 'w', + max_hits=args.hits, tag=tag, topics=topics, + use_max_passage=args.max_passage, + max_passage_delimiter=args.max_passage_delimiter, + max_passage_hits=args.max_passage_hits) + + with output_writer: + batch_topics = list() + batch_topic_ids = list() + for index, (topic_id, text) in enumerate(tqdm(query_iterator, total=len(topics.keys()))): + if (args.tokenizer != None): + toks = tokenizer.tokenize(text) + text = ' ' + text = text.join(toks) + if args.batch_size <= 1 and args.threads <= 1: + if args.impact: + hits = searcher.search(text, args.hits, fields=fields) + else: + hits = searcher.search(text, args.hits, query_generator=query_generator, fields=fields) + results = [(topic_id, hits)] + else: + batch_topic_ids.append(str(topic_id)) + batch_topics.append(text) + if (index + 1) % args.batch_size == 0 or \ + index == len(topics.keys()) - 1: + if args.impact: + results = searcher.batch_search( + batch_topics, batch_topic_ids, args.hits, args.threads, fields=fields + ) + else: + results = searcher.batch_search( + batch_topics, batch_topic_ids, args.hits, args.threads, + query_generator=query_generator, 
fields=fields + ) + results = [(id_, results[id_]) for id_ in batch_topic_ids] + batch_topic_ids.clear() + batch_topics.clear() + else: + continue + + for topic, hits in results: + # do rerank + if use_prcl and len(hits) > (args.r + args.n): + docids = [hit.docid.strip() for hit in hits] + scores = [hit.score for hit in hits] + scores, docids = ranker.rerank(docids, scores) + docid_score_map = dict(zip(docids, scores)) + for hit in hits: + hit.score = docid_score_map[hit.docid.strip()] + + if args.remove_duplicates: + seen_docids = set() + dedup_hits = [] + for hit in hits: + if hit.docid.strip() in seen_docids: + continue + seen_docids.add(hit.docid.strip()) + dedup_hits.append(hit) + hits = dedup_hits + + # For some test collections, a query is doc from the corpus (e.g., arguana in BEIR). + # We want to remove the query from the results. + if args.remove_query: + hits = [hit for hit in hits if hit.docid != topic] + + # write results + output_writer.write(topic, hits) + + results.clear() diff --git a/pyserini/search/lucene/__pycache__/__init__.cpython-310.pyc b/pyserini/search/lucene/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eea080b3f90ee9b33a5ce4551fdf5cf075b2e718 Binary files /dev/null and b/pyserini/search/lucene/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/search/lucene/__pycache__/_geo_searcher.cpython-310.pyc b/pyserini/search/lucene/__pycache__/_geo_searcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5adea0dd441895a94c023d9ffae989b8c870152e Binary files /dev/null and b/pyserini/search/lucene/__pycache__/_geo_searcher.cpython-310.pyc differ diff --git a/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d406066d7b5c5c6518c7e06abf74775abc4285f Binary files /dev/null and b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc differ diff --git a/pyserini/search/lucene/__pycache__/_searcher.cpython-310.pyc b/pyserini/search/lucene/__pycache__/_searcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f7a381c4b8e4fbc4ad0d7be6b0acdfcf9d9cdf4 Binary files /dev/null and b/pyserini/search/lucene/__pycache__/_searcher.cpython-310.pyc differ diff --git a/pyserini/search/lucene/_geo_searcher.py b/pyserini/search/lucene/_geo_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..24c1f7e467bc7068e36a849db030410ea0b01d86 --- /dev/null +++ b/pyserini/search/lucene/_geo_searcher.py @@ -0,0 +1,82 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python search interface to Anserini. The main entry point is the ``LuceneGeoSearcher`` +class, which wraps the Java class ``SimpleGeoSearcher`` in Anserini. 
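+
+A minimal usage sketch; the index path, field name, and bounding box are illustrative, and the query
+is built through the wrapped Lucene classes defined below:
+
+    searcher = LuceneGeoSearcher('indexes/my-geo-index')
+    query = JLatLonShape.newBoxQuery('geometry', JQueryRelation.INTERSECTS, 40.0, 41.0, -74.5, -73.5)
+    hits = searcher.search(query, k=10)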
+""" + +import logging +from typing import List + +from pyserini.pyclass import autoclass +from pyserini.search import JQuery + + +logger = logging.getLogger(__name__) + + +# Wrappers around Lucene classes +JSort = autoclass('org.apache.lucene.search.Sort') +JLatLonDocValuesField = autoclass('org.apache.lucene.document.LatLonDocValuesField') +JLatLonShape = autoclass('org.apache.lucene.document.LatLonShape') +JQueryRelation = autoclass('org.apache.lucene.document.ShapeField$QueryRelation') +JLongPoint = autoclass('org.apache.lucene.document.LongPoint') + +# Wrappers around Anserini classes +JGeoSearcher = autoclass('io.anserini.search.SimpleGeoSearcher') +JGeoSearcherResult = autoclass('io.anserini.search.SimpleSearcher$Result') + + +class LuceneGeoSearcher: + """Wrapper class for ``SimpleGeoSearcher`` in Anserini. + + Parameters + ---------- + index_dir : str + Path to Lucene index directory. + """ + + def __init__(self, index_dir: str): + self.index_dir = index_dir + self.object = JGeoSearcher(index_dir) + + def search(self, q: JQuery, k: int = 10, sort: JSort = None) -> List[JGeoSearcherResult]: + """Search the collection. + + Parameters + ---------- + q : JQuery + Lucene query. + k : int + Number of hits to return. + sort : JSort + Optional distance sort that allows searcher to return results based on distance to a point. + + Returns + ------- + List[JGeoSearcherResult] + List of search results. + """ + if sort: + hits = self.object.searchGeo(q, k, sort) + else: + hits = self.object.searchGeo(q, k) + return hits + + def close(self): + """Close the searcher.""" + self.object.close() diff --git a/pyserini/search/lucene/_impact_searcher.py b/pyserini/search/lucene/_impact_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..c6d433526d6c56d0a885753b226e625362dd1434 --- /dev/null +++ b/pyserini/search/lucene/_impact_searcher.py @@ -0,0 +1,406 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python search interface to Anserini. The main entry point is the ``LuceneImpactSearcher`` +class, which wraps the Java class with the same name in Anserini. 
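+
+A minimal usage sketch; the prebuilt index and query encoder names are illustrative:
+
+    searcher = LuceneImpactSearcher.from_prebuilt_index('msmarco-v1-passage-unicoil',
+                                                        'castorini/unicoil-msmarco-passage')
+    hits = searcher.search('what is a lobster roll', k=10)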
+""" + +import logging +import os +import pickle +from tqdm import tqdm +from typing import Dict, List, Optional, Union +from collections import namedtuple + +import numpy as np +import scipy + +from pyserini.encode import QueryEncoder, TokFreqQueryEncoder, UniCoilQueryEncoder, \ + CachedDataQueryEncoder, SpladeQueryEncoder, SlimQueryEncoder +from pyserini.index import Document +from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap +from pyserini.util import download_prebuilt_index, download_encoded_corpus + +logger = logging.getLogger(__name__) + +# Wrappers around Anserini classes +JImpactSearcher = autoclass('io.anserini.search.SimpleImpactSearcher') +JImpactSearcherResult = autoclass('io.anserini.search.SimpleImpactSearcher$Result') + + +class LuceneImpactSearcher: + """Wrapper class for ``ImpactSearcher`` in Anserini. + + Parameters + ---------- + index_dir : str + Path to Lucene index directory. + query_encoder: QueryEncoder or str + QueryEncoder to encode query text + """ + + def __init__(self, index_dir: str, query_encoder: Union[QueryEncoder, str], min_idf=0, encoder_type: str='pytorch'): + self.index_dir = index_dir + self.idf = self._compute_idf(index_dir) + self.min_idf = min_idf + self.object = JImpactSearcher(index_dir) + self.num_docs = self.object.get_total_num_docs() + self.encoder_type = encoder_type + self.query_encoder = query_encoder + if encoder_type == 'onnx': + if isinstance(query_encoder, str) or query_encoder is None: + self.object.set_onnx_query_encoder(query_encoder) + else: + raise ValueError(f'Invalid query encoder type: {type(query_encoder)} for onnx encoder') + elif encoder_type == 'pytorch': + if isinstance(query_encoder, str) or query_encoder is None: + self.query_encoder = self._init_query_encoder_from_str(query_encoder) + else: + self.query_encoder = query_encoder + else: + raise ValueError(f'Invalid encoder type: {encoder_type}') + + @classmethod + def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: Union[QueryEncoder, str], min_idf=0, encoder_type: str='pytorch'): + """Build a searcher from a pre-built index; download the index if necessary. + + Parameters + ---------- + prebuilt_index_name : str + Prebuilt index name. + query_encoder: QueryEncoder or str + QueryEncoder to encode query text + min_idf : int + Minimum idf for query tokens + encoder_type : str + Encoder type, either 'pytorch' or 'onnx' + + Returns + ------- + LuceneSearcher + Searcher built from the prebuilt index. + """ + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: + index_dir = download_prebuilt_index(prebuilt_index_name) + except ValueError as e: + print(str(e)) + return None + + print(f'Initializing {prebuilt_index_name}...') + return cls(index_dir, query_encoder, min_idf, encoder_type) + + def encode(self, query): + if self.encoder_type == 'onnx': + encoded_query = self.object.encode_with_onnx(query) + else: + encoded_query = self.query_encoder.encode(query) + return encoded_query + + @staticmethod + def list_prebuilt_indexes(): + """Display information about available prebuilt indexes.""" + print("Not Implemented") + + def search(self, q: str, k: int = 10, fields=dict()) -> List[JImpactSearcherResult]: + """Search the collection. + + Parameters + ---------- + q : str + Query string. + k : int + Number of hits to return. + min_idf : int + Minimum idf for query tokens + fields : dict + Optional map of fields to search with associated boosts. 
+ + Returns + ------- + List[JImpactSearcherResult] + List of search results. + """ + + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + encoded_query = self.encode(q) + + jquery = encoded_query + if self.encoder_type == 'pytorch': + for (token, weight) in encoded_query.items(): + if token in self.idf and self.idf[token] > self.min_idf: + jquery.put(token, JFloat(weight)) + + if not fields: + hits = self.object.search(jquery, k) + else: + hits = self.object.searchFields(jquery, jfields, k) + + return hits + + def batch_search(self, queries: List[str], qids: List[str], + k: int = 10, threads: int = 1, fields=dict()) -> Dict[str, List[JImpactSearcherResult]]: + """Search the collection concurrently for multiple queries, using multiple threads. + + Parameters + ---------- + queries : List[str] + List of query string. + qids : List[str] + List of corresponding query ids. + k : int + Number of hits to return. + threads : int + Maximum number of threads to use. + min_idf : int + Minimum idf for query tokens + fields : dict + Optional map of fields to search with associated boosts. + + Returns + ------- + Dict[str, List[JImpactSearcherResult]] + Dictionary holding the search results, with the query ids as keys and the corresponding lists of search + results as the values. + """ + query_lst = JArrayList() + qid_lst = JArrayList() + for q in queries: + encoded_query = self.encode(q) + jquery = JHashMap() + if self.encoder_type == 'pytorch': + for (token, weight) in encoded_query.items(): + if token in self.idf and self.idf[token] > self.min_idf: + jquery.put(token, JFloat(weight)) + else: + jquery = encoded_query + query_lst.add(jquery) + + for qid in qids: + jqid = qid + qid_lst.add(jqid) + + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + if not fields: + results = self.object.batch_search(query_lst, qid_lst, int(k), int(threads)) + else: + results = self.object.batch_search_fields(query_lst, qid_lst, int(k), int(threads), jfields) + return {r.getKey(): r.getValue() for r in results.entrySet().toArray()} + + def doc(self, docid: Union[str, int]) -> Optional[Document]: + """Return the :class:`Document` corresponding to ``docid``. The ``docid`` is overloaded: if it is of type + ``str``, it is treated as an external collection ``docid``; if it is of type ``int``, it is treated as an + internal Lucene ``docid``. Method returns ``None`` if the ``docid`` does not exist in the index. + + Parameters + ---------- + docid : Union[str, int] + Overloaded ``docid``: either an external collection ``docid`` (``str``) or an internal Lucene ``docid`` + (``int``). + + Returns + ------- + Document + :class:`Document` corresponding to the ``docid``. + """ + lucene_document = self.object.document(docid) + if lucene_document is None: + return None + return Document(lucene_document) + + def doc_by_field(self, field: str, q: str) -> Optional[Document]: + """Return the :class:`Document` based on a ``field`` with ``id``. For example, this method can be used to fetch + document based on alternative primary keys that have been indexed, such as an article's DOI. Method returns + ``None`` if no such document exists. + + Parameters + ---------- + field : str + Field to look up. + q : str + Unique id of document. + + Returns + ------- + Document + :class:`Document` whose ``field`` is ``id``. 
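+
+        For example, if articles were indexed with a ``doi`` field, ``searcher.doc_by_field('doi', some_doi)``
+        would return the article whose DOI is ``some_doi`` (a hypothetical example; the field must
+        actually exist in the index).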
+ """ + lucene_document = self.object.documentByField(field, q) + if lucene_document is None: + return None + return Document(lucene_document) + + def close(self): + """Close the searcher.""" + self.object.close() + + @staticmethod + def _init_query_encoder_from_str(query_encoder): + if query_encoder is None: + return TokFreqQueryEncoder() + elif os.path.isfile(query_encoder) and (query_encoder.endswith('jsonl') or query_encoder.encode('json')): + return CachedDataQueryEncoder(query_encoder) + elif 'unicoil' in query_encoder.lower(): + return UniCoilQueryEncoder(query_encoder) + elif 'splade' in query_encoder.lower(): + return SpladeQueryEncoder(query_encoder) + elif 'slim' in query_encoder.lower(): + return SlimQueryEncoder(query_encoder) + + @staticmethod + def _compute_idf(index_path): + from pyserini.index.lucene import IndexReader + index_reader = IndexReader(index_path) + tokens = [] + dfs = [] + for term in index_reader.terms(): + dfs.append(term.df) + tokens.append(term.term) + idfs = np.log((index_reader.stats()['documents'] / (np.array(dfs)))) + return dict(zip(tokens, idfs)) + + +SlimResult = namedtuple("SlimResult", "docid score") + +def maxsim(entry): + q_embed, d_embeds, d_lens, qid, scores, docids = entry + if len(d_embeds) == 0: + return qid, scores, docids + d_embeds = scipy.sparse.vstack(d_embeds).transpose() # (LD x 1000) x D + max_scores = (q_embed@d_embeds).todense() # LQ x (LD x 1000) + scores = [] + start = 0 + for d_len in d_lens: + scores.append(max_scores[:, start:start+d_len].max(1).sum()) + start += d_len + scores, docids = list(zip(*sorted(list(zip(scores, docids)), key=lambda x: -x[0]))) + return qid, scores, docids + +class SlimSearcher(LuceneImpactSearcher): + def __init__(self, encoded_corpus, *args, **kwargs): + super().__init__(*args, **kwargs) + print("Loading sparse corpus vectors for fast reranking...") + with open(os.path.join(encoded_corpus, "sparse_range.pkl"), "rb") as f: + self.sparse_ranges = pickle.load(f) + sparse_vecs = scipy.sparse.load_npz(os.path.join(encoded_corpus, "sparse_vec.npz")) + self.sparse_vecs = [sparse_vecs[start:end] for start, end in tqdm(self.sparse_ranges)] + + @classmethod + def from_prebuilt_index(cls, encoded_corpus:str, prebuilt_index_name: str, query_encoder: Union[QueryEncoder, str], min_idf=0): + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: + index_dir = download_prebuilt_index(prebuilt_index_name) + encoded_corpus = download_encoded_corpus(encoded_corpus) + except ValueError as e: + print(str(e)) + return None + + print(f'Initializing {prebuilt_index_name}...') + return cls(encoded_corpus, index_dir, query_encoder, min_idf) + + def search(self, q: str, k: int = 10, fields=dict()) -> List[JImpactSearcherResult]: + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + fusion_encoded_query, sparse_encoded_query = self.query_encoder.encode(q, return_sparse=True) + jquery = JHashMap() + for (token, weight) in fusion_encoded_query.items(): + if token in self.idf and self.idf[token] > self.min_idf: + jquery.put(token, JFloat(weight)) + + if self.sparse_vecs is not None: + search_k = k * (self.min_idf + 1) + if not fields: + hits = self.object.search(jquery, search_k) + else: + hits = self.object.searchFields(jquery, jfields, search_k) + hits = self.fast_rerank([sparse_encoded_query], {0: hits}, k)[0] + return hits + + def batch_search(self, queries: List[str], qids: List[str], + k: int = 10, threads: int = 1, fields=dict()) -> Dict[str, 
List[JImpactSearcherResult]]: + query_lst = JArrayList() + qid_lst = JArrayList() + sparse_encoded_queries = {} + for qid, q in zip(qids, queries): + fusion_encoded_query, sparse_encoded_query = self.query_encoder.encode(q, return_sparse=True) + jquery = JHashMap() + for (token, weight) in fusion_encoded_query.items(): + if token in self.idf and self.idf[token] > self.min_idf: + jquery.put(token, JFloat(weight)) + query_lst.add(jquery) + sparse_encoded_queries[qid] = sparse_encoded_query + + for qid in qids: + jqid = qid + qid_lst.add(jqid) + + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + if not fields: + results = self.object.batch_search(query_lst, qid_lst, k * (self.min_idf + 1), threads) + else: + results = self.object.batch_search_fields(query_lst, qid_lst, k * (self.min_idf + 1), threads, jfields) + + results = {r.getKey(): r.getValue() for r in results.entrySet().toArray()} + results = self.fast_rerank(sparse_encoded_queries, results, k) + return results + + def fast_rerank(self, q_embeds, results, k): + all_scores = [] + all_docids = [] + all_q_embeds = [] + all_d_embeds = [] + all_d_lens = [] + qids = [] + for qid in results.keys(): + all_q_embeds.append(q_embeds[qid]) + qids.append(qid) + hits = results[qid] + docids = [] + scores = [] + d_embeds = [] + d_lens = [] + for hit in hits: + docids.append(hit.docid) + scores.append(hit.score) + start, end = self.sparse_ranges[int(hit.docid)] + d_embeds.append(self.sparse_vecs[int(hit.docid)]) + d_lens.append(end-start) + all_scores.append(scores) + all_docids.append(docids) + all_d_embeds.append(d_embeds) + all_d_lens.append(d_lens) + + entries = list(zip(all_q_embeds, all_d_embeds, all_d_lens, qids, all_scores, all_docids)) + results = [maxsim(entry) for entry in entries] + anserini_results = {} + for qid, scores, docids in results: + hits = [] + for score, docid in list(zip(scores, docids))[:k]: + hits.append(SlimResult(docid, score)) + anserini_results[qid] = hits + return anserini_results diff --git a/pyserini/search/lucene/_searcher.py b/pyserini/search/lucene/_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..45677db52f428fc00bca5b75d7dd464cb6d846bc --- /dev/null +++ b/pyserini/search/lucene/_searcher.py @@ -0,0 +1,477 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python search interface to Anserini. The main entry point is the ``LuceneSearcher`` +class, which wraps the Java class with the same name in Anserini. 
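+
+A minimal usage sketch; the prebuilt index name is illustrative:
+
+    searcher = LuceneSearcher.from_prebuilt_index('msmarco-v1-passage')
+    hits = searcher.search('what is a lobster roll', k=10)
+    for hit in hits:
+        print(hit.docid, hit.score)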
+""" + +import logging +from typing import Dict, List, Optional, Union + +from pyserini.fusion import FusionMethod, reciprocal_rank_fusion +from pyserini.index import Document, IndexReader +from pyserini.pyclass import autoclass, JFloat, JArrayList, JHashMap +from pyserini.search import JQuery, JQueryGenerator +from pyserini.trectools import TrecRun +from pyserini.util import download_prebuilt_index, get_sparse_indexes_info + +logger = logging.getLogger(__name__) + + +# Wrappers around Anserini classes +JLuceneSearcher = autoclass('io.anserini.search.SimpleSearcher') +JLuceneSearcherResult = autoclass('io.anserini.search.SimpleSearcher$Result') + + +class LuceneSearcher: + """Wrapper class for ``SimpleSearcher`` in Anserini. + + Parameters + ---------- + index_dir : str + Path to Lucene index directory. + """ + + def __init__(self, index_dir: str, prebuilt_index_name=None): + self.index_dir = index_dir + self.object = JLuceneSearcher(index_dir) + self.num_docs = self.object.get_total_num_docs() + # Keep track if self is a known pre-built index. + self.prebuilt_index_name = prebuilt_index_name + + @classmethod + def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False): + """Build a searcher from a pre-built index; download the index if necessary. + + Parameters + ---------- + prebuilt_index_name : str + Prebuilt index name. + verbose : bool + Print status information. + + Returns + ------- + LuceneSearcher + Searcher built from the prebuilt index. + """ + if verbose: + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + + try: + index_dir = download_prebuilt_index(prebuilt_index_name, verbose=verbose) + except ValueError as e: + print(str(e)) + return None + + # Currently, the only way to validate stats is to create a separate IndexReader, because there is no method + # to obtain the underlying reader of a SimpleSearcher; see https://github.com/castorini/anserini/issues/2013 + index_reader = IndexReader(index_dir) + # This is janky as we're created a separate IndexReader for the sole purpose of validating index stats. + index_reader.validate(prebuilt_index_name, verbose=verbose) + + if verbose: + print(f'Initializing {prebuilt_index_name}...') + + return cls(index_dir, prebuilt_index_name=prebuilt_index_name) + + @staticmethod + def list_prebuilt_indexes(): + """Display information about available prebuilt indexes.""" + get_sparse_indexes_info() + + def search(self, q: Union[str, JQuery], k: int = 10, query_generator: JQueryGenerator = None, + fields=dict(), strip_segment_id=False, remove_dups=False) -> List[JLuceneSearcherResult]: + """Search the collection. + + Parameters + ---------- + q : Union[str, JQuery] + Query string or the ``JQuery`` objected. + k : int + Number of hits to return. + query_generator : JQueryGenerator + Generator to build queries. Set to ``None`` by default to use Anserini default. + fields : dict + Optional map of fields to search with associated boosts. + strip_segment_id : bool + Remove the .XXXXX suffix used to denote different segments from an document. + remove_dups : bool + Remove duplicate docids when writing final run output. + + Returns + ------- + List[JLuceneSearcherResult] + List of search results. 
+ """ + + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + hits = None + if query_generator: + if not fields: + hits = self.object.search(query_generator, q, k) + else: + hits = self.object.searchFields(query_generator, q, jfields, k) + elif isinstance(q, JQuery): + # Note that RM3 requires the notion of a query (string) to estimate the appropriate models. If we're just + # given a Lucene query, it's unclear what the "query" is for this estimation. One possibility is to extract + # all the query terms from the Lucene query, although this might yield unexpected behavior from the user's + # perspective. Until we think through what exactly is the "right thing to do", we'll raise an exception + # here explicitly. + if self.is_using_rm3(): + raise NotImplementedError('RM3 incompatible with search using a Lucene query.') + if fields: + raise NotImplementedError('Cannot specify fields to search when using a Lucene query.') + hits = self.object.search(q, k) + else: + if not fields: + hits = self.object.search(q, k) + else: + hits = self.object.search_fields(q, jfields, k) + + docids = set() + filtered_hits = [] + + for hit in hits: + if strip_segment_id is True: + hit.docid = hit.docid.split('.')[0] + + if hit.docid in docids: + continue + + filtered_hits.append(hit) + + if remove_dups is True: + docids.add(hit.docid) + + return filtered_hits + + def batch_search(self, queries: List[str], qids: List[str], k: int = 10, threads: int = 1, + query_generator: JQueryGenerator = None, fields = dict()) -> Dict[str, List[JLuceneSearcherResult]]: + """Search the collection concurrently for multiple queries, using multiple threads. + + Parameters + ---------- + queries : List[str] + List of query strings. + qids : List[str] + List of corresponding query ids. + k : int + Number of hits to return. + threads : int + Maximum number of threads to use. + query_generator : JQueryGenerator + Generator to build queries. Set to ``None`` by default to use Anserini default. + fields : dict + Optional map of fields to search with associated boosts. + + Returns + ------- + Dict[str, List[JLuceneSearcherResult]] + Dictionary holding the search results, with the query ids as keys and the corresponding lists of search + results as the values. + """ + query_strings = JArrayList() + qid_strings = JArrayList() + for query in queries: + query_strings.add(query) + + for qid in qids: + qid_strings.add(qid) + + jfields = JHashMap() + for (field, boost) in fields.items(): + jfields.put(field, JFloat(boost)) + + if query_generator: + if not fields: + results = self.object.batch_search(query_generator, query_strings, qid_strings, int(k), int(threads)) + else: + results = self.object.batch_search_fields(query_generator, query_strings, qid_strings, int(k), int(threads), jfields) + else: + if not fields: + results = self.object.batch_search(query_strings, qid_strings, int(k), int(threads)) + else: + results = self.object.batch_search_fields(query_strings, qid_strings, int(k), int(threads), jfields) + return {r.getKey(): r.getValue() for r in results.entrySet().toArray()} + + def get_feedback_terms(self, q: str) -> Dict[str, float]: + """Returns feedback terms and their weights. + + Parameters + ---------- + q : str + Query string or the ``JQuery`` objected. + + Returns + ------- + Dict[str, float] + Feedback terms and their weights. 
+ """ + + terms_map = self.object.get_feedback_terms(q) + if terms_map: + return {r.getKey(): r.getValue() for r in terms_map.entrySet().toArray()} + else: + return None + + def set_analyzer(self, analyzer): + """Set the Java ``Analyzer`` to use. + + Parameters + ---------- + analyzer : JAnalyzer + Java ``Analyzer`` object. + """ + self.object.set_analyzer(analyzer) + + def set_language(self, language): + """Set language of LuceneSearcher""" + self.object.set_language(language) + + def set_rm3(self, fb_terms=10, fb_docs=10, original_query_weight=float(0.5), debug=False, filter_terms=True): + """Configure RM3 pseudo-relevance feedback. + + Parameters + ---------- + fb_terms : int + RM3 parameter for number of expansion terms. + fb_docs : int + RM3 parameter for number of expansion documents. + original_query_weight : float + RM3 parameter for weight to assign to the original query. + debug : bool + Print the original and expanded queries as debug output. + filter_terms: bool + Whether to remove non-English terms. + """ + if self.object.reader.getTermVectors(0): + self.object.set_rm3(None, fb_terms, fb_docs, original_query_weight, debug, filter_terms) + elif self.prebuilt_index_name in ['msmarco-v1-passage', 'msmarco-v1-doc', 'msmarco-v1-doc-segmented']: + self.object.set_rm3('JsonCollection', fb_terms, fb_docs, original_query_weight, debug, filter_terms) + elif self.prebuilt_index_name in ['msmarco-v2-passage', 'msmarco-v2-passage-augmented']: + self.object.set_rm3('MsMarcoV2PassageCollection', fb_terms, fb_docs, original_query_weight, debug, filter_terms) + elif self.prebuilt_index_name in ['msmarco-v2-doc', 'msmarco-v2-doc-segmented']: + self.object.set_rm3('MsMarcoV2DocCollection', fb_terms, fb_docs, original_query_weight, debug, filter_terms) + else: + raise TypeError("RM3 is not supported for indexes without document vectors.") + + def unset_rm3(self): + """Disable RM3 pseudo-relevance feedback.""" + self.object.unset_rm3() + + def is_using_rm3(self) -> bool: + """Check if RM3 pseudo-relevance feedback is being performed.""" + return self.object.use_rm3() + + def set_rocchio(self, top_fb_terms=10, top_fb_docs=10, bottom_fb_terms=10, bottom_fb_docs=10, + alpha=1, beta=0.75, gamma=0, debug=False, use_negative=False): + """Configure Rocchio pseudo-relevance feedback. + + Parameters + ---------- + top_fb_terms : int + Rocchio parameter for number of relevant expansion terms. + top_fb_docs : int + Rocchio parameter for number of relevant expansion documents. + bottom_fb_terms : int + Rocchio parameter for number of non-relevant expansion terms. + bottom_fb_docs : int + Rocchio parameter for number of non-relevant expansion documents. + alpha : float + Rocchio parameter for weight to assign to the original query. + beta: float + Rocchio parameter for weight to assign to the relevant document vector. + gamma: float + Rocchio parameter for weight to assign to the nonrelevant document vector. + debug : bool + Print the original and expanded queries as debug output. + use_negative : bool + Rocchio parameter to use negative labels. 
+ """ + if self.object.reader.getTermVectors(0): + self.object.set_rocchio(None, top_fb_terms, top_fb_docs, bottom_fb_terms, bottom_fb_docs, + alpha, beta, gamma, debug, use_negative) + elif self.prebuilt_index_name in ['msmarco-v1-passage', 'msmarco-v1-doc', 'msmarco-v1-doc-segmented']: + self.object.set_rocchio('JsonCollection', top_fb_terms, top_fb_docs, bottom_fb_terms, bottom_fb_docs, + alpha, beta, gamma, debug, use_negative) + # Note, we don't have any Pyserini 2CRs that use Rocchio for MS MARCO v2, so there's currently no + # corresponding code branch here. To avoid introducing bugs (without 2CR tests), we'll add when it's needed. + else: + raise TypeError("Rocchio is not supported for indexes without document vectors.") + + def unset_rocchio(self): + """Disable Rocchio pseudo-relevance feedback.""" + self.object.unset_rocchio() + + def is_using_rocchio(self) -> bool: + """Check if Rocchio pseudo-relevance feedback is being performed.""" + return self.object.use_rocchio() + + def set_qld(self, mu=float(1000)): + """Configure query likelihood with Dirichlet smoothing as the scoring function. + + Parameters + ---------- + mu : float + Dirichlet smoothing parameter mu. + """ + self.object.set_qld(float(mu)) + + def set_bm25(self, k1=float(0.9), b=float(0.4)): + """Configure BM25 as the scoring function. + + Parameters + ---------- + k1 : float + BM25 k1 parameter. + b : float + BM25 b parameter. + """ + self.object.set_bm25(float(k1), float(b)) + + def get_similarity(self): + """Return the Lucene ``Similarity`` used as the scoring function.""" + return self.object.get_similarity() + + def doc(self, docid: Union[str, int]) -> Optional[Document]: + """Return the :class:`Document` corresponding to ``docid``. The ``docid`` is overloaded: if it is of type + ``str``, it is treated as an external collection ``docid``; if it is of type ``int``, it is treated as an + internal Lucene ``docid``. Method returns ``None`` if the ``docid`` does not exist in the index. + + Parameters + ---------- + docid : Union[str, int] + Overloaded ``docid``: either an external collection ``docid`` (``str``) or an internal Lucene ``docid`` + (``int``). + + Returns + ------- + Document + :class:`Document` corresponding to the ``docid``. + """ + lucene_document = self.object.doc(docid) + if lucene_document is None: + return None + return Document(lucene_document) + + def batch_doc(self, docids: List[str], threads: int) -> Dict[str, Document]: + """Concurrently fetching documents for multiple document ids. + Return dictionary that maps ``docid`` to :class:`Document`. Returned dictionary does not + contain ``docid`` if a corresponding :class:`Document` does not exist in the index. + + Parameters + ---------- + docids : List[str] + An external collection ``docid`` (``str``). + threads : int + Maximum number of threads to use. + + Returns + ------- + Dict[str, Document] + Dictionary mapping the ``docid`` to the corresponding :class:`Document`. + """ + docid_strings = JArrayList() + for docid in docids: + docid_strings.add(docid) + + results = self.object.batch_get_docs(docid_strings, threads) + batch_document = {r.getKey(): Document(r.getValue()) + for r in results.entrySet().toArray()} + return batch_document + + def doc_by_field(self, field: str, q: str) -> Optional[Document]: + """Return the :class:`Document` based on a ``field`` with ``id``. For example, this method can be used to fetch + document based on alternative primary keys that have been indexed, such as an article's DOI. 
Method returns + ``None`` if no such document exists. + + Parameters + ---------- + field : str + Field to look up. + q : str + Unique id of document. + + Returns + ------- + Document + :class:`Document` whose ``field`` is ``id``. + """ + lucene_document = self.object.doc_by_field(field, q) + if lucene_document is None: + return None + return Document(lucene_document) + + def close(self): + """Close the searcher.""" + self.object.close() + + +class LuceneSimilarities: + @staticmethod + def bm25(k1=0.9, b=0.4): + return autoclass('org.apache.lucene.search.similarities.BM25Similarity')(k1, b) + + @staticmethod + def qld(mu=1000): + return autoclass('org.apache.lucene.search.similarities.LMDirichletSimilarity')(mu) + + +class LuceneFusionSearcher: + def __init__(self, index_dirs: List[str], method: FusionMethod): + self.method = method + self.searchers = [LuceneSearcher(index_dir) for index_dir in index_dirs] + + def get_searchers(self) -> List[LuceneSearcher]: + return self.searchers + + def search(self, q: Union[str, JQuery], k: int = 10, query_generator: JQueryGenerator = None, strip_segment_id=False, remove_dups=False) -> List[JLuceneSearcherResult]: + trec_runs, docid_to_search_result = list(), dict() + + for searcher in self.searchers: + docid_score_pair = list() + hits = searcher.search(q, k=k, query_generator=query_generator, + strip_segment_id=strip_segment_id, remove_dups=remove_dups) + + for hit in hits: + docid_to_search_result[hit.docid] = hit + docid_score_pair.append((hit.docid, hit.score)) + + run = TrecRun.from_search_results(docid_score_pair) + trec_runs.append(run) + + if self.method == FusionMethod.RRF: + fused_run = reciprocal_rank_fusion(trec_runs, rrf_k=60, depth=1000, k=1000) + else: + raise NotImplementedError() + + return self.convert_to_search_result(fused_run, docid_to_search_result) + + @staticmethod + def convert_to_search_result(run: TrecRun, docid_to_search_result: Dict[str, JLuceneSearcherResult]) -> List[JLuceneSearcherResult]: + search_results = [] + + for _, _, docid, _, score, _ in run.to_numpy(): + search_result = docid_to_search_result[docid] + search_result.score = score + search_results.append(search_result) + + return search_results diff --git a/pyserini/search/lucene/irst/__init__.py b/pyserini/search/lucene/irst/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..463ef3d6224259ae0c5e01f4f16a2a1e81da61ba --- /dev/null +++ b/pyserini/search/lucene/irst/__init__.py @@ -0,0 +1,18 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
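For reference, here is a minimal usage sketch of the LuceneSearcher API defined above. The prebuilt index name 'msmarco-v1-passage' and the query string are illustrative assumptions; any prebuilt or local Lucene index works the same way, and batched retrieval follows the batch_search signature shown above.

from pyserini.search.lucene import LuceneSearcher

# Build a searcher from a prebuilt index (downloaded and cached on first use);
# 'msmarco-v1-passage' is an assumed example index name.
searcher = LuceneSearcher.from_prebuilt_index('msmarco-v1-passage')
searcher.set_bm25(k1=0.9, b=0.4)  # BM25 scoring with the default parameters.

hits = searcher.search('what is a lobster roll?', k=10)
for i, hit in enumerate(hits):
    print(f'{i + 1:2} {hit.docid:15} {hit.score:.5f}')

# Fetch the stored document for the top hit (returns None if the docid is unknown).
doc = searcher.doc(hits[0].docid)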
+# + +from ._searcher import LuceneIrstSearcher +__all__ = ['LuceneIrstSearcher'] diff --git a/pyserini/search/lucene/irst/__main__.py b/pyserini/search/lucene/irst/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..27ed16b4a7933279081d4aa6756ddd3151e24312 --- /dev/null +++ b/pyserini/search/lucene/irst/__main__.py @@ -0,0 +1,162 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import argparse +from typing import List +from tqdm import tqdm +from transformers import AutoTokenizer +from pyserini.search import get_topics +from pyserini.search.lucene.irst import LuceneIrstSearcher + + +def normalize(scores: List[float]): + low = min(scores) + high = max(scores) + width = high - low + if width != 0: + return [(s-low)/width for s in scores] + return scores + + +def query_loader(topic: str): + queries = {} + bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") + topics_dic = get_topics(topic) + line_num = 0 + for topic_id in topics_dic: + line_num += 1 + query_text = topics_dic[topic_id]['title'] + text_bert_tok = bert_tokenizer.tokenize(query_text.lower()) + if len(text_bert_tok) >= 0: + query = {"raw": query_text, + "contents": ' '.join(text_bert_tok)} + queries[topic_id] = query + if line_num % 10000 == 0: + print(f"Processed {line_num} queries") + print(f"Processed {line_num} queries") + return queries + + +def baseline_loader(base_path: str): + result_dic = {} + with open(base_path, 'r') as f: + for line in f: + tokens = line.strip().split() + topic = tokens[0] + doc_id = tokens[2] + score = float(tokens[-2]) + if topic in result_dic.keys(): + result_dic[topic][0].append(doc_id) + result_dic[topic][1].append(score) + else: + result_dic[topic] = [[doc_id], [score]] + + return result_dic + + +def generate_maxP(preds: List[float], docs: List[str]): + scores = {} + for index, (score, doc_id) in enumerate(zip(preds, docs)): + docid = doc_id.split('#')[0] + if (docid not in scores or score > scores[docid]): + scores[docid] = score + docid_scores = sorted(scores.items(), key=lambda kv: kv[1], reverse=True) + return docid_scores + + +def sort_dual_list(pred: List[float], docs: List[str]): + zipped_lists = zip(pred, docs) + sorted_pairs = sorted(zipped_lists) + + tuples = zip(*sorted_pairs) + pred, docs = [list(tuple) for tuple in tuples] + + pred.reverse() + docs.reverse() + return pred, docs + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='use ibm model 1 feature to rerank the base run file') + parser.add_argument('--tag', type=str, default="ibm", + metavar="tag_name", help='tag name for resulting Qrun') + parser.add_argument('--base-path', type=str, required=False, + metavar="path_to_base_run", help='path to base run') + parser.add_argument('--topics', type=str, required=True, + help='existing topics name or path to query topics') + parser.add_argument('--index', type=str, required=True, + metavar="path_to_lucene_index", help='path to 
lucene index folder') + parser.add_argument('--output', type=str, required=True, + metavar="path_to_reranked_run", help='the path to store reranked run file') + parser.add_argument('--alpha', type=float, default="0.3", + metavar="type of field", help='interpolation weight') + parser.add_argument('--num-threads', type=int, default="24", + metavar="num_of_threads", help='number of threads to use') + parser.add_argument('--max-sim', default=False, action="store_true", + help='whether we use max sim operator or avg instead') + parser.add_argument('--segments', default=False, action="store_true", + help='whether we use segmented index or not') + parser.add_argument('--k1', type=float, default="0.81", + metavar="bm25_k1_parameter", help='k1 parameter for bm25 search') + parser.add_argument('--b', type=float, default="0.68", + metavar="bm25_b_parameter", help='b parameter for bm25 search') + parser.add_argument('--hits', type=int, metavar='number of hits generated in runfile', + required=False, default=1000, help="Number of hits.") + args = parser.parse_args() + + print('Using max sim operator or not:', args.max_sim) + + f = open(args.output, 'w') + + reranker = LuceneIrstSearcher(args.index, args.k1, args.b, args.num_threads) + queries = query_loader(args.topics) + query_text_lst = [queries[topic]['raw'] for topic in queries.keys()] + qid_lst = [str(topic) for topic in queries.keys()] + i = 0 + for topic in queries: + if i % 100 == 0: + print(f'Reranking {i} topic') + query_text_field = queries[topic]['contents'] + query_text = queries[topic]['raw'] + bm25_results = reranker.bm25search.search(query_text, args.hits) + if args.base_path: + baseline_dic = baseline_loader(args.base_path) + docids, rank_scores, base_scores = reranker.rerank( + query_text, query_text_field, baseline_dic[topic], args.max_sim, bm25_results) + else: + docids, rank_scores, base_scores = reranker.search( + query_text, query_text_field, args.max_sim, bm25_results) + ibm_scores = normalize([p for p in rank_scores]) + base_scores = normalize([p for p in base_scores]) + + interpolated_scores = [ + a * args.alpha + b * (1-args.alpha) for a, b in zip(base_scores, ibm_scores)] + + preds, docs = sort_dual_list(interpolated_scores, docids) + i = i+1 + if args.segments: + docid_scores = generate_maxP(preds, docs) + rank = 1 + for doc_id, score in docid_scores: + if rank > 1000: + break + f.write(f'{topic} Q0 {doc_id} {rank} {score} {args.tag}\n') + rank = rank + 1 + else: + for index, (score, doc_id) in enumerate(zip(preds, docs)): + rank = index + 1 + f.write(f'{topic} Q0 {doc_id} {rank} {score} {args.tag}\n') + f.close() diff --git a/pyserini/search/lucene/irst/_searcher.py b/pyserini/search/lucene/irst/_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..85a69ad2cd475b40510a22ccf3bb17b921f4050e --- /dev/null +++ b/pyserini/search/lucene/irst/_searcher.py @@ -0,0 +1,288 @@ +# +# Pyserini: Python interface to the Anserini IR toolkit built on Lucene +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
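To tie the pieces together, the reranking entry point above might be driven end to end roughly as follows. The topics key 'msmarco-passage-dev-subset' and the output path are assumptions for illustration, not values prescribed by this script; the remaining flags fall back to the defaults declared above.

import subprocess

# Hypothetical end-to-end invocation of the IRST reranker defined above;
# adjust --topics, --index, and --output to your own setup.
cmd = ('python -m pyserini.search.lucene.irst '
       '--topics msmarco-passage-dev-subset '
       '--index msmarco-v1-passage '
       '--output runs/run.irst.msmarco-passage.dev.txt '
       '--hits 1000')
subprocess.run(cmd.split(), check=True)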
+# + +""" +This module provides Pyserini's Python translation probability search +interface on MS MARCO dataset. The main entry point is the +``TranslationProbabilitySearcher`` class. +""" + +import json +import math +import os +import pickle +import struct +from multiprocessing.pool import ThreadPool +from typing import Dict + +from transformers import AutoTokenizer + +from pyserini.pyclass import autoclass +from pyserini.search.lucene import LuceneSearcher +from pyserini.util import download_prebuilt_index, get_cache_home, download_url, download_and_unpack_index +from pyserini.prebuilt_index_info import TF_INDEX_INFO + +# Wrappers around Anserini classes +JQuery = autoclass('org.apache.lucene.search.Query') +JLuceneSearcher = autoclass('io.anserini.search.SimpleSearcher') +JIndexReader = autoclass('io.anserini.index.IndexReaderUtils') +JTerm = autoclass('org.apache.lucene.index.Term') + + +class LuceneIrstSearcher(object): + SELF_TRAN = 0.35 + MIN_PROB = 0.0025 + LAMBDA_VALUE = 0.3 + MIN_COLLECT_PROB = 1e-9 + + def __init__(self, index: str, k1: int, b: int, num_threads: int): + translation_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-models/ibm_model_1_bert_tok_20211117.tar.gz' + translation_directory = os.path.join(get_cache_home(), 'models') + self.termfreq_dic = self.download_and_load_wp_stats(index) + # This is used to download and unpack translation model instead of index, we use the function (download_and_unpack_index) for convenience. + self.translation_model = download_and_unpack_index(translation_url, translation_directory) + self.bm25search = LuceneSearcher.from_prebuilt_index(index) + self.bm25search.set_bm25(k1, b) + index_directory = os.path.join(get_cache_home(), 'indexes') + if index == 'msmarco-v1-passage': + index_path = os.path.join(index_directory, + TF_INDEX_INFO['msmarco-v1-passage']['filename'][:-6] + + TF_INDEX_INFO['msmarco-v1-passage']['md5']) + elif index == 'msmarco-v1-doc': + index_path = os.path.join(index_directory, + TF_INDEX_INFO['msmarco-v1-doc']['filename'][:-6] + + TF_INDEX_INFO['msmarco-v1-doc']['md5']) + elif index == 'msmarco-v1-doc-segmented': + index_path = os.path.join(index_directory, + TF_INDEX_INFO['msmarco-v1-doc-segmented']['filename'][:-6] + + TF_INDEX_INFO['msmarco-v1-doc-segmented']['md5']) + else: + print("We currently only support three indexes: msmarco-passage, msmarco-v1-doc and msmarco-v1-doc-segmented but the index you inserted is not one of those") + self.object = JLuceneSearcher(index_path) + self.source_lookup, self.target_lookup, self.tran = self.load_tranprobs_table() + self.bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") + self.pool = ThreadPool(num_threads) + + + @classmethod + def from_prebuilt_index(cls, prebuilt_index_name: str): + """Build a searcher from a pre-built index; download the index if necessary. + + Parameters + ---------- + prebuilt_index_name : str + Prebuilt index name. + + Returns + ------- + LuceneSearcher + Searcher built from the prebuilt index. 
+ """ + print(f'Attempting to initialize pre-built index {prebuilt_index_name}.') + try: + index_dir = download_prebuilt_index(prebuilt_index_name) + except ValueError as e: + print(str(e)) + return None + + print(f'Initializing {prebuilt_index_name}...') + return cls(index_dir) + + def download_and_load_wp_stats(self, index: str): + translation_directory = os.path.join(get_cache_home(), 'models') + if not os.path.exists(translation_directory): + os.makedirs(translation_directory) + if (index == 'msmarco-v1-passage'): + local_filename = 'bert_wp_term_freq.msmarco-passage.20220411.pickle' + wp_stats_path = os.path.join(translation_directory, local_filename) + url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-passage.20220411.pickle' + elif (index == 'msmarco-v1-doc'): + local_filename = 'bert_wp_term_freq.msmarco-doc.20220411.pickle' + wp_stats_path = os.path.join(translation_directory, local_filename) + url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-doc.20220411.pickle' + elif (index == 'msmarco-v1-doc-segmented'): + local_filename = 'bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle' + wp_stats_path = os.path.join(translation_directory, local_filename) + url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle' + + if os.path.exists(wp_stats_path): + print(f'{wp_stats_path} already exists, skipping download.') + else: + download_url(url, translation_directory, local_filename) + with open(wp_stats_path, 'rb') as fin: + termfreq_dic = pickle.load(fin) + return termfreq_dic + + @staticmethod + def intbits_to_float(b: bytes): + s = struct.pack('>l', b) + return struct.unpack('>f', s)[0] + + def rescale( + self, source_lookup: Dict[str, int], target_lookup: Dict[str, int], + tran_lookup: Dict[str, Dict[str, float]], + target_voc: Dict[int, str], source_voc: Dict[int, str] + ): + + for target_id in tran_lookup: + if target_id > 0: + adjust_mult = (1 - self.SELF_TRAN) + else: + adjust_mult = 1 + # adjust the prob with adjust_mult + # add SELF_TRAN prob to self-translation pair + for source_id in tran_lookup[target_id].keys(): + tran_prob = tran_lookup[target_id][source_id] + if source_id > 0: + source_word = source_voc[source_id] + target_word = target_voc[target_id] + tran_prob *= adjust_mult + if (source_word == target_word): + tran_prob += self.SELF_TRAN + tran_lookup[target_id][source_id] = tran_prob + # in case if self-translation pair was not included in TransTable + if target_id not in tran_lookup[target_id].keys(): + target_word = target_voc[target_id] + source_id = source_lookup[target_word] + tran_lookup[target_id][source_id] = self.SELF_TRAN + return source_lookup, target_lookup, tran_lookup + + def load_tranprobs_table(self): + dir_path = self.translation_model + source_path = dir_path + "/source.vcb" + source_lookup = {} + source_voc = {} + with open(source_path) as f: + lines = f.readlines() + for line in lines: + id, voc, freq = line.split(" ") + source_voc[int(id)] = voc + source_lookup[voc] = int(id) + + target_path = dir_path + "/target.vcb" + target_lookup = {} + target_voc = {} + with open(target_path) as f: + lines = f.readlines() + for line in lines: + id, voc, freq = line.split(" ") + target_voc[int(id)] = voc + target_lookup[voc] = int(id) + tran_path = dir_path + "/output.t1.5.bin" + tran_lookup = {} + with open(tran_path, "rb") as file: + byte = file.read(4) + while byte: + source_id = int.from_bytes(byte, "big") + assert(source_id == 0 or 
source_id in source_voc.keys()) + byte = file.read(4) + target_id = int.from_bytes(byte, "big") + assert(target_id in target_voc.keys()) + byte = file.read(4) + tran_prob = self.intbits_to_float(int.from_bytes(byte, "big")) + if (target_id in tran_lookup.keys()) and (tran_prob > self.MIN_PROB): + tran_lookup[target_id][source_id] = tran_prob + elif tran_prob > self.MIN_PROB: + tran_lookup[target_id] = {} + tran_lookup[target_id][source_id] = tran_prob + byte = file.read(4) + return self.rescale( + source_lookup, target_lookup, + tran_lookup, target_voc, source_voc) + + def get_ibm_score(self, arguments): + (query_text_lst, test_doc, searcher, source_lookup, + target_lookup, tran, collect_probs, max_sim) = arguments + + if searcher.doc_raw(test_doc) is None: + print(f"{test_doc} is not found in searcher") + contents = json.loads(self.object.doc_raw(test_doc))['contents'] + doc_token_lst = self.bert_tokenizer.tokenize(contents.lower(), truncation=True) + total_query_prob = 0 + doc_size = len(doc_token_lst) + query_size = len(query_text_lst) + for querytoken in query_text_lst: + target_map = {} + total_tran_prob = 0 + collect_prob = collect_probs[querytoken] + max_sim_score = 0 + if querytoken in target_lookup.keys(): + query_word_id = target_lookup[querytoken] + if query_word_id in tran.keys(): + target_map = tran[query_word_id] + for doctoken in doc_token_lst: + tran_prob = 0 + doc_word_id = 0 + if doctoken in source_lookup.keys(): + doc_word_id = source_lookup[doctoken] + if doc_word_id in target_map.keys(): + tran_prob = max(target_map[doc_word_id], tran_prob) + max_sim_score = max(tran_prob, max_sim_score) + total_tran_prob += (tran_prob/doc_size) + if max_sim: + query_word_prob = math.log( + (1 - self.LAMBDA_VALUE) * max_sim_score + self.LAMBDA_VALUE * collect_prob) + else: + query_word_prob = math.log( + (1 - self.LAMBDA_VALUE) * total_tran_prob + self.LAMBDA_VALUE * collect_prob) + + total_query_prob += query_word_prob + return total_query_prob / query_size + + def search(self, query_text, query_field_text, max_sim, bm25_results): + origin_scores = [bm25_result.score for bm25_result in bm25_results] + test_docs = [bm25_result.docid for bm25_result in bm25_results] + if (test_docs == []): + print(query_text) + + query_field_text_lst = query_field_text.split(' ') + total_term_freq = self.termfreq_dic['TOTAL'] + collect_probs = {} + for querytoken in query_field_text_lst: + if querytoken in self.termfreq_dic: + collect_probs[querytoken] = max(self.termfreq_dic[querytoken] / total_term_freq, self.MIN_COLLECT_PROB) + else: + collect_probs[querytoken] = self.MIN_COLLECT_PROB + arguments = [( + query_field_text_lst, test_doc, self.object, + self.source_lookup, self.target_lookup, + self.tran, collect_probs, max_sim) + for test_doc in test_docs] + + rank_scores = self.pool.map(self.get_ibm_score, arguments) + return test_docs, rank_scores, origin_scores + + def rerank(self, query_text, query_field_text, baseline, max_sim, tf_table): + test_docs, origin_scores = baseline + if (test_docs == []): + print(query_text) + + query_field_text_lst = query_field_text.split(' ') + total_term_freq = self.termfreq_dic['TOTAL'] + collect_probs = {} + for querytoken in query_field_text_lst: + if querytoken in self.termfreq_dic: + collect_probs[querytoken] = max(self.termfreq_dic[querytoken] / total_term_freq, self.MIN_COLLECT_PROB) + else: + collect_probs[querytoken] = self.MIN_COLLECT_PROB + arguments = [( + query_field_text_lst, test_doc, self.object, + self.source_lookup, self.target_lookup, + self.tran, 
collect_probs, max_sim) + for test_doc in test_docs] + + rank_scores = self.pool.map(self.get_ibm_score, arguments) + return test_docs, rank_scores, origin_scores diff --git a/pyserini/search/lucene/ltr/__init__.py b/pyserini/search/lucene/ltr/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad82edd5febc24513b48efd4de59aba900660b02 --- /dev/null +++ b/pyserini/search/lucene/ltr/__init__.py @@ -0,0 +1,30 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._base import FeatureExtractor, BM25Stat, LmDirStat, DfrGl2Stat, DfrInExpB2Stat, DphStat, Proximity, TpScore, TpDist,\ + DocSize, MatchingTermCount, QueryLength, SCS, SumMatchingTF, UniqueTermCount, QueryCoverageRatio, \ + UnorderedSequentialPairs, OrderedSequentialPairs, UnorderedQueryPairs, OrderedQueryPairs, \ + AvgPooler, SumPooler, MedianPooler, MinPooler, MaxPooler, VarPooler, TfStat, TfIdfStat, NormalizedTfStat, \ + IdfStat, IcTfStat, ConfidencePooler, MaxMinRatioPooler, \ + NormalizedTfIdf, ProbalitySum, RunList, IbmModel1, SpacyTextParser +from ._search_msmarco import MsmarcoLtrSearcher + +__all__ = ['FeatureExtractor', 'BM25Stat', 'LmDirStat', 'DfrGl2Stat', 'DfrInExpB2Stat', 'DphStat', 'Proximity', 'TpScore', 'TpDist', + 'DocSize', 'MatchingTermCount', 'QueryLength', 'SCS', 'SumMatchingTF', 'UniqueTermCount', 'QueryCoverageRatio', + 'UnorderedSequentialPairs', 'OrderedSequentialPairs', 'UnorderedQueryPairs', 'OrderedQueryPairs', + 'AvgPooler', 'SumPooler', 'MedianPooler', 'MinPooler', 'MaxPooler', 'VarPooler', 'TfStat', 'TfIdfStat', + 'NormalizedTfStat','IdfStat', 'IcTfStat', 'ConfidencePooler', 'MaxMinRatioPooler','NormalizedTfIdf', + 'ProbalitySum', 'RunList', 'IbmModel1', 'MsmarcoLtrSearcher','SpacyTextParser'] \ No newline at end of file diff --git a/pyserini/search/lucene/ltr/__main__.py b/pyserini/search/lucene/ltr/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..1278487c38acb283db70f4d285fa9d06566957a1 --- /dev/null +++ b/pyserini/search/lucene/ltr/__main__.py @@ -0,0 +1,297 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys + +# We're going to explicitly use a local installation of Pyserini (as opposed to a pip-installed one). +# Comment these lines out to use a pip-installed one instead. 
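At its core, get_ibm_score above interpolates an IBM Model 1 translation probability with a smoothed collection language-model probability and averages the log scores over query tokens. A simplified pure-Python sketch of the non-max-sim path (string tokens instead of vocabulary ids, no thread pool) looks like this:

import math

def ibm_model1_score(query_tokens, doc_tokens, tran, collect_prob, lambda_value=0.3):
    # tran[q][d] holds the translation probability P(q | d) for query token q and
    # document token d; collect_prob[q] is the smoothed collection probability of q;
    # lambda_value mirrors LAMBDA_VALUE in LuceneIrstSearcher.
    doc_len = len(doc_tokens)
    total = 0.0
    for q in query_tokens:
        # Average translation probability of q over the document's tokens.
        p_translate = sum(tran.get(q, {}).get(d, 0.0) for d in doc_tokens) / doc_len
        total += math.log((1 - lambda_value) * p_translate + lambda_value * collect_prob[q])
    return total / len(query_tokens)

The max-sim variant simply replaces the averaged p_translate with the single best translation probability over the document's tokens.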
+sys.path.insert(0, './') + +import argparse +import numpy as np +import pandas as pd +from tqdm import tqdm +from collections import defaultdict +from transformers import AutoTokenizer +from pyserini.search.lucene.ltr._search_msmarco import MsmarcoLtrSearcher +from pyserini.search.lucene.ltr import * +from pyserini.search.lucene import LuceneSearcher +from pyserini.analysis import Analyzer, get_lucene_analyzer + +""" +Running prediction on candidates +""" +def dev_data_loader(file, format, topic, rerank, prebuilt, qrel, granularity, top=1000): + if rerank: + if format == 'tsv': + dev = pd.read_csv(file, sep="\t", + names=['qid', 'pid', 'rank'], + dtype={'qid': 'S','pid': 'S', 'rank':'i',}) + elif format == 'trec': + dev = pd.read_csv(file, sep="\s+", + names=['qid', 'q0', 'pid', 'rank', 'score', 'tag'], + usecols=['qid', 'pid', 'rank'], + dtype={'qid': 'S','pid': 'S', 'rank':'i',}) + else: + raise Exception('unknown parameters') + assert dev['qid'].dtype == object + assert dev['pid'].dtype == object + assert dev['rank'].dtype == np.int32 + dev = dev[dev['rank']<=top] + else: + if prebuilt: + bm25search = LuceneSearcher.from_prebuilt_index(args.index) + else: + bm25search = LuceneSearcher(args.index) + bm25search.set_bm25(0.82, 0.68) + dev_dic = {"qid":[], "pid":[], "rank":[]} + for topic in tqdm(queries.keys()): + query_text = queries[topic]['raw'] + bm25_dev = bm25search.search(query_text, args.hits) + doc_ids = [bm25_result.docid for bm25_result in bm25_dev] + qid = [topic for _ in range(len(doc_ids))] + rank = [i for i in range(1, len(doc_ids)+1)] + dev_dic['qid'].extend(qid) + dev_dic['pid'].extend(doc_ids) + dev_dic['rank'].extend(rank) + dev = pd.DataFrame(dev_dic) + dev['rank'].astype(np.int32) + if granularity == 'document': + seperation = "\t" + else: + seperation = " " + dev_qrel = pd.read_csv(qrel, sep=seperation, + names=["qid", "q0", "pid", "rel"], usecols=['qid', 'pid', 'rel'], + dtype={'qid': 'S','pid': 'S', 'rel':'i'}) + dev = dev.merge(dev_qrel, left_on=['qid', 'pid'], right_on=['qid', 'pid'], how='left') + dev['rel'] = dev['rel'].fillna(0).astype(np.int32) + dev = dev.sort_values(['qid', 'pid']).set_index(['qid', 'pid']) + + print(dev.shape) + print(dev.index.get_level_values('qid').drop_duplicates().shape) + print(dev.groupby('qid').count().mean()) + print(dev.head(10)) + print(dev.info()) + + dev_rel_num = dev_qrel[dev_qrel['rel'] > 0].groupby('qid').count()['rel'] + + recall_point = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000] + recall_curve = {k: [] for k in recall_point} + for qid, group in tqdm(dev.groupby('qid')): + group = group.reset_index() + assert len(group['pid'].tolist()) == len(set(group['pid'].tolist())) + total_rel = dev_rel_num.loc[qid] + query_recall = [0 for k in recall_point] + for t in group.sort_values('rank').itertuples(): + if t.rel > 0: + for i, p in enumerate(recall_point): + if t.rank <= p: + query_recall[i] += 1 + for i, p in enumerate(recall_point): + if total_rel > 0: + recall_curve[p].append(query_recall[i] / total_rel) + else: + recall_curve[p].append(0.) 
+ + for k, v in recall_curve.items(): + avg = np.mean(v) + print(f'recall@{k}:{avg}') + + return dev, dev_qrel + + +def query_loader(topic): + queries = {} + nlp = SpacyTextParser('en_core_web_sm', keep_only_alpha_num=True, lower_case=True) + analyzer = Analyzer(get_lucene_analyzer()) + bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") + inp_file = open(topic) + ln = 0 + for line in tqdm(inp_file): + ln += 1 + line = line.strip() + if not line: + continue + fields = line.split('\t') + if len(fields) != 2: + print('Misformated line %d ignoring:' % ln) + print(line.replace('\t', '')) + continue + did, query = fields + query_lemmas, query_unlemm = nlp.proc_text(query) + analyzed = analyzer.analyze(query) + for token in analyzed: + if ' ' in token: + print(analyzed) + query_toks = query_lemmas.split() + if len(query_toks) >= 0: + query = {"raw" : query, + "text": query_lemmas.split(' '), + "text_unlemm": query_unlemm.split(' '), + "analyzed": analyzed, + "text_bert_tok": bert_tokenizer.tokenize(query.lower())} + queries[did] = query + + if ln % 10000 == 0: + print('Processed %d queries' % ln) + + print('Processed %d queries' % ln) + return queries + + +def eval_mrr(dev_data): + score_tie_counter = 0 + score_tie_query = set() + MRR = [] + for qid, group in tqdm(dev_data.groupby('qid')): + group = group.reset_index() + rank = 0 + prev_score = None + assert len(group['pid'].tolist()) == len(set(group['pid'].tolist())) + # stable sort is also used in LightGBM + + for t in group.sort_values('score', ascending=False, kind='mergesort').itertuples(): + if prev_score is not None and abs(t.score - prev_score) < 1e-8: + score_tie_counter += 1 + score_tie_query.add(qid) + prev_score = t.score + rank += 1 + if t.rel > 0: + MRR.append(1.0 / rank) + break + elif rank == 10 or rank == len(group): + MRR.append(0.) + break + + score_tie = f'score_tie occurs {score_tie_counter} times in {len(score_tie_query)} queries' + print(score_tie) + mrr_10 = np.mean(MRR).item() + print(f'MRR@10:{mrr_10} with {len(MRR)} queries') + return {'score_tie': score_tie, 'mrr_10': mrr_10} + + +def eval_recall(dev_qrel, dev_data): + dev_rel_num = dev_qrel[dev_qrel['rel'] > 0].groupby('qid').count()['rel'] + + score_tie_counter = 0 + score_tie_query = set() + + recall_point = [10,20,50,100,200,250,300,333,400,500,1000] + recall_curve = {k: [] for k in recall_point} + for qid, group in tqdm(dev_data.groupby('qid')): + group = group.reset_index() + rank = 0 + prev_score = None + assert len(group['pid'].tolist()) == len(set(group['pid'].tolist())) + # stable sort is also used in LightGBM + total_rel = dev_rel_num.loc[qid] + query_recall = [0 for k in recall_point] + for t in group.sort_values('score', ascending=False, kind='mergesort').itertuples(): + if prev_score is not None and abs(t.score - prev_score) < 1e-8: + score_tie_counter += 1 + score_tie_query.add(qid) + prev_score = t.score + rank += 1 + if t.rel > 0: + for i, p in enumerate(recall_point): + if rank <= p: + query_recall[i] += 1 + for i, p in enumerate(recall_point): + if total_rel > 0: + recall_curve[p].append(query_recall[i] / total_rel) + else: + recall_curve[p].append(0.) 
+ + score_tie = f'score_tie occurs {score_tie_counter} times in {len(score_tie_query)} queries' + print(score_tie) + res = {'score_tie': score_tie} + + for k, v in recall_curve.items(): + avg = np.mean(v) + print(f'recall@{k}:{avg}') + res[f'recall@{k}'] = avg + + return res + + +def output(file, dev_data, format, maxp): + score_tie_counter = 0 + score_tie_query = set() + output_file = open(file,'w') + results = defaultdict(dict) + idx = 0 + for qid, group in tqdm(dev_data.groupby('qid')): + group = group.reset_index() + rank = 0 + prev_score = None + assert len(group['pid'].tolist()) == len(set(group['pid'].tolist())) + # stable sort is also used in LightGBM + for t in group.sort_values('score', ascending=False, kind='mergesort').itertuples(): + if prev_score is not None and abs(t.score - prev_score) < 1e-8: + score_tie_counter += 1 + score_tie_query.add(qid) + prev_score = t.score + if maxp: + docid = t.pid.split('#')[0] + if qid not in results or docid not in results[qid] or t.score > results[qid][docid]: + results[qid][docid] = t.score + else: + results[qid][t.pid] = t.score + + + for qid in tqdm(results.keys()): + rank = 1 + docid_score = results[qid] + docid_score = sorted(docid_score.items(),key=lambda kv: kv[1], reverse=True) + for docid, score in docid_score: + if format=='trec': + output_file.write(f"{qid}\tQ0\t{docid}\t{rank}\t{score}\tltr\n") + else: + output_file.write(f"{qid}\t{docid}\t{rank}\n") + rank += 1 + score_tie = f'score_tie occurs {score_tie_counter} times in {len(score_tie_query)} queries' + print(score_tie) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Learning to rank reranking') + parser.add_argument('--input', default='') + parser.add_argument('--hits', type=int, default=1000) + parser.add_argument('--input-format', default = 'trec') + parser.add_argument('--model', required=True) + parser.add_argument('--index', required=True) + parser.add_argument('--output', required=True) + parser.add_argument('--ibm-model', required=True) + parser.add_argument('--topic', required=True) + parser.add_argument('--output-format', default='tsv') + parser.add_argument('--max-passage', action='store_true') + parser.add_argument('--rerank', action='store_true') + parser.add_argument('--qrel', required=True) + parser.add_argument('--granularity', default='passage') + + args = parser.parse_args() + queries = query_loader(args.topic) + print("---------------------loading dev----------------------------------------") + prebuilt = args.index == 'msmarco-passage-ltr' or args.index == 'msmarco-doc-per-passage-ltr' + dev, dev_qrel = dev_data_loader(args.input, args.input_format, args.topic, args.rerank, prebuilt, args.qrel, args.granularity, args.hits) + searcher = MsmarcoLtrSearcher(args.model, args.ibm_model, args.index, args.granularity, prebuilt, args.topic) + searcher.add_fe() + batch_info = searcher.search(dev, queries) + del dev, queries + + eval_res = eval_mrr(batch_info) + eval_recall(dev_qrel, batch_info) + output(args.output, batch_info,args.output_format, args.max_passage) + print('Done!') \ No newline at end of file diff --git a/pyserini/search/lucene/ltr/_base.py b/pyserini/search/lucene/ltr/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..879b897ec7d7afeba989fc37744d578033e17c82 --- /dev/null +++ b/pyserini/search/lucene/ltr/_base.py @@ -0,0 +1,369 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyserini.pyclass import autoclass +import json +import numpy as np +import pandas as pd +import spacy +import re + +class Feature: + def name(self): + return self.extractor.getName() + +class NormalizedTfIdf(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.NormalizedTfIdf') + self.extractor = Jclass(field, qfield) + +class ProbalitySum(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.ProbalitySum') + self.extractor = Jclass(field, qfield) + +class IbmModel1(Feature): + def __init__(self, path, field, tag, qfield): + Jclass = autoclass('io.anserini.ltr.feature.IbmModel1') + self.extractor = Jclass(path, field, tag, qfield) + +class Proximity(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.Proximity') + self.extractor = Jclass(field, qfield) + +class TpScore(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.TpScore') + self.extractor = Jclass(field, qfield) + +class TpDist(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.TpDist') + self.extractor = Jclass(field, qfield) + +class DocSize(Feature): + def __init__(self, field='contents'): + Jclass = autoclass('io.anserini.ltr.feature.DocSize') + self.extractor = Jclass(field) + +class MatchingTermCount(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.MatchingTermCount') + self.extractor = Jclass(field, qfield) + +class QueryLength(Feature): + def __init__(self, qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.QueryLength') + self.extractor = Jclass(qfield) + +class SCS(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.SCS') + self.extractor = Jclass(field, qfield) + +class SumMatchingTF(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.SumMatchingTF') + self.extractor = Jclass(field, qfield) + +class QueryCoverageRatio(Feature): + def __init__(self, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.QueryCoverageRatio') + self.extractor = Jclass(field, qfield) + +class RunList(Feature): + def __init__(self,filename,tag): + Jclass = autoclass('io.anserini.ltr.feature.RunList') + self.extractor = Jclass(filename,tag) + +class UniqueTermCount(Feature): + def __init__(self, qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.UniqueTermCount') + self.extractor = Jclass(qfield) + +class UnorderedSequentialPairs(Feature): + def __init__(self, gap=8, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.UnorderedSequentialPairs') + self.extractor = Jclass(gap, field, qfield) + +class OrderedSequentialPairs(Feature): + def __init__(self, gap=8, field='contents', 
qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.OrderedSequentialPairs') + self.extractor = Jclass(gap, field, qfield) + +class UnorderedQueryPairs(Feature): + def __init__(self, gap=8, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.UnorderedQueryPairs') + self.extractor = Jclass(gap, field, qfield) + +class OrderedQueryPairs(Feature): + def __init__(self, gap=8, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.OrderedQueryPairs') + self.extractor = Jclass(gap, field, qfield) + +class AvgPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.AvgPooler') + self.extractor = Jclass() + +class SumPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.SumPooler') + self.extractor = Jclass() + +class MedianPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.MedianPooler') + self.extractor = Jclass() + +class MinPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.MinPooler') + self.extractor = Jclass() + +class MaxPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.MaxPooler') + self.extractor = Jclass() + +class VarPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.VarPooler') + self.extractor = Jclass() + +class ConfidencePooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.ConfidencePooler') + self.extractor = Jclass() + +class MaxMinRatioPooler(Feature): + def __init__(self): + Jclass = autoclass('io.anserini.ltr.MaxMinRatioPooler') + self.extractor = Jclass() + +class TfStat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.TfStat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class TfIdfStat(Feature): + def __init__(self, sublinear, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.TfIdfStat') + JBoolean = autoclass('java.lang.Boolean') + self.extractor = Jclass(JBoolean(sublinear), pooler.extractor, field, qfield) + +class NormalizedTfStat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.NormalizedTfStat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class IdfStat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.IdfStat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class IcTfStat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.IcTfStat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class BM25Stat(Feature): + def __init__(self, pooler, k1=0.9, b=0.4, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.BM25Stat') + self.extractor = Jclass(pooler.extractor, k1, b, field, qfield) + +class DfrInExpB2Stat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.DfrInExpB2Stat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class DphStat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.DphStat') + self.extractor = Jclass(pooler.extractor, field, qfield) + +class LmDirStat(Feature): + def __init__(self, pooler, mu=1000, field='contents', qfield='analyzed'): + 
Jclass = autoclass('io.anserini.ltr.feature.LmDirStat') + self.extractor = Jclass(pooler.extractor, mu, field, qfield) + +class DfrGl2Stat(Feature): + def __init__(self, pooler, field='contents', qfield='analyzed'): + Jclass = autoclass('io.anserini.ltr.feature.DfrGl2Stat') + self.extractor = Jclass(pooler.extractor, field, qfield) + + +class FeatureExtractor: + def __init__(self, index_dir, worker_num=1): + JFeatureExtractorUtils = autoclass('io.anserini.ltr.FeatureExtractorUtils') + self.utils = JFeatureExtractorUtils(index_dir, worker_num) + self.feature_name = [] + + def add(self, pyclass): + """ + add feature extractor; cannot add feature extractors in the middle of extraction + Parameters + ---------- + pyclass: Feature + an initialized feature extractor + """ + self.utils.add(pyclass.extractor) + self.feature_name.append(pyclass.name()) + + def feature_names(self): + """ + get all feature names + Returns + ------- + List[str] all the feature names in order + """ + return self.feature_name + + def lazy_extract(self, qid, doc_ids, query_dict): + input = {'qid': qid, 'docIds': doc_ids} + input.update(query_dict) + self.utils.lazyExtract(json.dumps(input)) + + def batch_extract(self, tasks): + need_rows = 0 + for task in tasks: + self.lazy_extract(task['qid'], task['docIds'], task['query_dict']) + need_rows += len(task['docIds']) + feature_name = self.feature_names() + feature = np.zeros(shape=(need_rows, len(feature_name)), dtype=np.float32) + idx = 0 + for task in tasks: + flattened = self.get_result(task['qid']) + feature[idx:idx+len(task['docIds']),:] = flattened.reshape(len(task['docIds']), len(feature_name)) + idx += len(task['docIds']) + return pd.DataFrame(feature, columns=feature_name) + + + def get_result(self, qid): + res = self.utils.getResult(qid).tostring() + dt = np.dtype(np.float32) + dt = dt.newbyteorder('>') + return np.frombuffer(res, dt) + +class SpacyTextParser: + def __init__(self, model_name, + remove_punct=True, + sent_split=False, + keep_only_alpha_num=False, + lower_case=True, + enable_POS=True): + + disable_list = ['ner', 'parser'] + if not enable_POS: + disable_list.append('tagger') + print('Disabled Spacy components: ', disable_list) + + self._nlp = spacy.load(model_name, disable=disable_list) + if sent_split: + sentencizer = self._nlp.create_pipe("sentencizer") + self._nlp.add_pipe(sentencizer) + + self._remove_punct = remove_punct + sw = ['a', 'about', 'above', 'according', 'across', 'after', + 'afterwards', 'again', 'against', 'albeit', 'all', 'almost', + 'alone', 'along', 'already', 'also', 'although', 'always', 'am', + 'among', 'amongst', 'an', 'and', 'another', 'any', 'anybody', 'anyhow', + 'anyone', 'anything', 'anyway', 'anywhere', 'apart', 'are', 'around', + 'as', 'at', 'av', 'be', 'became', 'because', 'become', 'becomes', + 'becoming', 'been', 'before', 'beforehand', 'behind', 'being', 'below', + 'beside', 'besides', 'between', 'beyond', 'both', 'but', 'by', 'can', + 'cannot', 'canst', 'certain', 'cf', 'choose', 'contrariwise', 'cos', + 'could', 'cu', 'day', 'do', 'does', "doesn't", 'doing', 'dost', 'doth', + 'double', 'down', 'dual', 'during', 'each', 'either', 'else', 'elsewhere', + 'enough', 'et', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', + 'everything', 'everywhere', 'except', 'excepted', 'excepting', 'exception', + 'exclude', 'excluding', 'exclusive', 'far', 'farther', 'farthest', 'few', + 'ff', 'first', 'for', 'formerly', 'forth', 'forward', 'from', 'front', + 'further', 'furthermore', 'furthest', 'get', 'go', 'had', 'halves', 
'hardly', + 'has', 'hast', 'hath', 'have', 'he', 'hence', 'henceforth', 'her', 'here', + 'hereabouts', 'hereafter', 'hereby', 'herein', 'hereto', 'hereupon', 'hers', + 'herself', 'him', 'himself', 'hindmost', 'his', 'hither', 'hitherto', 'how', + 'however', 'howsoever', 'i', 'ie', 'if', 'in', 'inasmuch', 'inc', 'include', + 'included', 'including', 'indeed', 'indoors', 'inside', 'insomuch', 'instead', + 'into', 'inward', 'inwards', 'is', 'it', 'its', 'itself', 'just', 'kind', 'kg', + 'km', 'last', 'latter', 'latterly', 'less', 'lest', 'let', 'like', 'little', 'ltd', + 'many', 'may', 'maybe', 'me', 'meantime', 'meanwhile', 'might', 'moreover', 'most', + 'mostly', 'more', 'mr', 'mrs', 'ms', 'much', 'must', 'my', 'myself', 'namely', 'need', + 'neither', 'never', 'nevertheless', 'next', 'no', 'nobody', 'none', 'nonetheless', + 'noone', 'nope', 'nor', 'not', 'nothing', 'notwithstanding', 'now', 'nowadays', + 'nowhere', 'of', 'off', 'often', 'ok', 'on', 'once', 'one', 'only', 'onto', 'or', + 'other', 'others', 'otherwise', 'ought', 'our', 'ours', 'ourselves', 'out', 'outside', + 'over', 'own', 'per', 'perhaps', 'plenty', 'provide', 'quite', 'rather', 'really', + 'round', 'said', 'sake', 'same', 'sang', 'save', 'saw', 'see', 'seeing', 'seem', 'seemed', + 'seeming', 'seems', 'seen', 'seldom', 'selves', 'sent', 'several', 'shalt', 'she', 'should', + 'shown', 'sideways', 'since', 'slept', 'slew', 'slung', 'slunk', 'smote', 'so', 'some', + 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', + 'spake', 'spat', 'spoke', 'spoken', 'sprang', 'sprung', 'stave', 'staves', 'still', 'such', + 'supposing', 'than', 'that', 'the', 'thee', 'their', 'them', 'themselves', 'then', 'thence', + 'thenceforth', 'there', 'thereabout', 'thereabouts', 'thereafter', 'thereby', 'therefore', + 'therein', 'thereof', 'thereon', 'thereto', 'thereupon', 'these', 'they', 'this', 'those', + 'thou', 'though', 'thrice', 'through', 'throughout', 'thru', 'thus', 'thy', 'thyself', 'till', + 'to', 'together', 'too', 'toward', 'towards', 'ugh', 'unable', 'under', 'underneath', 'unless', + 'unlike', 'until', 'up', 'upon', 'upward', 'upwards', 'us', 'use', 'used', 'using', 'very', 'via', + 'vs', 'want', 'was', 'we', 'week', 'well', 'were', 'what', 'whatever', 'whatsoever', 'when', + 'whence', 'whenever', 'whensoever', 'where', 'whereabouts', 'whereafter', 'whereas', 'whereat', + 'whereby', 'wherefore', 'wherefrom', 'wherein', 'whereinto', 'whereof', 'whereon', 'wheresoever', + 'whereto', 'whereunto', 'whereupon', 'wherever', 'wherewith', 'whether', 'whew', 'which', + 'whichever', 'whichsoever', 'while', 'whilst', 'whither', 'who', 'whoa', 'whoever', 'whole', + 'whom', 'whomever', 'whomsoever', 'whose', 'whosoever', 'why', 'will', 'wilt', 'with', 'within', + 'without', 'worse', 'worst', 'would', 'wow', 'ye', 'yet', 'year', 'yippee', 'you', 'your', 'yours', + 'yourself', 'yourselves', "n't", "'d", "'ll", "'m", "'re", "'s", "'ves"] + stopwords = set(sw) + self._stopwords = frozenset([w.lower() for w in stopwords]) + self._keep_only_alpha_num = keep_only_alpha_num + self._lower_case = lower_case + + @staticmethod + def _basic_clean(text): + return text.replace("’", "'") + + def __call__(self, text): + return self._nlp(SpacyTextParser._basic_clean(text)) + + def is_alpha_num(self, s): + return s and (re.match("^[a-zA-Z-_.0-9]+$", s) is not None) + + def proc_text(self, text): + lemmas = [] + tokens = [] + doc = self(text) + for tokObj in doc: + if self._remove_punct and tokObj.is_punct: + continue + 
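+            # For each remaining token, keep both its lemma and its surface form; optionally skip
+            # non-alphanumeric tokens, and drop tokens whose lowercased text or lemma is a stopword.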
lemma = tokObj.lemma_ + text = tokObj.text + if self._keep_only_alpha_num and not self.is_alpha_num(text): + continue + tok1 = text.lower() + tok2 = lemma.lower() + if tok1 in self._stopwords or tok2 in self._stopwords: + continue + + if self._lower_case: + text = text.lower() + lemma = lemma.lower() + + lemmas.append(lemma) + tokens.append(text) + + return ' '.join(lemmas), ' '.join(tokens) + \ No newline at end of file diff --git a/pyserini/search/lucene/ltr/_search_msmarco.py b/pyserini/search/lucene/ltr/_search_msmarco.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6ced6ae12343cb89b318bbd5d99d061d536005 --- /dev/null +++ b/pyserini/search/lucene/ltr/_search_msmarco.py @@ -0,0 +1,255 @@ +# +# Pyserini: Python interface to the Anserini IR toolkit built on Lucene +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python ltr search interface on MS MARCO passage. The main entry point is the ``MsmarcoPassageLtrSearcher`` +class. +""" + +import logging +import multiprocessing +import time +import os +from tqdm import tqdm +import pickle +from pyserini.index.lucene import IndexReader +from pyserini.search.lucene import LuceneSearcher +from pyserini.util import get_cache_home + +from pyserini.search.lucene.ltr._base import * + + +logger = logging.getLogger(__name__) + +class MsmarcoLtrSearcher: + def __init__(self, model: str, ibm_model:str, index:str, data: str, prebuilt: bool, topic: str): + #msmarco-ltr-passage + self.model = model + self.ibm_model = ibm_model + if prebuilt: + self.lucene_searcher = LuceneSearcher.from_prebuilt_index(index) + index_directory = os.path.join(get_cache_home(), 'indexes') + if data == 'passage': + index_path = os.path.join(index_directory, 'index-msmarco-passage-ltr-20210519-e25e33f.a5de642c268ac1ed5892c069bdc29ae3') + else: + index_path = os.path.join(index_directory, 'index-msmarco-doc-per-passage-ltr-20211031-33e4151.bd60e89041b4ebbabc4bf0cfac608a87') + self.index_reader = IndexReader.from_prebuilt_index(index) + else: + index_path = index + self.index_reader = IndexReader(index) + self.fe = FeatureExtractor(index_path, max(multiprocessing.cpu_count()//2, 1)) + self.data = data + + + def add_fe(self): + #self.fe.add(RunList('collections/msmarco-ltr-passage/run.monot5.run_list.whole.trec','t5')) + #self.fe.add(RunList('../bert.whole.doc.trec','bert')) + for qfield, ifield in [('analyzed', 'contents'), + ('text_unlemm', 'text_unlemm'), + ('text_bert_tok', 'text_bert_tok')]: + print(qfield, ifield) + self.fe.add(BM25Stat(SumPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + self.fe.add(BM25Stat(AvgPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + self.fe.add(BM25Stat(MedianPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + self.fe.add(BM25Stat(MaxPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + self.fe.add(BM25Stat(MinPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + self.fe.add(BM25Stat(MaxMinRatioPooler(), k1=2.0, b=0.75, field=ifield, qfield=qfield)) + + 
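+            # The remaining feature groups in this loop register, for the same (query field,
+            # index field) pair: pooled LMDir and DFR/DPH statistics, proximity features,
+            # document and query length statistics, tf/idf/ictf variants, and ordered/unordered
+            # term-pair window features.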
self.fe.add(LmDirStat(SumPooler(), mu=1000, field=ifield, qfield=qfield)) + self.fe.add(LmDirStat(AvgPooler(), mu=1000, field=ifield, qfield=qfield)) + self.fe.add(LmDirStat(MedianPooler(), mu=1000, field=ifield, qfield=qfield)) + self.fe.add(LmDirStat(MaxPooler(), mu=1000, field=ifield, qfield=qfield)) + self.fe.add(LmDirStat(MinPooler(), mu=1000, field=ifield, qfield=qfield)) + self.fe.add(LmDirStat(MaxMinRatioPooler(), mu=1000, field=ifield, qfield=qfield)) + + self.fe.add(NormalizedTfIdf(field=ifield, qfield=qfield)) + self.fe.add(ProbalitySum(field=ifield, qfield=qfield)) + + self.fe.add(DfrGl2Stat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrGl2Stat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrGl2Stat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrGl2Stat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrGl2Stat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrGl2Stat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(DfrInExpB2Stat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrInExpB2Stat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrInExpB2Stat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrInExpB2Stat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrInExpB2Stat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(DfrInExpB2Stat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(DphStat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(DphStat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(DphStat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(DphStat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(DphStat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(DphStat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(Proximity(field=ifield, qfield=qfield)) + self.fe.add(TpScore(field=ifield, qfield=qfield)) + self.fe.add(TpDist(field=ifield, qfield=qfield)) + + self.fe.add(DocSize(field=ifield)) + + self.fe.add(QueryLength(qfield=qfield)) + self.fe.add(QueryCoverageRatio(qfield=qfield)) + self.fe.add(UniqueTermCount(qfield=qfield)) + self.fe.add(MatchingTermCount(field=ifield, qfield=qfield)) + self.fe.add(SCS(field=ifield, qfield=qfield)) + + self.fe.add(TfStat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfStat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfStat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfStat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfStat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfStat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(TfIdfStat(True, AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfIdfStat(True, MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfIdfStat(True, SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfIdfStat(True, MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfIdfStat(True, MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(TfIdfStat(True, MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(NormalizedTfStat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(NormalizedTfStat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(NormalizedTfStat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(NormalizedTfStat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(NormalizedTfStat(MaxPooler(), field=ifield, qfield=qfield)) + 
self.fe.add(NormalizedTfStat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(IdfStat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(IdfStat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(IdfStat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(IdfStat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(IdfStat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(IdfStat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(IcTfStat(AvgPooler(), field=ifield, qfield=qfield)) + self.fe.add(IcTfStat(MedianPooler(), field=ifield, qfield=qfield)) + self.fe.add(IcTfStat(SumPooler(), field=ifield, qfield=qfield)) + self.fe.add(IcTfStat(MinPooler(), field=ifield, qfield=qfield)) + self.fe.add(IcTfStat(MaxPooler(), field=ifield, qfield=qfield)) + self.fe.add(IcTfStat(MaxMinRatioPooler(), field=ifield, qfield=qfield)) + + self.fe.add(UnorderedSequentialPairs(3, field=ifield, qfield=qfield)) + self.fe.add(UnorderedSequentialPairs(8, field=ifield, qfield=qfield)) + self.fe.add(UnorderedSequentialPairs(15, field=ifield, qfield=qfield)) + self.fe.add(OrderedSequentialPairs(3, field=ifield, qfield=qfield)) + self.fe.add(OrderedSequentialPairs(8, field=ifield, qfield=qfield)) + self.fe.add(OrderedSequentialPairs(15, field=ifield, qfield=qfield)) + self.fe.add(UnorderedQueryPairs(3, field=ifield, qfield=qfield)) + self.fe.add(UnorderedQueryPairs(8, field=ifield, qfield=qfield)) + self.fe.add(UnorderedQueryPairs(15, field=ifield, qfield=qfield)) + self.fe.add(OrderedQueryPairs(3, field=ifield, qfield=qfield)) + self.fe.add(OrderedQueryPairs(8, field=ifield, qfield=qfield)) + self.fe.add(OrderedQueryPairs(15, field=ifield, qfield=qfield)) + + start = time.time() + self.fe.add(IbmModel1(f"{self.ibm_model}/title_unlemm", "text_unlemm", "title_unlemm", "text_unlemm")) + end = time.time() + print('IBM model Load takes %.2f seconds' % (end - start)) + start = end + self.fe.add(IbmModel1(f"{self.ibm_model}url_unlemm", "text_unlemm", "url_unlemm", "text_unlemm")) + end = time.time() + print('IBM model Load takes %.2f seconds' % (end - start)) + start = end + self.fe.add(IbmModel1(f"{self.ibm_model}body", "text_unlemm", "body", "text_unlemm")) + end = time.time() + print('IBM model Load takes %.2f seconds' % (end - start)) + start = end + self.fe.add(IbmModel1(f"{self.ibm_model}text_bert_tok", "text_bert_tok", "text_bert_tok", "text_bert_tok")) + end = time.time() + print('IBM model Load takes %.2f seconds' % (end - start)) + start = end + + def batch_extract(self, df, queries, fe): + tasks = [] + task_infos = [] + group_lst = [] + + for qid, group in tqdm(df.groupby('qid')): + task = { + "qid": qid, + "docIds": [], + "rels": [], + "query_dict": queries[qid] + } + for t in group.reset_index().itertuples(): + if self.data == 'document': + if self.index_reader.doc(t.pid) != None: + task["docIds"].append(t.pid) + task_infos.append((qid, t.pid, t.rel)) + else: + task["docIds"].append(t.pid) + task_infos.append((qid, t.pid, t.rel)) + tasks.append(task) + group_lst.append((qid, len(task['docIds']))) + if len(tasks) == 1000: + features = fe.batch_extract(tasks) + task_infos = pd.DataFrame(task_infos, columns=['qid', 'pid', 'rel']) + group = pd.DataFrame(group_lst, columns=['qid', 'count']) + print(features.shape) + print(task_infos.qid.drop_duplicates().shape) + print(group.mean()) + print(features.head(10)) + print(features.info()) + yield task_infos, features, group + tasks = [] + task_infos = [] + group_lst = [] + # deal with rest + if len(tasks) > 
0: + features = fe.batch_extract(tasks) + task_infos = pd.DataFrame(task_infos, columns=['qid', 'pid', 'rel']) + group = pd.DataFrame(group_lst, columns=['qid', 'count']) + print(features.shape) + print(task_infos.qid.drop_duplicates().shape) + print(group.mean()) + print(features.head(10)) + print(features.info()) + yield task_infos, features, group + + return + + def batch_predict(self, models, dev_extracted, feature_name): + task_infos, features, group = dev_extracted + dev_X = features.loc[:, feature_name] + + task_infos['score'] = 0. + for gbm in models: + task_infos['score'] += gbm.predict(dev_X) + + def search(self, dev, queries): + batch_info = [] + start_extract = time.time() + models = pickle.load(open(self.model+'/model.pkl', 'rb')) + metadata = json.load(open(self.model+'/metadata.json', 'r')) + feature_used = metadata['feature_names'] + for dev_extracted in self.batch_extract(dev, queries, self.fe): + end_extract = time.time() + print(f'extract 1000 queries take {end_extract - start_extract}s') + task_infos, features, group = dev_extracted + start_predict = time.time() + self.batch_predict(models, dev_extracted, feature_used) + end_predict = time.time() + print(f'predict 1000 queries take {end_predict - start_predict}s') + batch_info.append(task_infos) + start_extract = time.time() + batch_info = pd.concat(batch_info, axis=0, ignore_index=True) + return batch_info + diff --git a/pyserini/search/lucene/querybuilder.py b/pyserini/search/lucene/querybuilder.py new file mode 100644 index 0000000000000000000000000000000000000000..7627121c2bd79f616a2a5854f1670454acfc6e4e --- /dev/null +++ b/pyserini/search/lucene/querybuilder.py @@ -0,0 +1,90 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This module provides Pyserini's Python interface query building for Anserini. +""" +import logging +from enum import Enum + +from pyserini.analysis import get_lucene_analyzer, Analyzer +from pyserini.pyclass import autoclass + +logger = logging.getLogger(__name__) + + +# Wrapper around Lucene clases +JTerm = autoclass('org.apache.lucene.index.Term') +JBooleanClause = autoclass('org.apache.lucene.search.BooleanClause') +JBoostQuery = autoclass('org.apache.lucene.search.BoostQuery') +JTermQuery = autoclass('org.apache.lucene.search.TermQuery') + +# Wrappers around Anserini classes +JQueryGeneratorUtils = autoclass('io.anserini.search.query.QueryGeneratorUtils') + + +class JBooleanClauseOccur(Enum): + should = JQueryGeneratorUtils.getBooleanClauseShould() + must = JQueryGeneratorUtils.getBooleanClauseMust() + must_not = JQueryGeneratorUtils.getBooleanClauseMustNot() + filter = JQueryGeneratorUtils.getBooleanClauseFilter() + + +def get_boolean_query_builder(): + """Get a BooleanQueryBuilder object. + + Returns + ------- + JBooleanQueryBuilder + """ + return JQueryGeneratorUtils.getBooleanQueryBuilder() + + +def get_term_query(term, field="contents", analyzer=get_lucene_analyzer()): + """Searches the collection. 
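+
+    More precisely, this builds a Lucene ``TermQuery`` over the first token produced by
+    analyzing ``term`` with the given analyzer for the specified ``field``; it does not
+    run a search by itself.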
+ + Parameters + ---------- + term : str + The query term string. + field : str + Field to search. + analyzer : Analyzer + Analyzer to use for tokenizing the query term. + + Returns + ------- + JTermQuery + """ + analyzer = Analyzer(analyzer) + return JTermQuery(JTerm(field, analyzer.analyze(term)[0])) + + +def get_boost_query(query, boost): + """Get boost query. + + Parameters + ---------- + query : str + The query object to boost. + boost : float + Score multiplier. + + Returns + ------- + JBoostQuery + """ + return JBoostQuery(query, boost) diff --git a/pyserini/search/lucene/reranker.py b/pyserini/search/lucene/reranker.py new file mode 100644 index 0000000000000000000000000000000000000000..e5fa177593b01189059621b5cccba5861c430425 --- /dev/null +++ b/pyserini/search/lucene/reranker.py @@ -0,0 +1,123 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import enum +import importlib +import os +import uuid +from sklearn.linear_model import LogisticRegression +from sklearn.svm import SVC +from typing import List + + +class ClassifierType(enum.Enum): + LR = 'lr' + SVM = 'svm' + + +class FusionMethod(enum.Enum): + AVG = 'avg' + + +class PseudoRelevanceClassifierReranker: + def __init__(self, lucene_index: str, vectorizer_class: str, clf_type: List[ClassifierType], r=10, n=100, alpha=0.5): + self.r = r + self.n = n + self.alpha = alpha + self.clf_type = clf_type + + # get vectorizer + module = importlib.import_module("pyserini.vectorizer") + VectorizerClass = getattr(module, vectorizer_class) + self.vectorizer = VectorizerClass(lucene_index, min_df=5) + + if len(clf_type) > 2: + raise Exception('Re-ranker takes at most two classifiers') + + def _set_classifier(self, clf_type: ClassifierType): + if clf_type == ClassifierType.LR: + self.clf = LogisticRegression(random_state=42) + elif clf_type == ClassifierType.SVM: + self.clf = SVC(kernel='linear', probability=True, random_state=42) + else: + raise Exception("Invalid classifier type") + + def _get_prf_vectors(self, doc_ids: List[str]): + train_docs = doc_ids[:self.r] + doc_ids[-self.n:] + train_labels = [1] * self.r + [0] * self.n + + train_vecs = self.vectorizer.get_vectors(train_docs) + test_vecs = self.vectorizer.get_vectors(doc_ids) + + return train_vecs, train_labels, test_vecs + + def _rerank_with_classifier(self, doc_ids: List[str], search_scores: List[float]): + train_vecs, train_labels, test_vecs = self._get_prf_vectors(doc_ids) + + # classification + self.clf.fit(train_vecs, train_labels) + pred = self.clf.predict_proba(test_vecs) + classifier_scores = self._normalize([p[1] for p in pred]) + search_scores = self._normalize(search_scores) + + # interpolation + interpolated_scores = [a * self.alpha + b * (1-self.alpha) for a, b in zip(classifier_scores, search_scores)] + + return self._sort_dual_list(interpolated_scores, doc_ids) + + def rerank(self, doc_ids: List[str], search_scores: List[float]): + # one classifier + if len(self.clf_type) == 1: + 
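+        # With one classifier the candidates are reranked directly; with two, each classifier
+        # reranks independently and the per-docid scores are averaged (FusionMethod.AVG).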
self._set_classifier(self.clf_type[0]) + return self._rerank_with_classifier(doc_ids, search_scores) + + # two classifier with FusionMethod.AVG + doc_score_dict = {} + for i in range(2): + self._set_classifier(self.clf_type[i]) + i_scores, i_doc_ids = self._rerank_with_classifier(doc_ids, search_scores) + + for score, doc_id in zip(i_scores, i_doc_ids): + if doc_id not in doc_score_dict: + doc_score_dict[doc_id] = set() + doc_score_dict[doc_id].add(score) + + r_scores, r_doc_ids = [], [] + for doc_id, score in doc_score_dict.items(): + avg = sum(score) / len(score) + r_doc_ids.append(doc_id) + r_scores.append(avg) + + return r_scores, r_doc_ids + + def _normalize(self, scores: List[float]): + low = min(scores) + high = max(scores) + width = high - low + + return [(s-low)/width for s in scores] + + # sort both list in decreasing order by using the list1 to compare + def _sort_dual_list(self, list1, list2): + zipped_lists = zip(list1, list2) + sorted_pairs = sorted(zipped_lists) + + tuples = zip(*sorted_pairs) + list1, list2 = [list(tuple) for tuple in tuples] + + list1.reverse() + list2.reverse() + return list1, list2 diff --git a/pyserini/search/nmslib/__init__.py b/pyserini/search/nmslib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e3188206e5cf2f8ce6d91dddf748fcd8bac193ff --- /dev/null +++ b/pyserini/search/nmslib/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Python interface to the Anserini IR toolkit built on Lucene +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._searcher import SearchResult, NmslibSearcher + +__all__ = ['SearchResult', 'NmslibSearcher'] diff --git a/pyserini/search/nmslib/__main__.py b/pyserini/search/nmslib/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..581fafa916a1c3e586308b7491688802b382d0b2 --- /dev/null +++ b/pyserini/search/nmslib/__main__.py @@ -0,0 +1,94 @@ +# +# Pyserini: Python interface to the Anserini IR toolkit built on Lucene +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import json +import time +from tqdm import tqdm + +from ._searcher import NmslibSearcher +from pyserini.output_writer import get_output_writer, OutputFormat, tie_breaker + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Search a nmslib index.') + parser.add_argument('--index', type=str, metavar='path to index or index name', required=True, + help="Path to nmslib index.") + parser.add_argument('--topics', type=str, required=True, help="path to topics") + parser.add_argument('--hits', type=int, metavar='num', required=False, default=1000, help="Number of hits.") + parser.add_argument('--output-format', type=str, metavar='format', default=OutputFormat.TREC.value, + help=f"Format of output. Available: {[x.value for x in list(OutputFormat)]}") + parser.add_argument('--output', type=str, metavar='path', required=True, help="Path to output file.") + parser.add_argument('--ef', type=int, required=False, default=256, help="hnsw ef_search") + parser.add_argument('--threads', type=int, metavar='num', required=False, default=1, + help="maximum threads to use during search") + parser.add_argument('--batch-size', type=int, metavar='num', required=False, default=1, + help="search batch of queries in parallel") + parser.add_argument('--is-sparse', action='store_true', required=False) + args = parser.parse_args() + + searcher = NmslibSearcher(args.index, ef_search=args.ef, is_sparse=args.is_sparse) + + topic_ids = [] + topic_vectors = [] + with open(args.topics) as topic_f: + for line in topic_f: + info = json.loads(line) + topic_ids.append(info['id']) + topic_vectors.append(info['vector']) + + if not searcher: + exit() + + # build output path + output_path = args.output + + print(f'Running {args.topics} topics, saving to {output_path}...') + tag = 'HNSW' + + # support trec and msmarco format only for now + output_writer = get_output_writer(output_path, OutputFormat(args.output_format), max_hits=args.hits, tag=tag) + + search_time = 0 + with output_writer: + batch_topic_vectors = list() + batch_topic_ids = list() + for index, (topic_id, vec) in enumerate(tqdm(zip(topic_ids, topic_vectors))): + if args.batch_size <= 1 and args.threads <= 1: + start = time.time() + hits = searcher.search(vec, args.hits) + search_time += time.time() - start + results = [(topic_id, hits)] + else: + batch_topic_ids.append(str(topic_id)) + batch_topic_vectors.append(vec) + if (index + 1) % args.batch_size == 0 or \ + index == len(topic_ids) - 1: + start = time.time() + results = searcher.batch_search( + batch_topic_vectors, batch_topic_ids, args.hits, args.threads) + search_time += time.time() - start + results = [(id_, results[id_]) for id_ in batch_topic_ids] + batch_topic_ids.clear() + batch_topic_vectors.clear() + else: + continue + + for topic, hits in results: + output_writer.write(topic, tie_breaker(hits)) + + results.clear() + + print(f'Search {len(topic_ids)} topics in {search_time} seconds') diff --git a/pyserini/search/nmslib/_searcher.py b/pyserini/search/nmslib/_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..e0db6a24743c5e479d221f906c1bcc3bc107dc87 --- /dev/null +++ b/pyserini/search/nmslib/_searcher.py @@ -0,0 +1,142 @@ +# +# Pyserini: Python interface to the Anserini IR toolkit built on Lucene +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import os +from dataclasses import dataclass +from typing import Dict, List + +import nmslib +import numpy as np +from scipy.sparse import csr_matrix, vstack + + +@dataclass +class SearchResult: + docid: str + score: float + + +class NmslibSearcher: + """Simple Searcher for vector representation + """ + + def __init__(self, index_dir: str, ef_search: int = 1000, is_sparse=False): + self.is_sparse = is_sparse + self.index, self.docids, self.token2id, self.metadata = self._load_index(index_dir, self.is_sparse) + self.index.setQueryTimeParams({'efSearch': ef_search}) + self.dimension = len(self.token2id) if self.is_sparse else None + + def search(self, query, k: int = 10) -> List[SearchResult]: + """Search the collection. + + Parameters + ---------- + query : query vector + k : int + Number of hits to return. + threads : int + Maximum number of threads to use for intra-query search. + Returns + ------- + List[SearchResult] + List of search results. + """ + if self.is_sparse: + query = self._token_dict_to_sparse_vector(query) + else: + query = np.array([query]) + indexes, scores = self.index.knnQueryBatch(query, k=k, num_threads=1)[0] + return [SearchResult(self.docids[idx], -score) + for score, idx in zip(scores, indexes) if idx != -1] + + def batch_search(self, queries, q_ids: List[str], k: int = 10, threads: int = 1) \ + -> Dict[str, List[SearchResult]]: + """ + + Parameters + ---------- + queries : vectors + q_ids : List[str] + List of corresponding query ids. + k : int + Number of hits to return. + threads : int + Maximum number of threads to use. + + Returns + ------- + Dict[str, List[SearchResult]] + Dictionary holding the search results, with the query ids as keys and the corresponding lists of search + results as the values. 
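+
+        Example (hypothetical index directory and toy query vectors; real vectors must match
+        the dimensionality of the underlying index):
+
+            from pyserini.search.nmslib import NmslibSearcher
+            searcher = NmslibSearcher('path/to/nmslib-index')
+            results = searcher.batch_search([[0.1, 0.3], [0.5, 0.2]], ['q1', 'q2'], k=10, threads=2)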
+ """ + if self.is_sparse: + queries = [self._token_dict_to_sparse_vector(query) for query in queries] + queries = vstack(queries) + else: + queries = np.array(queries) + I, D = zip(*self.index.knnQueryBatch(queries, k=k, num_threads=threads)) + return {key: [SearchResult(self.docids[idx], -score) + for score, idx in zip(distances, indexes) if idx != -1] + for key, distances, indexes in zip(q_ids, D, I)} + + def _load_index(self, index_dir: str, is_sparse: bool): + if is_sparse: + index = nmslib.init(method='hnsw', space='negdotprod_sparse', data_type=nmslib.DataType.SPARSE_VECTOR) + else: + index = nmslib.init(method='hnsw', space='negdotprod', data_type=nmslib.DataType.DENSE_VECTOR) + index_path = os.path.join(index_dir, 'index.bin') + docid_path = os.path.join(index_dir, 'docid') + tokens_path = os.path.join(index_dir, 'tokens') + metadata_path = os.path.join(index_dir, 'meta') + index.loadIndex(index_path, load_data=True) + docids = self._load_docids(docid_path) + token2id = self._load_tokens(tokens_path) + metadata = self._load_metadata(metadata_path) + return index, docids, token2id, metadata + + def _token_dict_to_sparse_vector(self, token_dict): + matrix_row, matrix_col, matrix_data = [], [], [] + tokens = token_dict.keys() + col = [] + data = [] + for tok in tokens: + if tok in self.token2id: + col.append(self.token2id[tok]) + data.append(token_dict[tok]) + matrix_row.extend([0] * len(col)) + matrix_col.extend(col) + matrix_data.extend(data) + vector = csr_matrix((matrix_data, (matrix_row, matrix_col)), shape=(1, self.dimension)) + return vector + + @staticmethod + def _load_docids(docid_path: str) -> List[str]: + docids = [line.rstrip() for line in open(docid_path, 'r').readlines()] + return docids + + @staticmethod + def _load_tokens(tokens_path: str): + if not os.path.exists(tokens_path): + return None + tokens = [line.rstrip() for line in open(tokens_path, 'r').readlines()] + return dict(zip(tokens, range(len(tokens)))) + + @staticmethod + def _load_metadata(metadata_path): + if not os.path.exists(metadata_path): + return None + meta = json.load(open(metadata_path)) + return meta diff --git a/pyserini/setup.py b/pyserini/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1cc1560c24235a5242bad80c9735a52fa29c2ef5 --- /dev/null +++ b/pyserini/setup.py @@ -0,0 +1,40 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Module for adding Anserini jar to classpath for pyjnius usage +""" + +import glob +import os + +import jnius_config + + +def configure_classpath(anserini_root="."): + """ + Parameters + ---------- + anserini_root : str + (Optional) path to root anserini directory. 
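+
+    Example (illustrative path only; assumes an Anserini fatjar has already been built into
+    the given directory):
+
+        from pyserini.setup import configure_classpath
+        configure_classpath('/path/to/anserini/target')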
+ + """ + paths = glob.glob(os.path.join(anserini_root, 'anserini-*-fatjar.jar')) + if not paths: + raise Exception('No matching jar file found in {}'.format(os.path.abspath(anserini_root))) + + latest = max(paths, key=os.path.getctime) + jnius_config.add_classpath(latest) diff --git a/pyserini/tokenize_json_collection.py b/pyserini/tokenize_json_collection.py new file mode 100644 index 0000000000000000000000000000000000000000..b5ae47d6e6f6b7793bca32965c1946db1ffcb4fc --- /dev/null +++ b/pyserini/tokenize_json_collection.py @@ -0,0 +1,62 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import json +import os + +from transformers import BertTokenizer, T5Tokenizer + + +def write_to_file(tokenizer, input, output): + with open(input, encoding='utf-8') as f: + out_f = open(output, 'w') + for i, line in enumerate(f): + fdict = json.loads(line) + contents = fdict['contents'] + tok = tokenizer.tokenize(contents) + tokcont = ' ' + fdict['contents'] = tokcont.join(tok) + out_f.write(json.dumps(fdict) + '\n') + if i % 10000 == 0: + print(f'Converted {i:,} docs, writing into file {output}') + out_f.close() + + +def main(args): + if ('bert' in args.tokenizer): + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + else: + tokenizer = T5Tokenizer.from_pretrained('castorini/doc2query-t5-base-msmarco') + if (os.path.isdir(args.input)): + for i, inf in enumerate(sorted(os.listdir(args.input))): + if not os.path.isdir(args.output): + os.mkdir(args.output) + outf = os.path.join(args.output, 'docs{:02d}.json'.format(i)) + write_to_file(tokenizer,os.path.join(args.input, inf), outf) + else: + write_to_file(tokenizer,args.input, args.output) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--input", type=str, help='Input file/dir', required=True) + parser.add_argument("--output", type=str, help='Output file/dir', required=True) + parser.add_argument("--tokenizer", type=str, help='full name of tokenizer', default='bert-base-uncased') + + args = parser.parse_args() + + main(parser.parse_args()) \ No newline at end of file diff --git a/pyserini/trectools/__init__.py b/pyserini/trectools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c5faca29258764830765c3a4709f81c4fded322f --- /dev/null +++ b/pyserini/trectools/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from ._base import AggregationMethod, RescoreMethod, TrecRun, Qrels + +__all__ = ['AggregationMethod', 'RescoreMethod', 'TrecRun', 'Qrels'] diff --git a/pyserini/trectools/__pycache__/__init__.cpython-310.pyc b/pyserini/trectools/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2bb310a213c36dc35fe93cfbcabfcc12b3383049 Binary files /dev/null and b/pyserini/trectools/__pycache__/__init__.cpython-310.pyc differ diff --git a/pyserini/trectools/__pycache__/_base.cpython-310.pyc b/pyserini/trectools/__pycache__/_base.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ec8cd46da44b58792e55ac20c2dfe55222dbea1 Binary files /dev/null and b/pyserini/trectools/__pycache__/_base.cpython-310.pyc differ diff --git a/pyserini/trectools/_base.py b/pyserini/trectools/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..e75a13a8b11cc57c5b5f777f84b7f9be7935fc42 --- /dev/null +++ b/pyserini/trectools/_base.py @@ -0,0 +1,351 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import itertools +import numpy as np +import pandas as pd + +from concurrent.futures import ThreadPoolExecutor +from copy import deepcopy +from enum import Enum +from typing import List, Set, Tuple + + +class AggregationMethod(Enum): + SUM = 'sum' + + +class RescoreMethod(Enum): + RRF = 'rrf' + SCALE = 'scale' + NORMALIZE = 'normalize' + + +class Qrels: + """Wrapper class for TREC Qrels. + + Parameters + ---------- + filepath : str + File path of a given TREC Qrels. + """ + + columns = ['topic', 'q0', 'docid', 'relevance_grade'] + + def __init__(self, filepath: str = None): + self.filepath = filepath + self.qrels_data = pd.DataFrame(columns=Qrels.columns) + + if filepath is not None: + self.read_run(self.filepath) + + def read_run(self, filepath: str): + self.qrels_data = pd.read_csv(filepath, sep='\s+', names=Qrels.columns) + + def get_relevance_grades(self) -> Set[str]: + """Return a set with all relevance grades.""" + + return set(sorted(self.qrels_data["relevance_grade"].unique())) + + def topics(self) -> Set[str]: + """Return a set with all topics.""" + + return set(sorted(self.qrels_data["topic"].unique())) + + def get_docids(self, topic, relevance_grades=None) -> List[str]: + """"Return a list of docids for a given topic and a list relevance grades. + + Parameters: + ---------- + relevance : List[int] + E.g. [0, 1, 2]. If not provided, then all relevance will be returned. + topic : int + """ + + if relevance_grades is None: + relevance_grades = self.get_relevance_grades() + + filtered_df = self.qrels_data[self.qrels_data['topic'] == topic] + filtered_df = filtered_df[filtered_df['relevance_grade'].isin(relevance_grades)] + + return filtered_df['docid'].tolist() + + +class TrecRun: + """Wrapper class for a TREC run. 
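+
+    A run is held internally as a pandas DataFrame whose columns follow ``TrecRun.columns``
+    ('topic', 'q0', 'docid', 'rank', 'score', 'tag').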
+ + Parameters + ---------- + filepath : str + File path of a given TREC Run. + """ + + columns = ['topic', 'q0', 'docid', 'rank', 'score', 'tag'] + + def __init__(self, filepath: str = None, resort: bool = False): + self.reset_data() + self.filepath = filepath + self.resort = resort + + if filepath is not None: + self.read_run(self.filepath,self.resort) + + def reset_data(self): + self.run_data = pd.DataFrame(columns=TrecRun.columns) + + def read_run(self, filepath: str, resort: bool = False) -> None: + self.run_data = pd.read_csv(filepath, sep='\s+', names=TrecRun.columns, dtype={'docid': 'str'}) + if resort: + self.run_data.sort_values(["topic", "score"], inplace=True, ascending=[True, False]) + self.run_data["rank"] = self.run_data.groupby("topic")["score"].rank(ascending=False,method='first') + + def topics(self) -> Set[str]: + """Return a set with all topics.""" + return set(sorted(self.run_data["topic"].unique())) + + def clone(self): + """Return a deep copy of the current instance.""" + return deepcopy(self) + + def save_to_txt(self, output_path: str, tag: str = None) -> None: + if len(self.run_data) == 0: + raise Exception('Nothing to save. TrecRun is empty') + + if tag is not None: + self.run_data['tag'] = tag + + self.run_data = self.run_data.sort_values(by=['topic', 'score'], ascending=[True, False]) + self.run_data.to_csv(output_path, sep=' ', header=False, index=False) + + def get_docs_by_topic(self, topic: str, max_docs: int = None): + docs = self.run_data[self.run_data['topic'] == topic] + + if max_docs is not None: + docs = docs.head(max_docs) + + return docs + + def rescore(self, method: RescoreMethod, rrf_k: int = None, scale: float = None): + # Refer to this guide on how to efficiently manipulate dataframes: https://engineering.upside.com/a-beginners-guide-to-optimizing-pandas-code-for-speed-c09ef2c6a4d6 + if method == RescoreMethod.RRF: + assert rrf_k is not None, 'Parameter "rrf_k" must be a valid integer.' + self.run_data['score'] = 1 / (rrf_k + self.run_data['rank'].values) + elif method == RescoreMethod.SCALE: + assert scale is not None, 'Parameter "scale" must not be none.' + self.run_data['score'] = self.run_data['score'].values * scale + elif method == RescoreMethod.NORMALIZE: + for topic in self.topics(): + scores = self.run_data[self.run_data['topic'] == topic]['score'].copy().values + low = np.min(scores) + high = np.max(scores) + + if high - low == 0: + self.run_data.loc[self.run_data['topic'] == topic, 'score'] = 1 + else: + scores = (scores - low) / (high - low) + scores = [float(score) for score in scores] + self.run_data.loc[self.run_data['topic'] == topic, 'score'] = scores + else: + raise NotImplementedError() + + return self + + def to_numpy(self) -> np.ndarray: + return self.run_data.to_numpy(copy=True) + + def discard_qrels(self, qrels: Qrels, clone=True): + """Discard each docid in self if docid is also in the given qrels. + This operation is performed on each topic separately. + + Parameters: + ---------- + qrels : Qrels + Qrels with docids to remove from TrecRun. + clone : Bool + Return a new TrecRun object if True, else self will be modified and returned. + """ + + return self._filter_from_qrels(qrels, False, clone=clone) + + def retain_qrels(self, qrels: Qrels, clone=True): + """Retain each docid in self if docid is also in the given qrels. + This operation is performed on each topic separately. + After this operation, judged@x based on the given qrels should be 1. 
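+        In other words, only judged documents (at any relevance grade) are kept; unjudged
+        documents are dropped from every topic.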
+ + Parameters: + ---------- + qrels : Qrels + Qrels with docids to keep in TrecRun. + clone : Bool + Return a new TrecRun object if True, else self will be modified and returned. + """ + + return self._filter_from_qrels(qrels, True, clone=clone) + + def _filter_from_qrels(self, qrels: Qrels, keep: bool, clone=True): + """Private helper function to remove/keep each docid in self if docid is also in the given Qrels object. + This operation is performed on each topic separately. + + Parameters: + ---------- + qrels : Qrels + Qrels with docids to remove from or keep in TrecRun. + clone : Bool + Return a new TrecRun object if True, else self will be modified and returned. + """ + + df_list = [] + for topic in self.topics(): + if topic not in qrels.topics(): + continue + + qrels_docids = qrels.get_docids(topic) + topic_df = self.run_data[self.run_data['topic'] == topic] + if keep is True: + topic_df = topic_df[topic_df['docid'].isin(qrels_docids)] + else: + topic_df = topic_df[~topic_df['docid'].isin(qrels_docids)] + df_list.append(topic_df) + + run = TrecRun() if clone is True else self + return TrecRun.from_dataframes(df_list, run) + + @staticmethod + def get_all_topics_from_runs(runs) -> Set[str]: + all_topics = set() + for run in runs: + all_topics = all_topics.union(run.topics()) + + return all_topics + + @staticmethod + def merge(runs, aggregation: AggregationMethod, depth: int = None, k: int = None): + """Return a TrecRun by aggregating docid in various ways such as summing scores + + Parameters + ---------- + runs : List[TrecRun] + List of ``TrecRun`` objects. + aggregation : AggregationMethod + The aggregation method to use. + depth : int + Maximum number of results from each input run to consider. Set to ``None`` by default, which indicates that + the complete list of results is considered. + k : int + Length of final results list. Set to ``None`` by default, which indicates that the union of all input documents + are ranked. + """ + + if len(runs) < 2: + raise Exception('Merge requires at least 2 runs.') + + rows = [] + + if aggregation == AggregationMethod.SUM: + topics = list(TrecRun.get_all_topics_from_runs(runs)) + + def merge_topic(topic): + doc_scores = dict() + + for run in runs: + for docid, score in run.get_docs_by_topic(topic, depth)[['docid', 'score']].values: + doc_scores[docid] = doc_scores.get(docid, 0.0) + score + + sorted_doc_scores = sorted(iter(doc_scores.items()), key=lambda x: (-x[1], x[0])) + sorted_doc_scores = sorted_doc_scores if k is None else sorted_doc_scores[:k] + + return [ + (topic, 'Q0', docid, rank, score, 'merge_sum') + for rank, (docid, score) in enumerate(sorted_doc_scores, start=1) + ] + + max_workers = max(len(topics)/10, 1) + with ThreadPoolExecutor(max_workers=int(max_workers)) as exec: + results = list(exec.map(merge_topic, topics)) + + rows = list(itertools.chain.from_iterable(results)) + else: + raise NotImplementedError() + + return TrecRun.from_list(rows) + + @staticmethod + def from_dataframes(dfs, run=None): + """Return a TrecRun by populating dataframe with the provided list of dataframes. + + Parameters + ---------- + dfs: List[Dataframe] + A list of Dataframes conforming to TrecRun.columns + + run: TrecRun + Set to ``None`` by default. If None, then a new instance of TrecRun will be created. + Else, the given TrecRun will be modified. 
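+
+        Example (toy dataframe for illustration; the columns must follow ``TrecRun.columns``):
+
+            import pandas as pd
+            from pyserini.trectools import TrecRun
+            df = pd.DataFrame([('q1', 'Q0', 'doc1', 1, 12.5, 'tag')], columns=TrecRun.columns)
+            run = TrecRun.from_dataframes([df])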
+ """ + + res = TrecRun() if run is None else run + res.reset_data() + res.run_data = pd.concat([df for df in dfs]) + + return res + + @staticmethod + def from_list(rows, run=None): + """Return a TrecRun by populating dataframe with the provided list of tuples. + For performance reasons, df.to_numpy() is faster than df.iterrows(). + When manipulating dataframes, we first dump to np.ndarray and construct a list of tuples with new values. + Then use this function to convert the list of tuples to a TrecRun object. + + Parameters + ---------- + rows: List[tuples] + List of tuples in the following format: (topic, 'Q0', docid, rank, score, tag) + + run: TrecRun + Set to ``None`` by default. If None, then a new instance of TrecRun will be created. + Else, the given TrecRun will be modified. + """ + + res = TrecRun() if run is None else run + + df = pd.DataFrame(rows) + df.columns = TrecRun.columns + res.run_data = df.copy() + + return res + + @staticmethod + def from_search_results(docid_score_pair: Tuple[str, float], topic=1): + rows = [] + + for rank, (docid, score) in enumerate(docid_score_pair, start=1): + rows.append((topic, 'Q0', docid, rank, score, 'searcher')) + + return TrecRun.from_list(rows) + + @staticmethod + def concat(runs): + """Return a new TrecRun by concatenating a list of TrecRuns + + Parameters + ---------- + runs : List[TrecRun] + List of ``TrecRun`` objects. + """ + + run = TrecRun() + run.run_data = pd.concat([run.run_data for run in runs]) + return run diff --git a/pyserini/util.py b/pyserini/util.py new file mode 100644 index 0000000000000000000000000000000000000000..5309153425c59e89cbb61e9db48aee8a06900bcd --- /dev/null +++ b/pyserini/util.py @@ -0,0 +1,283 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import hashlib +import os +import re +import shutil +import tarfile +import logging +from urllib.error import HTTPError, URLError +from urllib.request import urlretrieve + +import pandas as pd +from tqdm import tqdm + +from pyserini.encoded_query_info import QUERY_INFO +from pyserini.encoded_corpus_info import CORPUS_INFO +from pyserini.evaluate_script_info import EVALUATION_INFO +from pyserini.prebuilt_index_info import TF_INDEX_INFO, FAISS_INDEX_INFO, IMPACT_INDEX_INFO + + +logger = logging.getLogger(__name__) + + +# https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5 +class TqdmUpTo(tqdm): + def update_to(self, b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. + """ + if tsize is not None: + self.total = tsize + self.update(b * bsize - self.n) # will also set self.n = b * bsize + + +# For large files, we need to compute MD5 block by block. 
See: +# https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python +def compute_md5(file, block_size=2**20): + m = hashlib.md5() + with open(file, 'rb') as f: + while True: + buf = f.read(block_size) + if not buf: + break + m.update(buf) + return m.hexdigest() + + +def download_url(url, save_dir, local_filename=None, md5=None, force=False, verbose=True): + # If caller does not specify local filename, figure it out from the download URL: + if not local_filename: + filename = url.split('/')[-1] + filename = re.sub('\\?dl=1$', '', filename) # Remove the Dropbox 'force download' parameter + else: + # Otherwise, use the specified local_filename: + filename = local_filename + + destination_path = os.path.join(save_dir, filename) + + if verbose: + print(f'Downloading {url} to {destination_path}...') + + # Check to see if file already exists, if so, simply return (quietly) unless force=True, in which case we remove + # destination file and download fresh copy. + if os.path.exists(destination_path): + if verbose: + print(f'{destination_path} already exists!') + if not force: + if verbose: + print(f'Skipping download.') + return destination_path + if verbose: + print(f'force=True, removing {destination_path}; fetching fresh copy...') + os.remove(destination_path) + + with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=filename) as t: + urlretrieve(url, filename=destination_path, reporthook=t.update_to) + + if md5: + md5_computed = compute_md5(destination_path) + assert md5_computed == md5, f'{destination_path} does not match checksum! Expecting {md5} got {md5_computed}.' + + return destination_path + + +def get_cache_home(): + custom_dir = os.environ.get("PYSERINI_CACHE") + if custom_dir is not None and custom_dir != '': + return custom_dir + return os.path.expanduser(os.path.join(f'~{os.path.sep}.cache', "pyserini")) + +def download_and_unpack_index(url, index_directory='indexes', local_filename=False, + force=False, verbose=True, prebuilt=False, md5=None): + # If caller does not specify local filename, figure it out from the download URL: + if not local_filename: + index_name = url.split('/')[-1] + else: + # Otherwise, use the specified local_filename: + index_name = local_filename + # Remove the suffix: + index_name = re.sub('''.tar.gz.*$''', '', index_name) + + if prebuilt: + index_directory = os.path.join(get_cache_home(), index_directory) + index_path = os.path.join(index_directory, f'{index_name}.{md5}') + + if not os.path.exists(index_directory): + os.makedirs(index_directory) + + local_tarball = os.path.join(index_directory, f'{index_name}.tar.gz') + # If there's a local tarball, it's likely corrupted, because we remove the local tarball on success (below). + # So, we want to remove. + if os.path.exists(local_tarball): + os.remove(local_tarball) + else: + local_tarball = os.path.join(index_directory, f'{index_name}.tar.gz') + index_path = os.path.join(index_directory, f'{index_name}') + + # Check to see if index already exists, if so, simply return (quietly) unless force=True, in which case we remove + # index and download fresh copy. 
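+    # For prebuilt artifacts the target path is <cache>/<index_directory>/<name>.<md5>, where
+    # <cache> is $PYSERINI_CACHE if set and ~/.cache/pyserini otherwise.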
+ if os.path.exists(index_path): + if not force: + if verbose: + print(f'{index_path} already exists, skipping download.') + return index_path + if verbose: + print(f'{index_path} already exists, but force=True, removing {index_path} and fetching fresh copy...') + shutil.rmtree(index_path) + + print(f'Downloading index at {url}...') + download_url(url, index_directory, local_filename=local_filename, verbose=False, md5=md5) + + if verbose: + print(f'Extracting {local_tarball} into {index_path}...') + try: + tarball = tarfile.open(local_tarball) + except: + local_tarball = os.path.join(index_directory, f'{index_name}') + tarball = tarfile.open(local_tarball) + dirs_in_tarball = [member.name for member in tarball if member.isdir()] + assert len(dirs_in_tarball), f"Detect multiple members ({', '.join(dirs_in_tarball)}) under the tarball {local_tarball}." + tarball.extractall(index_directory) + tarball.close() + os.remove(local_tarball) + + if prebuilt: + dir_in_tarball = dirs_in_tarball[0] + if dir_in_tarball != index_name: + logger.info(f"Renaming {index_directory}/{dir_in_tarball} into {index_directory}/{index_name}.") + index_name = dir_in_tarball + os.rename(os.path.join(index_directory, f'{index_name}'), index_path) + + return index_path + + +def check_downloaded(index_name): + if index_name in TF_INDEX_INFO: + target_index = TF_INDEX_INFO[index_name] + elif index_name in IMPACT_INDEX_INFO: + target_index = IMPACT_INDEX_INFO[index_name] + else: + target_index = FAISS_INDEX_INFO[index_name] + index_url = target_index['urls'][0] + index_md5 = target_index['md5'] + index_name = index_url.split('/')[-1] + index_name = re.sub('''.tar.gz.*$''', '', index_name) + index_directory = os.path.join(get_cache_home(), 'indexes') + index_path = os.path.join(index_directory, f'{index_name}.{index_md5}') + + return os.path.exists(index_path) + + +def get_sparse_indexes_info(): + df = pd.DataFrame.from_dict({**TF_INDEX_INFO, **IMPACT_INDEX_INFO}) + for index in df.keys(): + df[index]['downloaded'] = check_downloaded(index) + + with pd.option_context('display.max_rows', None, 'display.max_columns', + None, 'display.max_colwidth', None, 'display.colheader_justify', 'left'): + print(df) + + +def get_impact_indexes_info(): + df = pd.DataFrame.from_dict(IMPACT_INDEX_INFO) + for index in df.keys(): + df[index]['downloaded'] = check_downloaded(index) + + with pd.option_context('display.max_rows', None, 'display.max_columns', + None, 'display.max_colwidth', None, 'display.colheader_justify', 'left'): + print(df) + + +def get_dense_indexes_info(): + df = pd.DataFrame.from_dict(FAISS_INDEX_INFO) + for index in df.keys(): + df[index]['downloaded'] = check_downloaded(index) + + with pd.option_context('display.max_rows', None, 'display.max_columns', + None, 'display.max_colwidth', None, 'display.colheader_justify', 'left'): + print(df) + + +def download_prebuilt_index(index_name, force=False, verbose=True, mirror=None): + if index_name not in TF_INDEX_INFO and index_name not in FAISS_INDEX_INFO and index_name not in IMPACT_INDEX_INFO: + raise ValueError(f'Unrecognized index name {index_name}') + if index_name in TF_INDEX_INFO: + target_index = TF_INDEX_INFO[index_name] + elif index_name in IMPACT_INDEX_INFO: + target_index = IMPACT_INDEX_INFO[index_name] + else: + target_index = FAISS_INDEX_INFO[index_name] + index_md5 = target_index['md5'] + for url in target_index['urls']: + local_filename = target_index['filename'] if 'filename' in target_index else None + try: + return download_and_unpack_index(url, 
local_filename=local_filename, + prebuilt=True, md5=index_md5, verbose=verbose) + except (HTTPError, URLError) as e: + print(f'Unable to download pre-built index at {url}, trying next URL...') + raise ValueError(f'Unable to download pre-built index at any known URLs.') + + +def download_encoded_queries(query_name, force=False, verbose=True, mirror=None): + if query_name not in QUERY_INFO: + raise ValueError(f'Unrecognized query name {query_name}') + query_md5 = QUERY_INFO[query_name]['md5'] + for url in QUERY_INFO[query_name]['urls']: + try: + return download_and_unpack_index(url, index_directory='queries', prebuilt=True, md5=query_md5) + except (HTTPError, URLError) as e: + print(f'Unable to download encoded query at {url}, trying next URL...') + raise ValueError(f'Unable to download encoded query at any known URLs.') + + +def download_encoded_corpus(corpus_name, force=False, verbose=True, mirror=None): + if corpus_name not in CORPUS_INFO: + raise ValueError(f'Unrecognized corpus name {corpus_name}') + corpus_md5 = CORPUS_INFO[corpus_name]['md5'] + for url in CORPUS_INFO[corpus_name]['urls']: + local_filename = CORPUS_INFO[corpus_name]['filename'] if 'filename' in CORPUS_INFO[corpus_name] else None + try: + return download_and_unpack_index(url, local_filename=local_filename, index_directory='corpus', prebuilt=True, md5=corpus_md5) + except (HTTPError, URLError) as e: + print(f'Unable to download encoded corpus at {url}, trying next URL...') + raise ValueError(f'Unable to download encoded corpus at any known URLs.') + + +def download_evaluation_script(evaluation_name, force=False, verbose=True, mirror=None): + if evaluation_name not in EVALUATION_INFO: + raise ValueError(f'Unrecognized evaluation name {evaluation_name}') + for url in EVALUATION_INFO[evaluation_name]['urls']: + try: + save_dir = os.path.join(get_cache_home(), 'eval') + if not os.path.exists(save_dir): + os.makedirs(save_dir) + return download_url(url, save_dir=save_dir) + except HTTPError: + print(f'Unable to download evaluation script at {url}, trying next URL...') + raise ValueError(f'Unable to download evaluation script at any known URLs.') + + +def get_sparse_index(index_name): + if index_name not in FAISS_INDEX_INFO: + raise ValueError(f'Unrecognized index name {index_name}') + return FAISS_INDEX_INFO[index_name]["texts"] diff --git a/pyserini/vectorizer/__init__.py b/pyserini/vectorizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dafc6252140de2bc22696d1d19d5df7da82ccc67 --- /dev/null +++ b/pyserini/vectorizer/__init__.py @@ -0,0 +1,19 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from ._base import BM25Vectorizer, TfidfVectorizer + +__all__ = ['BM25Vectorizer', 'TfidfVectorizer'] diff --git a/pyserini/vectorizer/_base.py b/pyserini/vectorizer/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..255656c3848cfc3b6e436c2775cefc6ab05b05d1 --- /dev/null +++ b/pyserini/vectorizer/_base.py @@ -0,0 +1,194 @@ +# +# Pyserini: Reproducible IR research with sparse and dense representations +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import math +from typing import List, Optional +from sklearn.preprocessing import normalize + +from scipy.sparse import csr_matrix + +from pyserini import index, search +from pyserini.analysis import Analyzer, get_lucene_analyzer +from tqdm import tqdm + + +class Vectorizer: + """Base class for vectorizer implemented on top of Pyserini. + + Parameters + ---------- + lucene_index_path : str + Path to lucene index folder + min_df : int + Minimum acceptable document frequency + verbose : bool + Whether to print out debugging information + """ + + def __init__(self, lucene_index_path: str, min_df: int = 1, verbose: bool = False): + self.min_df: int = min_df + self.verbose: bool = verbose + self.index_reader = index.IndexReader(lucene_index_path) + self.searcher = search.LuceneSearcher(lucene_index_path) + self.num_docs: int = self.searcher.num_docs + self.stats = self.index_reader.stats() + self.analyzer = Analyzer(get_lucene_analyzer()) + + # build vocabulary + self.vocabulary_ = set() + for term in self.index_reader.terms(): + if term.df > self.min_df: + self.vocabulary_.add(term.term) + self.vocabulary_ = sorted(self.vocabulary_) + + # build term to index mapping + self.term_to_index = {} + for i, term in enumerate(self.vocabulary_): + self.term_to_index[term] = i + self.vocabulary_size = len(self.vocabulary_) + + if self.verbose: + print(f'Found {self.vocabulary_size} terms with min_df={self.min_df}') + + def get_query_vector(self, query: str): + matrix_row, matrix_col, matrix_data = [], [], [] + tokens = self.analyzer.analyze(query) + for term in tokens: + if term in self.vocabulary_: + matrix_row.append(0) + matrix_col.append(self.term_to_index[term]) + matrix_data.append(1) + vectors = csr_matrix((matrix_data, (matrix_row, matrix_col)), shape=(1, self.vocabulary_size)) + return vectors + + +class TfidfVectorizer(Vectorizer): + """Wrapper class for tf-idf vectorizer implemented on top of Pyserini. 
+
+    Parameters
+    ----------
+    lucene_index_path : str
+        Path to lucene index folder
+    min_df : int
+        Minimum acceptable document frequency
+    verbose : bool
+        Whether to print out debugging information
+    """
+
+    def __init__(self, lucene_index_path: str, min_df: int = 1, verbose: bool = False):
+        super().__init__(lucene_index_path, min_df, verbose)
+
+        self.idf_ = {}
+        for term in self.index_reader.terms():
+            self.idf_[term.term] = math.log(self.num_docs / term.df)
+
+    def get_vectors(self, docids: List[str], norm: Optional[str] = 'l2'):
+        """Get the tf-idf vectors given a list of docids.
+
+        Parameters
+        ----------
+        docids : List[str]
+            List of document ids to vectorize.
+        norm : Optional[str]
+            Norm (e.g., 'l1' or 'l2') used to normalize the rows of the sparse matrix; None to skip normalization.
+
+        Returns
+        -------
+        csr_matrix
+            Sparse matrix representation of tf-idf vectors
+        """
+        matrix_row, matrix_col, matrix_data = [], [], []
+        num_docs = len(docids)
+
+        for i, doc_id in enumerate(tqdm(docids)):
+            # Term frequencies for this document
+            tf = self.index_reader.get_document_vector(doc_id)
+            if tf is None:
+                continue
+
+            # Filter out terms that are not in the vocabulary
+            tf = {t: tf[t] for t in tf if t in self.term_to_index}
+
+            # Convert from dict to sparse matrix
+            for term in tf:
+                tfidf = tf[term] * self.idf_[term]
+                matrix_row.append(i)
+                matrix_col.append(self.term_to_index[term])
+                matrix_data.append(tfidf)
+
+        vectors = csr_matrix((matrix_data, (matrix_row, matrix_col)), shape=(num_docs, self.vocabulary_size))
+
+        if norm:
+            return normalize(vectors, norm=norm)
+        return vectors
+
+
+class BM25Vectorizer(Vectorizer):
+    """Wrapper class for BM25 vectorizer implemented on top of Pyserini.
+
+    Parameters
+    ----------
+    lucene_index_path : str
+        Path to lucene index folder
+    min_df : int
+        Minimum acceptable document frequency
+    verbose : bool
+        Whether to print out debugging information
+    """
+
+    def __init__(self, lucene_index_path: str, min_df: int = 1, verbose: bool = False):
+        super().__init__(lucene_index_path, min_df, verbose)
+
+    def get_vectors(self, docids: List[str], norm: Optional[str] = 'l2'):
+        """Get the BM25 vectors given a list of docids.
+
+        Parameters
+        ----------
+        docids : List[str]
+            List of document ids to vectorize.
+        norm : Optional[str]
+            Norm (e.g., 'l1' or 'l2') used to normalize the rows of the sparse matrix; None to skip normalization.
+
+        Returns
+        -------
+        csr_matrix
+            Sparse matrix representation of BM25 vectors
+        """
+        matrix_row, matrix_col, matrix_data = [], [], []
+        num_docs = len(docids)
+
+        for i, doc_id in enumerate(tqdm(docids)):
+            # Term frequencies for this document
+            tf = self.index_reader.get_document_vector(doc_id)
+            if tf is None:
+                continue
+
+            # Filter out terms that are not in the vocabulary
+            tf = {t: tf[t] for t in tf if t in self.term_to_index}
+
+            # Convert from dict to sparse matrix
+            for term in tf:
+                bm25_weight = self.index_reader.compute_bm25_term_weight(doc_id, term, analyzer=None)
+                matrix_row.append(i)
+                matrix_col.append(self.term_to_index[term])
+                matrix_data.append(bm25_weight)
+
+        vectors = csr_matrix((matrix_data, (matrix_row, matrix_col)), shape=(num_docs, self.vocabulary_size))
+
+        if norm:
+            return normalize(vectors, norm=norm)
+        return vectors
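
To make the intended use of the vectorizer classes above concrete, here is a minimal usage sketch (not part of the patch). The index path and document ids are placeholders; any Lucene index readable by Pyserini, with docids that actually exist in that index, would work the same way, and TfidfVectorizer is a drop-in replacement for BM25Vectorizer here.

    from pyserini.vectorizer import BM25Vectorizer

    # Hypothetical local index path and document ids -- substitute your own.
    vectorizer = BM25Vectorizer('indexes/sample_collection_jsonl', min_df=5, verbose=True)
    docids = ['doc1', 'doc2', 'doc3']

    # Rows correspond to docids, columns to the vocabulary built at construction time.
    doc_matrix = vectorizer.get_vectors(docids, norm='l2')   # scipy.sparse.csr_matrix

    # Queries are analyzed with the same Lucene analyzer and mapped into the same vocabulary,
    # so a sparse dot product scores each document against the query.
    query_vector = vectorizer.get_query_vector('black bear attacks')
    scores = doc_matrix.dot(query_vector.T).toarray().ravel()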
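
Similarly, a rough sketch of how the download helpers from the earlier hunk are typically driven. The module path (pyserini.util) and the index name are assumptions: the file header for that hunk is not visible above, and the name must be a key in TF_INDEX_INFO, IMPACT_INDEX_INFO, or FAISS_INDEX_INFO.

    # Assumed module path; adjust to wherever these helpers actually live.
    from pyserini.util import check_downloaded, download_prebuilt_index, get_cache_home

    index_name = 'some-prebuilt-index'   # hypothetical key in one of the *_INDEX_INFO registries

    print(get_cache_home())              # ~/.cache/pyserini by default, or $PYSERINI_CACHE if set
    print(check_downloaded(index_name))  # True once <cache>/indexes/<name>.<md5> exists

    # Tries each registered URL in turn, verifies the md5, unpacks the tarball, and returns the
    # local path; if the index is already cached, the existing path is returned without re-downloading.
    index_path = download_prebuilt_index(index_name, verbose=True)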