File size: 675 Bytes
68ecf38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import requests
from bs4 import BeautifulSoup
import re
import json
import os
import pathlib
import shutil



def get_ir_dataset_names():
    """Return the ir_datasets dataset names that have docs, queries and qrels.

    Downloads the ``metadata.json`` file from the ir_datasets GitHub
    repository and keeps every top-level key whose entry contains all of
    ``"docs"``, ``"queries"`` and ``"qrels"``.

    Returns:
        A list of matching dataset names, in the order they appear in the
        metadata file.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/allenai/ir_datasets/master/ir_datasets/etc/metadata.json"
    required_components = ("docs", "queries", "qrels")
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    with requests.get(url, timeout=30) as r:
        # Fail loudly on HTTP errors instead of parsing an error page as JSON.
        r.raise_for_status()
        data = json.loads(r.text)
    return [
        name
        for name, entry in data.items()
        if all(component in entry for component in required_components)
    ]


if __name__ == "__main__":
    # Script entry point: collect the qualifying dataset names and store them
    # as an indented JSON array in the current working directory.
    dataset_names = get_ir_dataset_names()
    serialized = json.dumps(dataset_names, indent=4)
    with open("ir_dataset_names.json", "w") as out_file:
        out_file.write(serialized)