Spaces:
Sleeping
Sleeping
import requests | |
from bs4 import BeautifulSoup | |
import re | |
import json | |
import os | |
import pathlib | |
import shutil | |
def get_ir_dataset_names(timeout: float = 30.0) -> list:
    """Return the ir_datasets identifiers that provide docs, queries and qrels.

    Downloads ``metadata.json`` from the ir_datasets GitHub repository and
    keeps every top-level key whose entry contains all three of the
    ``"docs"``, ``"queries"`` and ``"qrels"`` keys.

    Args:
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout, ``requests`` may block indefinitely on a
            stalled connection.

    Returns:
        The matching dataset identifiers, in the order they appear in the
        downloaded metadata.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a non-success status code.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/allenai/ir_datasets/master/ir_datasets/etc/metadata.json"
    required_sections = ("docs", "queries", "qrels")

    with requests.get(url, timeout=timeout) as response:
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        metadata = json.loads(response.text)

    return [
        name
        for name, entry in metadata.items()
        if all(section in entry for section in required_sections)
    ]
if __name__ == "__main__":
    # Script entry point: collect the qualifying dataset identifiers and
    # store them as 4-space-indented JSON in the current working directory.
    dataset_names = get_ir_dataset_names()
    serialized = json.dumps(dataset_names, indent=4)
    with open("ir_dataset_names.json", "w") as out_file:
        out_file.write(serialized)