Spaces:
Sleeping
Sleeping
File size: 675 Bytes
68ecf38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
import requests
from bs4 import BeautifulSoup
import re
import json
import os
import pathlib
import shutil
def get_ir_dataset_names():
    """Return the ir_datasets dataset names that have docs, queries and qrels.

    Downloads ``metadata.json`` from the ir_datasets GitHub repository and
    keeps every top-level key whose entry contains all three of the
    ``"docs"``, ``"queries"`` and ``"qrels"`` keys.

    Returns:
        list[str]: The matching dataset names, in the order they appear in
        the metadata file.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://raw.githubusercontent.com/allenai/ir_datasets/master/ir_datasets/etc/metadata.json"
    required = ("docs", "queries", "qrels")
    # requests has no default timeout; bound the wait so a stalled
    # connection cannot hang the script indefinitely.
    with requests.get(url, timeout=30) as r:
        # Fail loudly on 4xx/5xx rather than handing an error page to the
        # JSON parser and getting a misleading decode error.
        r.raise_for_status()
        data = json.loads(r.text)
    return [
        name
        for name, entry in data.items()
        if all(key in entry for key in required)
    ]
if __name__ == "__main__":
    # Script entry point: fetch the qualifying dataset names and dump them
    # as a pretty-printed JSON list.
    names = get_ir_dataset_names()
    # Relative path: the file is created in the current working directory.
    # Explicit encoding keeps the output independent of the platform default.
    with open("ir_dataset_names.json", "w", encoding="utf-8") as fout:
        json.dump(names, fout, indent=4)