leaderboard

Running on CPU Upgrade

App Files Files Community

leaderboard / src /about.py

hanhainebula

Modify the evaluation steps

606d718 6 months ago

raw

history blame

4.23 kB

	# Your leaderboard name
	# HTML <h1> heading (centered via align="center", id "space-title").
	# The literal spans two source lines, so the line break before "(Preview)"
	# is part of the string value.
	TITLE = """<h1 align="center" id="space-title">AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark
	(Preview) </h1>"""

	# What does your leaderboard evaluate?
	# Markdown snippet consisting of a single link to the AIR-Bench GitHub
	# repository. The value begins with a newline because the text starts on the
	# line after the opening triple quote.
	INTRODUCTION_TEXT = """
	Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)
	"""

	# Which evaluations are you running? how can people reproduce what you have?
	# Markdown snippet: a "How it works" heading followed by a link to the
	# AIR-Bench GitHub repository.
	# Plain (non-f) string on purpose: the text has no replacement fields, and a
	# plain literal keeps any future "{" / "}" in the Markdown from being parsed
	# as a placeholder.
	BENCHMARKS_TEXT = """
	## How it works

	Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)
	"""

	# Markdown instructions for submitting results: install the package, run the
	# evaluation script, zip the search results, upload the archive.
	# This is a regular (non-raw) string, so each "\\" below yields a single
	# backslash in the value, i.e. a shell line continuation in the rendered
	# bash snippets.
	EVALUATION_QUEUE_TEXT = """
	## Steps to submit to AIR-Bench

	1. Install AIR-Bench
	```bash
	pip install air-benchmark
	```
	2. Run the evaluation script
	```bash
	cd AIR-Bench/scripts
	# Run all tasks
	python run_air_benchmark.py \\
	--output_dir ./search_results \\
	--encoder BAAI/bge-m3 \\
	--reranker BAAI/bge-reranker-v2-m3 \\
	--search_top_k 1000 \\
	--rerank_top_k 100 \\
	--max_query_length 512 \\
	--max_passage_length 512 \\
	--batch_size 512 \\
	--pooling_method cls \\
	--normalize_embeddings True \\
	--use_fp16 True \\
	--add_instruction False \\
	--overwrite False

	# Run the tasks in the specified task type
	python run_air_benchmark.py \\
	--task_types long-doc \\
	--output_dir ./search_results \\
	--encoder BAAI/bge-m3 \\
	--reranker BAAI/bge-reranker-v2-m3 \\
	--search_top_k 1000 \\
	--rerank_top_k 100 \\
	--max_query_length 512 \\
	--max_passage_length 512 \\
	--batch_size 512 \\
	--pooling_method cls \\
	--normalize_embeddings True \\
	--use_fp16 True \\
	--add_instruction False \\
	--overwrite False

	# Run the tasks in the specified task type and domains
	python run_air_benchmark.py \\
	--task_types long-doc \\
	--domains arxiv book \\
	--output_dir ./search_results \\
	--encoder BAAI/bge-m3 \\
	--reranker BAAI/bge-reranker-v2-m3 \\
	--search_top_k 1000 \\
	--rerank_top_k 100 \\
	--max_query_length 512 \\
	--max_passage_length 512 \\
	--batch_size 512 \\
	--pooling_method cls \\
	--normalize_embeddings True \\
	--use_fp16 True \\
	--add_instruction False \\
	--overwrite False

	# Run the tasks in the specified languages
	python run_air_benchmark.py \\
	--languages en \\
	--output_dir ./search_results \\
	--encoder BAAI/bge-m3 \\
	--reranker BAAI/bge-reranker-v2-m3 \\
	--search_top_k 1000 \\
	--rerank_top_k 100 \\
	--max_query_length 512 \\
	--max_passage_length 512 \\
	--batch_size 512 \\
	--pooling_method cls \\
	--normalize_embeddings True \\
	--use_fp16 True \\
	--add_instruction False \\
	--overwrite False

	# Run the tasks in the specified task type, domains, and languages
	python run_air_benchmark.py \\
	--task_types qa \\
	--domains wiki web \\
	--languages en \\
	--output_dir ./search_results \\
	--encoder BAAI/bge-m3 \\
	--reranker BAAI/bge-reranker-v2-m3 \\
	--search_top_k 1000 \\
	--rerank_top_k 100 \\
	--max_query_length 512 \\
	--max_passage_length 512 \\
	--batch_size 512 \\
	--pooling_method cls \\
	--normalize_embeddings True \\
	--use_fp16 True \\
	--add_instruction False \\
	--overwrite False
	```
	3. Package the search results.
	```bash
	# Zip "Embedding Model + NoReranker" search results in "<search_results>/<model_name>/NoReranker" to "<save_dir>/<model_name>_NoReranker.zip".
	python zip_results.py \\
	--results_dir search_results \\
	--model_name bge-m3 \\
	--save_dir search_results/zipped_results

	# Zip "Embedding Model + Reranker" search results in "<search_results>/<model_name>/<reranker_name>" to "<save_dir>/<model_name>_<reranker_name>.zip".
	python zip_results.py \\
	--results_dir search_results \\
	--model_name bge-m3 \\
	--reranker_name bge-reranker-v2-m3 \\
	--save_dir search_results/zipped_results
	```
	4. Upload the `.zip` file on this page and fill in the model information:
	- Model Name: such as `bge-m3`.
	- Model URL: such as `https://huggingface.co/BAAI/bge-m3`.
	- Reranker Name: such as `bge-reranker-v2-m3`. Keep empty for `NoReranker`.
	- Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. Keep empty for `NoReranker`.

	If you want to stay anonymous, you can only fill in the Model Name and Reranker Name (keep empty for `NoReranker`), and check the selection box below before submission.

	5. Congratulations! Your results will be shown on the leaderboard in up to one hour.
	"""

	# Label text and snippet body for citing the results (presumably rendered by
	# a citation widget elsewhere in the app — confirm against the caller).
	CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
	# Raw string, so backslashes in a pasted citation entry would be kept
	# verbatim.
	# NOTE(review): the literal currently contains only whitespace — no citation
	# entry has been filled in.
	CITATION_BUTTON_TEXT = r"""
	"""