Check-my-progress-Audio-Course

Running

App Files Files Community

Check-my-progress-Audio-Course / app.py

MariaK

Removed the certification deadline

e9f1e55 about 1 year ago

raw

history blame contribute delete

7.79 kB

	import gradio as gr
	from huggingface_hub import HfApi, hf_hub_download
	from huggingface_hub.repocard import metadata_load
	import requests
	import re
	import pandas as pd
	from huggingface_hub import ModelCard
	import os


	def pass_emoji(passed):
	    """Map a pass flag to a status emoji.

	    Only the literal ``True`` produces the check mark; every other value
	    (including truthy non-bool values) produces the cross.
	    """
	    return "✅" if passed is True else "❌"

	# Hub client shared by the model-listing code in this file.
	api = HfApi()
	# Dataset repo from which the Unit 7 ("demo") check downloads usernames.csv.
	USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
	# Hub token read from the environment; None when HF_TOKEN is not set.
	HF_TOKEN = os.environ.get("HF_TOKEN")


	def get_user_models(hf_username, task):
	    """
	    List the user's models for a given task.

	    For tasks tied to a course dataset, only models whose card metadata
	    declares exactly that dataset are returned. For text-to-speech every
	    model of the user carrying the task tag is returned.

	    :param hf_username: User HF username
	    :param task: Hub task tag ("audio-classification",
	        "automatic-speech-recognition" or "text-to-speech")
	    :return: list of model ids; empty list for an unsupported task
	    """
	    # Dataset a model must declare per task; "" means no dataset restriction.
	    required_dataset = {
	        "audio-classification": "marsyas/gtzan",
	        "automatic-speech-recognition": "PolyAI/minds14",
	        "text-to-speech": "",
	    }

	    if task not in required_dataset:
	        # Bail out before touching the Hub: falling through used to leave
	        # the dataset name unbound and crash with UnboundLocalError.
	        print("Unsupported task")
	        return []

	    dataset = required_dataset[task]
	    models = api.list_models(author=hf_username, filter=[task])
	    user_model_ids = [x.modelId for x in models]

	    if dataset == "":
	        return user_model_ids

	    dataset_specific_models = []
	    for model in user_model_ids:
	        meta = get_metadata(model)
	        # No README / no metadata block: nothing to compare against.
	        if not isinstance(meta, dict):
	            continue
	        # A card without a "datasets" entry simply does not qualify.
	        if meta.get("datasets") == [dataset]:
	            dataset_specific_models.append(model)
	    return dataset_specific_models

	def calculate_best_result(user_models, task):
	    """
	    Calculate the best result of a unit for a given task.

	    :param user_models: model ids of a user
	    :param task: "audio-classification" (a larger metric is better) or
	        "automatic-speech-recognition" (a smaller metric is better)
	    :return: tuple ``(best_result, best_model)``; when no model yields a
	        metric the starting value (-100 or 100) and "" are returned
	    :raises ValueError: if the task has no metric comparison defined
	    """
	    if task == "audio-classification":
	        best_result = -100
	        larger_is_better = True
	    elif task == "automatic-speech-recognition":
	        best_result = 100
	        larger_is_better = False
	    else:
	        # Without this branch the comparison state stayed unbound and the
	        # function failed later with an opaque UnboundLocalError.
	        raise ValueError(f"No metric comparison defined for task {task!r}")

	    best_model = ""

	    for model in user_models:
	        meta = get_metadata(model)
	        if meta is None:
	            continue

	        metric = parse_metrics(model, task)
	        if metric is None:
	            continue

	        if larger_is_better:
	            improved = metric > best_result
	        else:
	            improved = metric < best_result
	        if not improved:
	            continue

	        best_result = metric
	        try:
	            best_model = meta["model-index"][0]["name"]
	        except (KeyError, IndexError, TypeError):
	            # The card has a metric but no usable model-index entry;
	            # report the repo id instead of discarding the result.
	            best_model = model

	    return best_result, best_model


	def get_metadata(model_id):
	    """
	    Fetch the README.md of a model repo and return its metadata
	    (contains evaluation data).

	    :param model_id: id of the model repo
	    :return: the loaded metadata, or None when the download fails with an
	        HTTP error
	    """
	    try:
	        return metadata_load(hf_hub_download(model_id, filename="README.md"))
	    except requests.exceptions.HTTPError:
	        # 404 README.md not found
	        return None


	def extract_metric(model_card_content, task):
	    """
	    Extract the metric value from the model's model card.

	    :param model_card_content: model card content
	    :param task: "audio-classification" (looks for "Accuracy: <x.y>" or
	        "eval_accuracy: <x.y>") or "automatic-speech-recognition"
	        (looks for "Wer: <x.y>")
	    :return: the first matching value as a float, or None when the card
	        has no such value or the task has no metric pattern
	    """
	    # NOTE: the alternation must be a bare "|"; an escaped "\|" would only
	    # match a literal pipe character and never find the accuracy line.
	    patterns = {
	        "audio-classification": r"(?:Accuracy|eval_accuracy): (\d+\.\d+)",
	        "automatic-speech-recognition": r"Wer: (\d+\.\d+)",
	    }

	    pattern = patterns.get(task)
	    if pattern is None:
	        # No metric is defined for this task.
	        return None

	    found = re.search(pattern, model_card_content)
	    if found is None:
	        return None
	    return float(found.group(1))


	def parse_metrics(model, task):
	    """
	    Load the model card of a model and pull the task metric out of it.

	    :param model: model id
	    :param task: task whose metric is extracted from the card content
	    """
	    return extract_metric(ModelCard.load(model).content, task)


	def certification(hf_username):
	results_certification = [
	{
	"unit": "Unit 4: Audio Classification",
	"task": "audio-classification",
	"baseline_metric": 0.87,
	"best_result": 0,
	"best_model_id": "",
	"passed_": False
	},
	{
	"unit": "Unit 5: Automatic Speech Recognition",
	"task": "automatic-speech-recognition",
	"baseline_metric": 0.37,
	"best_result": 0,
	"best_model_id": "",
	"passed_": False
	},
	{
	"unit": "Unit 6: Text-to-Speech",
	"task": "text-to-speech",
	"baseline_metric": 0,
	"best_result": 0,
	"best_model_id": "",
	"passed_": False
	},
	{
	"unit": "Unit 7: Audio applications",
	"task": "demo",
	"baseline_metric": 0,
	"best_result": 0,
	"best_model_id": "",
	"passed_": False
	},
	]

	for unit in results_certification:
	unit["passed"] = pass_emoji(unit["passed_"])

	match unit["task"]:
	case "audio-classification":
	try:
	user_ac_models = get_user_models(hf_username, task = "audio-classification")
	best_result, best_model_id = calculate_best_result(user_ac_models, task = "audio-classification")
	unit["best_result"] = best_result
	unit["best_model_id"] = best_model_id
	if unit["best_result"] >= unit["baseline_metric"]:
	unit["passed_"] = True
	unit["passed"] = pass_emoji(unit["passed_"])
	except: print("Either no relevant models found, or no metrics in the model card for audio classificaiton")
	case "automatic-speech-recognition":
	try:
	user_asr_models = get_user_models(hf_username, task = "automatic-speech-recognition")
	best_result, best_model_id = calculate_best_result(user_asr_models, task = "automatic-speech-recognition")
	unit["best_result"] = best_result
	unit["best_model_id"] = best_model_id
	if unit["best_result"] <= unit["baseline_metric"]:
	unit["passed_"] = True
	unit["passed"] = pass_emoji(unit["passed_"])
	except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
	case "text-to-speech":
	try:
	user_tts_models = get_user_models(hf_username, task = "text-to-speech")
	if user_tts_models:
	unit["best_result"] = 0
	unit["best_model_id"] = user_tts_models[0]
	unit["passed_"] = True
	unit["passed"] = pass_emoji(unit["passed_"])
	except: print("Either no relevant models found, or no metrics in the model card for automatic speech recognition")
	case "demo":
	u7_usernames = hf_hub_download(USERNAMES_DATASET_ID, repo_type = "dataset", filename="usernames.csv", token=HF_TOKEN)
	u7_users = pd.read_csv(u7_usernames)
	if hf_username in u7_users['username'].tolist():
	unit["best_result"] = 0
	unit["best_model_id"] = "Demo check passed, no model id"
	unit["passed_"] = True
	unit["passed"] = pass_emoji(unit["passed_"])
	case _:
	print("Unknown task")

	print(results_certification)

	df = pd.DataFrame(results_certification)
	df = df[['passed', 'unit', 'task', 'baseline_metric', 'best_result', 'best_model_id']]
	return df

	# Gradio UI: instructions, a username box, a button and the results table.
	with gr.Blocks() as demo:
	gr.Markdown(f"""
	# 🏆 Check your progress in the Audio Course 🏆

	- To get a certificate of completion, you must pass 3 out of 4 assignments.
	- To get an honors certificate, you must pass 4 out of 4 assignments.

	For the assignments where you have to train a model, your model's metric should be equal to or better than the baseline metric.
	For the Unit 7 assignment, first, check your demo with the [Unit 7 assessment space](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment)

	Make sure that you have uploaded your model(s) to Hub, and that your Unit 7 demo is public.
	To check your progress, type your Hugging Face Username here (in my case MariaK)
	""")

	hf_username = gr.Textbox(placeholder="MariaK", label="Your Hugging Face Username")
	check_progress_button = gr.Button(value="Check my progress")
	# NOTE(review): certification() is called here with the Textbox component
	# itself rather than a username string to produce the initial table —
	# confirm this is intended.
	output = gr.components.Dataframe(value=certification(hf_username))
	# On click, run the check for the typed username and show it in the table.
	check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

	# Start the app.
	demo.launch()