{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "_NrjL2ccH3yp" }, "source": [ "RECOMMENDATION MODEL" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "IZfnA6W_GDyf" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.metrics.pairwise import cosine_similarity" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "MV-7idG1F_NU" }, "outputs": [], "source": [ "# Mock data creation\n", "def create_mock_data():\n", " users_data = \"1st_train.csv\"\n", " # \"/content/sample_data/train_train.csv\"\n", " applicants = pd.read_csv(users_data)\n", "\n", " jobs_data = \"jobs_data.csv\"\n", " companies = pd.read_csv(jobs_data)\n", "\n", " train_applicants = applicants\n", " test_data = \"1st_test.csv\"\n", " # \"/content/sample_data/test_train.csv\"\n", " test_applicants = pd.read_csv(test_data)\n", "\n", " return train_applicants, test_applicants, companies" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "4VTpcXhz-5TN" }, "outputs": [], "source": [ "# @title\n", "# # Mock data creation\n", "# def create_mock_data():\n", "# users_data = \"/content/sample_data/rematch_train_candidate_field.csv\"\n", "# applicants = pd.read_csv(users_data)\n", "\n", "# jobs_data = \"/content/sample_data/jobs_data.csv\"\n", "# companies = pd.read_csv(jobs_data)\n", "\n", "# # train_applicants = applicants\n", "# # test_data = \"/content/sample_data/test_data_new.csv\"\n", "# # test_applicants = pd.read_csv(test_data)\n", "\n", "# train_applicants = applicants[:10000]\n", "# test_applicants = applicants[10000:]\n", "\n", "# return train_applicants, test_applicants, companies" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "wF1oZ6Ez96BE" }, "outputs": [], "source": [ "train_user, test_user, jobs = create_mock_data()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Gj8tJNrph8Go", "outputId": "a44b8cf0-a56f-4cd2-bbda-ca9bcabf35a0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training data size: 18979\n", "Test data size: 4745\n" ] } ], "source": [ "print(\"Training data size:\", train_user.shape[0])\n", "print(\"Test data size:\", test_user.shape[0])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "d0XY4al7K0UT" }, "outputs": [], "source": [ "list_hard_skill = [test_user[\"hard_skill\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(test_user))]\n", "list_soft_skill = [test_user[\"soft_skill\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(test_user))]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 213 }, "id": "JOZ9_NlLK8uS", "outputId": "17d09f55-192f-4486-bb47-b56f525d44a3" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDcandidate_fieldlabelhard_skillsoft_skillfinal_hard_skillfinal_soft_skill
014649it jobs1['act', 'advertising sales', 'algorithms', 'bu...['collaboration', 'decision making', 'operatio...act, advertising sales, algorithms, business, ...collaboration, decision making, operations, wr...
1801marketing0['act', 'brand communication', 'business', 'bu...['collaboration', 'customer service', 'managem...act, brand communication, business, business d...collaboration, customer service, management
24393accounting0['application', 'balance sheet', 'finance', 'p...['filing', 'management']application, balance sheet, finance, property ...filing, management
\n", "
" ], "text/plain": [ " User ID candidate_field label \\\n", "0 14649 it jobs 1 \n", "1 801 marketing 0 \n", "2 4393 accounting 0 \n", "\n", " hard_skill \\\n", "0 ['act', 'advertising sales', 'algorithms', 'bu... \n", "1 ['act', 'brand communication', 'business', 'bu... \n", "2 ['application', 'balance sheet', 'finance', 'p... \n", "\n", " soft_skill \\\n", "0 ['collaboration', 'decision making', 'operatio... \n", "1 ['collaboration', 'customer service', 'managem... \n", "2 ['filing', 'management'] \n", "\n", " final_hard_skill \\\n", "0 act, advertising sales, algorithms, business, ... \n", "1 act, brand communication, business, business d... \n", "2 application, balance sheet, finance, property ... \n", "\n", " final_soft_skill \n", "0 collaboration, decision making, operations, wr... \n", "1 collaboration, customer service, management \n", "2 filing, management " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_user[\"final_hard_skill\"] = pd.DataFrame(list_hard_skill)\n", "test_user[\"final_soft_skill\"] = pd.DataFrame(list_soft_skill)\n", "test_user.head(3)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "kYbjYsDjABda" }, "outputs": [], "source": [ "list_hard_skill = [train_user[\"hard_skill\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(train_user))]\n", "list_soft_skill = [train_user[\"soft_skill\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(train_user))]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 213 }, "id": "GC8bn3cjB8D5", "outputId": "436e843d-425e-4ce2-e551-e4f249bdd10b" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User IDcandidate_fieldlabelhard_skillsoft_skillfinal_hard_skillfinal_soft_skill
03030sales0['blogs', 'business', 'lead generation', 'mark...['customer service', 'driven personality', 'ma...blogs, business, lead generation, marketing st...customer service, driven personality, manageme...
19702administration & office support0['business', 'draft', 'go', 'manufacturing', '...['business acumen', 'communications', 'managem...business, draft, go, manufacturing, office man...business acumen, communications, management, o...
28606retail & consumer products0['gross profit', 'inventory', 'inventory manag...['customer service', 'management']gross profit, inventory, inventory management,...customer service, management
\n", "
" ], "text/plain": [ " User ID candidate_field label \\\n", "0 3030 sales 0 \n", "1 9702 administration & office support 0 \n", "2 8606 retail & consumer products 0 \n", "\n", " hard_skill \\\n", "0 ['blogs', 'business', 'lead generation', 'mark... \n", "1 ['business', 'draft', 'go', 'manufacturing', '... \n", "2 ['gross profit', 'inventory', 'inventory manag... \n", "\n", " soft_skill \\\n", "0 ['customer service', 'driven personality', 'ma... \n", "1 ['business acumen', 'communications', 'managem... \n", "2 ['customer service', 'management'] \n", "\n", " final_hard_skill \\\n", "0 blogs, business, lead generation, marketing st... \n", "1 business, draft, go, manufacturing, office man... \n", "2 gross profit, inventory, inventory management,... \n", "\n", " final_soft_skill \n", "0 customer service, driven personality, manageme... \n", "1 business acumen, communications, management, o... \n", "2 customer service, management " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_user[\"final_hard_skill\"] = pd.DataFrame(list_hard_skill)\n", "train_user[\"final_soft_skill\"] = pd.DataFrame(list_soft_skill)\n", "train_user.head(3)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "znBy9q8XDcM7" }, "outputs": [], "source": [ "list_hard_skill = [jobs[\"Hard Skills\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(jobs))]\n", "list_soft_skill = [jobs[\"Soft Skills\"].iloc[i].replace(\"[\", \"\").replace(\"]\", \"\").replace(\"'\", \"\") for i in range(len(jobs))]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 213 }, "id": "knFii8o3EQmv", "outputId": "47afb484-0765-4ad9-8765-d084673450ac" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Job IDMajorHard SkillsSoft Skillsfinal_hard_skillfinal_soft_skill
01accounting['business', 'finance', 'excel', 'tax', 'servi...['management', 'planning', 'operations', 'lead...business, finance, excel, tax, service, data, ...management, planning, operations, leadership, ...
12administration & office support['service', 'business', 'data', 'excel', 'appl...['management', 'customer service', 'microsoft ...service, business, data, excel, application, s...management, customer service, microsoft office...
23advertising, arts & media['business', 'digital', 'sales', 'service', 'a...['management', 'social media', 'writing', 'com...business, digital, sales, service, application...management, social media, writing, communicati...
\n", "
" ], "text/plain": [ " Job ID Major \\\n", "0 1 accounting \n", "1 2 administration & office support \n", "2 3 advertising, arts & media \n", "\n", " Hard Skills \\\n", "0 ['business', 'finance', 'excel', 'tax', 'servi... \n", "1 ['service', 'business', 'data', 'excel', 'appl... \n", "2 ['business', 'digital', 'sales', 'service', 'a... \n", "\n", " Soft Skills \\\n", "0 ['management', 'planning', 'operations', 'lead... \n", "1 ['management', 'customer service', 'microsoft ... \n", "2 ['management', 'social media', 'writing', 'com... \n", "\n", " final_hard_skill \\\n", "0 business, finance, excel, tax, service, data, ... \n", "1 service, business, data, excel, application, s... \n", "2 business, digital, sales, service, application... \n", "\n", " final_soft_skill \n", "0 management, planning, operations, leadership, ... \n", "1 management, customer service, microsoft office... \n", "2 management, social media, writing, communicati... " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jobs[\"final_hard_skill\"] = pd.DataFrame(list_hard_skill)\n", "jobs[\"final_soft_skill\"] = pd.DataFrame(list_soft_skill)\n", "jobs.head(3)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "wiDiHL6lStnd" }, "outputs": [], "source": [ "# Feature Engineering\n", "def feature_engineering(applicants, companies):\n", " # Vectorize skills and majors\n", " tfidf_vectorizer_skills = TfidfVectorizer()\n", " tfidf_vectorizer_majors = TfidfVectorizer()\n", "\n", " all_skills = pd.concat([applicants['final_hard_skill'], applicants['final_soft_skill'],\n", " companies['final_hard_skill'], companies['final_soft_skill']])\n", " all_majors = pd.concat([applicants['candidate_field'], companies['Major']])\n", "\n", " all_skills_vectorized = tfidf_vectorizer_skills.fit_transform(all_skills)\n", " all_majors_vectorized = tfidf_vectorizer_majors.fit_transform(all_majors)\n", "\n", " num_applicants = len(applicants)\n", " num_companies = len(companies)\n", "\n", " # Split the TF-IDF vectors back into applicants and companies\n", " applicants_skills_vectorized = all_skills_vectorized[:num_applicants*2] # because each applicant has 2 skill entries\n", " companies_skills_vectorized = all_skills_vectorized[num_applicants*2:]\n", "\n", " applicants_majors_vectorized = all_majors_vectorized[:num_applicants]\n", " companies_majors_vectorized = all_majors_vectorized[num_applicants:]\n", "\n", " return (applicants_skills_vectorized, applicants_majors_vectorized,\n", " companies_skills_vectorized, companies_majors_vectorized, tfidf_vectorizer_skills, tfidf_vectorizer_majors)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "id": "THM0mszQGNyD" }, "outputs": [], "source": [ "def compute_similarity(applicants_skills_vectorized, applicants_majors_vectorized,\n", " companies_skills_vectorized, companies_majors_vectorized):\n", " # Calculate similarity based on skills (averaging hard and soft skills similarities)\n", " applicants_skills = (applicants_skills_vectorized[0::2] + applicants_skills_vectorized[1::2]) / 2\n", " companies_skills = (companies_skills_vectorized[0::2] + companies_skills_vectorized[1::2]) / 2\n", "\n", " skills_similarity = cosine_similarity(applicants_skills, companies_skills)\n", "\n", " # Calculate similarity based on majors\n", " majors_similarity = cosine_similarity(applicants_majors_vectorized, companies_majors_vectorized)\n", "\n", " # Ensure the number of companies in both similarities is aligned\n", " if skills_similarity.shape[1] != majors_similarity.shape[1]:\n", " min_dim = min(skills_similarity.shape[1], majors_similarity.shape[1])\n", " skills_similarity = skills_similarity[:, :min_dim]\n", " majors_similarity = majors_similarity[:, :min_dim]\n", "\n", " # Combine these similarities (simple average for this example)\n", " combined_similarity = (skills_similarity + majors_similarity) / 2\n", " return combined_similarity" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "ter3YAzxoelD" }, "outputs": [], "source": [ "# Recommendation Function\n", "def recommend_jobs(applicants, companies, similarity_scores):\n", " recommendations = {}\n", " for i, applicant in enumerate(applicants['User ID']):\n", " if i < len(similarity_scores):\n", " sorted_company_indices = np.argsort(-similarity_scores[i]) # Descending sort of scores\n", " recommended_companies = companies.iloc[sorted_company_indices]['Major'].values[:3] # Top 3 recommendations\n", " recommendations[applicant] = recommended_companies\n", " return recommendations\n", "\n", "# Testing and Evaluation Function\n", "def print_recommendations(applicants, companies, recommendations):\n", " # This is a mock function since we don't have ground truth to compare to.\n", " # In a real scenario, we would compare against actual matches or use some form of feedback.\n", " print(\"Recommendations for each applicant:\")\n", " for applicant in recommendations:\n", " print(f\"{applicant}: {recommendations[applicant]}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "Ajxp0xelIrl2", "outputId": "08bafc5b-73cc-4695-924a-931840047dd5" }, "outputs": [], "source": [ "# Let's create and process the data, and compute recommendations\n", "# train_applicants, test_applicants, companies = create_mock_data()\n", "applicants_skills_vec, applicants_majors_vec, companies_skills_vec, companies_majors_vec, tfidf_vectorizer_skills, tfidf_vectorizer_majors = feature_engineering(train_user, jobs)\n", "\n", "similarity_scores = compute_similarity(applicants_skills_vec, applicants_majors_vec, companies_skills_vec, companies_majors_vec)\n", "recommendations = recommend_jobs(test_user, jobs, similarity_scores)\n", "\n", "# Output the recommendations to observe the results\n", "print_recommendations(test_user, jobs, recommendations)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nj-HEdyJlYNY", "outputId": "063b84bc-5717-4a0c-8367-939a054657bc" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommended Jobs based on input skills and major:\n", "['sales' 'it jobs' 'administration & office support']\n" ] } ], "source": [ "# Process input skills and recommend jobs\n", "def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec):\n", " input_hard_skills_vec = tfidf_vectorizer_skills.transform([input_hard_skills])\n", " input_soft_skills_vec = tfidf_vectorizer_skills.transform([input_soft_skills])\n", " input_major_vec = tfidf_vectorizer_majors.transform([input_major])\n", "\n", " # Average the vectorized hard and soft skills\n", " input_skills_vec = (input_hard_skills_vec + input_soft_skills_vec) / 2\n", "\n", " # Compute similarities\n", " skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)\n", " major_similarity = cosine_similarity(input_major_vec, companies_majors_vec)\n", "\n", " # Ensure the number of companies in both similarities is aligned\n", " if skills_similarity.shape[1] != major_similarity.shape[1]:\n", " min_dim = min(skills_similarity.shape[1], major_similarity.shape[1])\n", " skills_similarity = skills_similarity[:, :min_dim]\n", " major_similarity = major_similarity[:, :min_dim]\n", "\n", " # Combine similarities\n", " combined_similarity = (skills_similarity + major_similarity) / 2\n", "\n", " # Get top 3 job recommendations\n", " sorted_company_indices = np.argsort(-combined_similarity[0])\n", " recommended_companies = jobs.iloc[sorted_company_indices]['Major'].values[:3]\n", "\n", " return recommended_companies\n", "\n", "\"\"\"TEST RECOMMENDED SYSTEM\"\"\"\n", "\n", "input_hard_skills = \"Java, Excel, Python\"\n", "input_soft_skills = \"Communication, Teamwork\"\n", "input_major = \"Sales\"\n", "\n", "recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, jobs, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec)\n", "print(\"Recommended Jobs based on input skills and major:\")\n", "print(recommended_jobs)" ] }, { "cell_type": "markdown", "metadata": { "id": "IMTilMnQINZC" }, "source": [ "TEST RECOMMENDED SYSTEM" ] }, { "cell_type": "markdown", "metadata": { "id": "kShd99z_NiTa" }, "source": [ "Evaluating (PENDING)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WfEgjqw9JE3l" }, "outputs": [], "source": [ "def create_ground_truth(csv_file_path):\n", " data = pd.read_csv(csv_file_path)\n", "\n", " # Tạo dictionary `ground_truth`\n", " ground_truth = {}\n", " for index, row in data.iterrows():\n", " user_id = row['User ID']\n", " actual_major = row['candidate_field']\n", "\n", " # Thêm vào dictionary, giả sử mỗi ứng viên chỉ chọn một công việc\n", " ground_truth[user_id] = [actual_major]\n", "\n", " return ground_truth\n", "\n", "# Sử dụng hàm trên để tạo `ground_truth`\n", "csv_file_path = '/content/sample_data/1st_test.csv'\n", "ground_truth = create_ground_truth(csv_file_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "collapsed": true, "id": "TRiD4oS-AKFE", "outputId": "256fadeb-b250-4602-affb-005cb9c658eb" }, "outputs": [], "source": [ "display(ground_truth)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pXsa_wbANjmb", "outputId": "9bd4fc1e-781b-439c-fe35-c28769f6714c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average Precision@3 with 18979 trains and 4745 tests: 0.1252546540217773\n" ] } ], "source": [ "def precision_at_k(recommendations, ground_truth, k=3):\n", " \"\"\"\n", " Calculate the precision at k for recommendation system.\n", "\n", " Parameters:\n", " - recommendations (dict): Dictionary where keys are user IDs and values are lists of recommended majors.\n", " - ground_truth (dict): Dictionary where keys are user IDs and values are lists of truly suitable majors.\n", " - k (int): The number of top recommendations to consider for calculating precision.\n", "\n", " Returns:\n", " - float: The average precision at k for all users.\n", " \"\"\"\n", " precision_scores = []\n", "\n", " for applicant, recommended_major in recommendations.items():\n", " if applicant in ground_truth:\n", " # Get top k recommendations\n", " top_k_recs = recommended_major[:k]\n", " # Calculate the number of relevant recommendations\n", " relevant_recs = sum(1 for major in top_k_recs if major in ground_truth[applicant])\n", " # Precision at k for this user\n", " precision = relevant_recs / k\n", " precision_scores.append(precision)\n", "\n", " # Average precision at k over all users\n", " average_precision = np.mean(precision_scores) if precision_scores else 0\n", " return average_precision\n", "\n", "avg_precision = precision_at_k(recommendations, ground_truth)\n", "print(\"Average Precision@3 with 18979 trains and 4745 tests:\", avg_precision)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KAIvtKEaRQml", "outputId": "7dd82dc6-0e1b-43d5-bc95-cb457cde5d72" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average Recall@3 with 18979 trains and 4745 tests: 0.3757639620653319\n" ] } ], "source": [ "def recall_at_k(recommendations, ground_truth, k=3):\n", " recall_scores = []\n", "\n", " for user_id, recommended_majors in recommendations.items():\n", " if user_id in ground_truth:\n", " # Get top k recommendations\n", " top_k_recs = recommended_majors[:k]\n", " # Calculate the number of relevant recommendations\n", " relevant_recs = sum(1 for major in top_k_recs if major in ground_truth[user_id])\n", " # Calculate the total number of relevant items\n", " total_relevant = len(ground_truth[user_id])\n", " # Recall at k for this user\n", " recall = relevant_recs / total_relevant if total_relevant else 0\n", " recall_scores.append(recall)\n", "\n", " # Average recall at k over all users\n", " average_recall = sum(recall_scores) / len(recall_scores) if recall_scores else 0\n", " return average_recall\n", "\n", "# Example usage:\n", "avg_recall = recall_at_k(recommendations, ground_truth)\n", "print(\"Average Recall@3 with 18979 trains and 4745 tests:\", avg_recall)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QUHBsQS_-5Eu", "outputId": "fdab3075-dab8-458e-e663-2564b20da97c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average F1 Score@3: 0.18788198103266596\n" ] } ], "source": [ "def f1_score_at_k(recommendations, ground_truth, k=3):\n", " precision = precision_at_k(recommendations, ground_truth, k)\n", " recall = recall_at_k(recommendations, ground_truth, k)\n", "\n", " if precision + recall == 0:\n", " return 0\n", "\n", " f1_score = 2 * (precision * recall) / (precision + recall)\n", " return f1_score\n", "\n", "avg_f1_score = f1_score_at_k(recommendations, ground_truth)\n", "\n", "print(\"Average F1 Score@3:\", avg_f1_score)" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" } }, "nbformat": 4, "nbformat_minor": 0 }