nrms

File size: 6,163 Bytes

f97e310

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "0562d8a6-e8e3-4659-ab21-e99d76adcf3c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n",
      "35982,9796527,0.12911629676818848\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in range(10):\n",
    "    with open(\"test_set.txt\") as f:\n",
    "        print(f.readline())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9e72123e-5a81-4fd1-a07b-f847aee5a590",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "test_behavior_path = \"/work/Blue/ebnerd/ebnerd_testset/test/behaviors.parquet\"\n",
    "\n",
    "import polars as pl\n",
    "\n",
    "test_behavior_df = pl.read_parquet(test_behavior_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "7c337f1c-8a0e-4a61-9916-0c86887f320e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 13536710/13536710 [18:13<00:00, 12380.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Zipping predictions.txt to predictions.zip\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "from pathlib import Path\n",
    "import zipfile\n",
    "\n",
    "\n",
    "def transform_list(input_list):\n",
    "    # 입력 리스트를 Numpy 배열로 변환합니다.\n",
    "    arr = np.array(input_list)\n",
    "\n",
    "    # 내림차순으로 정렬된 인덱스를 가져옵니다.\n",
    "    sorted_indices = np.argsort(-arr)\n",
    "\n",
    "    # 순위를 매깁니다 (1부터 시작).\n",
    "    ranks = np.empty_like(sorted_indices)\n",
    "    ranks[sorted_indices] = np.arange(1, len(arr) + 1)\n",
    "\n",
    "    return ranks.tolist()\n",
    "\n",
    "def zip_submission_file(\n",
    "    path: Path,\n",
    "    filename_zip: str = None,\n",
    "    verbose: bool = True,\n",
    "    rm_file: bool = True,\n",
    ") -> None:\n",
    "    \"\"\"\n",
    "    Compresses a specified file into a ZIP archive within the same directory.\n",
    "\n",
    "    Args:\n",
    "        path (Path): The directory path where the file to be zipped and the resulting zip file will be located.\n",
    "        filename_input (str, optional): The name of the file to be compressed. Defaults to the path.name.\n",
    "        filename_zip (str, optional): The name of the output ZIP file. Defaults to \"prediction.zip\".\n",
    "        verbose (bool, optional): If set to True, the function will print the process details. Defaults to True.\n",
    "        rm_file (bool, optional): If set to True, the original file will be removed after compression. Defaults to True.\n",
    "\n",
    "    Returns:\n",
    "        None: This function does not return any value.\n",
    "    \"\"\"\n",
    "    path = Path(path)\n",
    "    if filename_zip:\n",
    "        path_zip = path.parent.joinpath(filename_zip)\n",
    "    else:\n",
    "        path_zip = path.with_suffix(\".zip\")\n",
    "\n",
    "    if path_zip.suffix != \".zip\":\n",
    "        raise ValueError(f\"suffix for {path_zip.name} has to be '.zip'\")\n",
    "    if verbose:\n",
    "        print(f\"Zipping {path} to {path_zip}\")\n",
    "    f = zipfile.ZipFile(path_zip, \"w\", zipfile.ZIP_DEFLATED)\n",
    "    f.write(path, arcname=path.name)\n",
    "    f.close()\n",
    "    if rm_file:\n",
    "        path.unlink()\n",
    "\n",
    "with open(\"predictions.txt\", 'w') as wf:\n",
    "    with open(\"test_set.txt\", 'r') as f:\n",
    "        behaviors_iter = test_behavior_df.select(\"impression_id\", \"user_id\", \"article_ids_inview\").iter_rows()\n",
    "        index = 0\n",
    "        for data in tqdm(behaviors_iter, total=len(test_behavior_df)):\n",
    "            impression_id = data[0]\n",
    "            user_id = data[1]\n",
    "            article_ids_inview = data[2]\n",
    "\n",
    "            scores = []\n",
    "\n",
    "            for article_id in article_ids_inview:\n",
    "                preds = f.readline().split(\",\")\n",
    "\n",
    "                p_user_id = preds[0]\n",
    "                p_article_id = preds[1]\n",
    "                p_score = preds[2]\n",
    "\n",
    "                if str(article_id) == str(p_article_id):\n",
    "                    scores.append(float(p_score))\n",
    "                else:\n",
    "                    print(\"Different 0.0\")\n",
    "                    scores.append(float(0.0))\n",
    "\n",
    "            index_ranked = transform_list(scores)\n",
    "            preds = \"[\" + \",\".join([str(ir) for ir in index_ranked]) + \"]\"\n",
    "\n",
    "            wf.write(\" \".join([str(impression_id), preds]) + \"\\n\")\n",
    "\n",
    "zip_submission_file(path=Path(\"predictions.txt\"), rm_file=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d5c1bcc-e4b0-4217-93ec-4ca3e24dc6ab",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "blue",
   "language": "python",
   "name": "blue"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}