{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-intro",
   "metadata": {},
   "source": [
    "# LayoutLM endpoint handler check\n",
    "\n",
    "Runs the project's `EndpointHandler` with the `Szczotar93/Layoutlm_Inkaso_2` model on a single document image, then wraps that image in a `DataLoader`.\n",
    "\n",
    "Run the cells top to bottom on a fresh kernel. Machine-specific paths live in the configuration cell and can be overridden with the `TESSERACT_CMD` and `DOCUMENT_IMAGE` environment variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63a53fa9-c2ae-425c-9a8a-ec2415753630",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import pytesseract\n",
    "import torch\n",
    "from PIL import Image, ImageDraw, ImageFont\n",
    "from torch.utils.data import DataLoader\n",
    "from transformers import (\n",
    "    AutoModelForTokenClassification,\n",
    "    AutoProcessor,\n",
    "    LayoutLMForTokenClassification,\n",
    "    LayoutLMTokenizer,\n",
    "    LayoutLMv2Processor,\n",
    ")\n",
    "\n",
    "from handler import EndpointHandler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "684076e3-9ec2-4c99-af1e-6860a3e355e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything that differs between machines is set here.\n",
    "# The defaults are the original author's local paths; set the environment\n",
    "# variables to run elsewhere without editing the notebook.\n",
    "MODEL_PATH = \"Szczotar93/Layoutlm_Inkaso_2\"\n",
    "TESSERACT_CMD = os.environ.get(\n",
    "    \"TESSERACT_CMD\",\n",
    "    r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\",\n",
    ")\n",
    "filename = os.environ.get(\n",
    "    \"DOCUMENT_IMAGE\",\n",
    "    r\"C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\documentsImages\\test\\2.png\",\n",
    ")\n",
    "\n",
    "# Point pytesseract at the Tesseract executable before anything runs OCR.\n",
    "pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "inference-notes",
   "metadata": {},
   "source": [
    "## Single-image inference\n",
    "\n",
    "The handler is called with a dict whose `inputs` key holds the PIL image.\n",
    "\n",
    "Output recorded from an earlier run on `2.png` (kept here for reference; re-run the cells to refresh it):\n",
    "\n",
    "```python\n",
    "{'predictions': [[{'word': 'km 1792 /', 'label': 'doc id', 'score': '0.94'},\n",
    "   {'word': 'wezwanie do dokonywania potraceh ztur',\n",
    "    'label': 'title',\n",
    "    'score': '0.98'},\n",
    "   {'word': 'kredyt inkaso s. a', 'label': 'creditor name', 'score': '0.95'},\n",
    "   {'word': '02 - 672 warszawa domaniewska 39',\n",
    "    'label': 'creditor address',\n",
    "    'score': '0.97'},\n",
    "   {'word': '##ter mateusz garbula kanaria. -',\n",
    "    'label': 'creditor proxy',\n",
    "    'score': '0.92'}]]}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20356e27-98f6-4a19-b0ec-d1d2e92029f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# init handler\n",
    "my_handler = EndpointHandler(path=MODEL_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "660f4096-9122-41fc-b38a-fd5299a16df5",
   "metadata": {},
   "outputs": [],
   "source": [
    "img = Image.open(filename)\n",
    "img.filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "096b6ffb-767e-45a2-bf4b-1f6d3f67f3a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_handler({\"inputs\": img})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "batching-notes",
   "metadata": {},
   "source": [
    "## Wrapping the image in a DataLoader\n",
    "\n",
    "Passing the PIL image itself as the dataset fails with `TypeError: object of type 'PngImageFile' has no len()`: the sampler needs a dataset that supports `len()` and indexing. The image is therefore wrapped in a list, and a custom `collate_fn` is supplied because the default collate function cannot batch PIL images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04c995ad-634b-4057-92ad-25d329371911",
   "metadata": {},
   "outputs": [],
   "source": [
    "def collate_images(batch):\n",
    "    \"\"\"Return the sampled PIL images unchanged, as a plain list.\n",
    "\n",
    "    Args:\n",
    "        batch: list of samples drawn from the dataset (PIL images here).\n",
    "\n",
    "    Returns:\n",
    "        A new list holding the same image objects, in the same order.\n",
    "    \"\"\"\n",
    "    return list(batch)\n",
    "\n",
    "\n",
    "# The dataset is a one-element list so that it is sized and indexable.\n",
    "dataloader = DataLoader([img], batch_size=1, shuffle=True, collate_fn=collate_images)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "du_env",
   "language": "python",
   "name": "du_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}