{"cells":[{"cell_type":"markdown","metadata":{"id":"TW2HyR7ricFu"},"source":["# Object Detection with Transformer\n","\n","**Object detection** เป็นการตรวจจับวัตถุต่างๆ (คน, สิ่งก่อสร่้าง, รถ) โดยโมเดลจะรับค่ามาเป็นรูปภาพและจะทำนายว่าวัตถุที่เราสนใจนั้นอยู่ตรงส่วนไหนของภาพและวัตถุนั้นจัดอยู่ใน class ใด object detection ถูกใช้ในงานหลากหลายรูปแบบเช่น autonomous driving ที่ใช้โมเดลในการตรวจจับบุคคล, เส้นถนน หรือ สัญญาณไฟ"]},{"cell_type":"markdown","metadata":{"id":"y6XFd4uWicFw"},"source":["Tutorial from : https://huggingface.co/docs/transformers/tasks/object_detection"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":23819,"status":"ok","timestamp":1688466276921,"user":{"displayName":"korrawiz Chotayapa","userId":"13359737025054536148"},"user_tz":-420},"id":"ExkT80pxicFx","outputId":"8c4a1fcb-a680-4658-9537-e850c6722137"},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m486.2/486.2 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m70.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m40.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m110.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m93.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.30.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.12.2)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.15.1)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.22.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (23.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2022.10.31)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.27.1)\n","Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.13.3)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.3.1)\n","Requirement already satisfied: tqdm>=4.27 in 
/usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.65.0)\n","Requirement already satisfied: torch!=1.12.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.0.1+cu118)\n","Collecting accelerate>=0.20.2 (from transformers[torch])\n"," Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.6/227.6 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.20.2->transformers[torch]) (5.9.5)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[torch]) (2023.6.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers[torch]) (4.6.3)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (1.11.1)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (3.1.2)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch!=1.12.0,>=1.9->transformers[torch]) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.9->transformers[torch]) (3.25.2)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch!=1.12.0,>=1.9->transformers[torch]) (16.0.6)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) 
(1.26.16)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2023.5.7)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2.0.12)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.4)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch!=1.12.0,>=1.9->transformers[torch]) (2.1.3)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch!=1.12.0,>=1.9->transformers[torch]) (1.3.0)\n","Installing collected packages: accelerate\n","Successfully installed accelerate-0.20.3\n"]}],"source":["# install related libraries\n","!pip install -q datasets transformers evaluate timm albumentations\n","!pip install transformers[torch]"]},{"cell_type":"markdown","metadata":{"id":"B0euspRpicFy"},"source":["## Import libraries"]},{"cell_type":"markdown","metadata":{"id":"rMPPQMLZicFy"},"source":["ที่ใช้ในการทำงาน เราจะใช้ `transformers` และ `datasets` มาช่วยในการทำงาน จาก huggingface hub"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5CFJCrvDicFy"},"outputs":[],"source":["from transformers import AutoModelForObjectDetection # library สำหรับโหลด pre-trained model ที่เราต้องการ\n","from transformers import AutoImageProcessor # library สำหรับโมเดลที่เป็น image processor\n","from transformers import TrainingArguments # library สำหรับเก็บ arguments ที่เกี่ยวข้องกับการ train model\n","from transformers import Trainer # library สำหรับ train model\n","from datasets import load_dataset # library สำหรับโหลด dataset ที่เราต้องการ\n","import torch\n","import torchvision # library สำหรับการทำงานกับรูปภาพ\n","import evaluate # library สำหรับการคำนวณค่าความแม่นยำของโมเดล\n","from tqdm import tqdm # library สำหรับการแสดง 
progress bar\n","import albumentations # library สำหรับการทำ data augmentation\n","from PIL import Image, ImageDraw # library สำหรับการทำงานกับรูปภาพ\n","import requests # library สำหรับการดาวน์โหลดไฟล์จาก Google Drive (และอื่นๆ)\n","import numpy as np # library สำหรับการทำงานกับ array\n","import json # library สำหรับการทำงานกับไฟล์ json\n","import os # library สำหรับการทำงานกับไฟล์และโฟลเดอร์"]},{"cell_type":"markdown","metadata":{"id":"Lia5CWBNicFz"},"source":["## Preparing Data"]},{"cell_type":"markdown","metadata":{"id":"rD7ZP85micFz"},"source":["Download `cppe-5` dataset เพื่อใช้ในการ train model และ test model"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":405,"referenced_widgets":["5904f1a8809b4701967b1730f39f1c34","1e2f6540d542462cba714c7bedef048d","6a40109703dd4bf4993bc06082e606fd","cae5cc84f6e9407ea29755a678c3c836","d9f6b65a7f9b4720966f97c79133f727","ee8aafc678cf444386ea181486e3d75d","1cb1017592b840b598b825e2f4b88cec","fe9d669282a14c4e9554de06cec2d7a7","a843912ac37f4e2a9bd5fa9de7a500aa","3eba5a083ac2474ea41bfd8ad9d098f9","64a277be549e4cd786c367a201cf22cd","b7bf7c7897da4196ab900dc4ef0c7204","cd4200b6c42048a68f8c864bd3760b82","6df3e6da06e04f4c99b58ac41cdf70f0","c68b041283644d61877735f288042bc1","58c3345322db446db3f33884003f169b","173606e640de46058483cd2877466816","c211b6b2b32b4b29b889b8581d58a91a","317798501e5f4a38b9194c55aaeb9ada","ef723431f6b54c0f840f5775797d7603","c0d485e17e1d49ee8de865758e565b11","47b5bb962c8b4ea6a5722fe6b33ab5a2","4b9942d2cb5244ab9916a0cc3d7c8bad","c26155eaa7014cccb3021af68ecf3a9e","17f00d0206c8490197c3c9279d01b997","1e136a83653b412982731b99f390b715","c40141a43abc4db99bd8bfb0a6e12daa","4af1710cf75540e896cd0ddbe4434824","a983155b40f14a468701e79b8252782e","3d0ca9800d7e4e0a8f21731b6dbd98d6","ba9acb50e4b14db8b783e8278a302e77","1ae9d044c6014238a47e3208b8be842e","7116b8dd222a47e2a88cd1ecd6d7a625","9815cf6d4bdc4511a493c27493699110","e06a8c29b4b14adc8b01af7d80038b10","956d4dd02df
94d88a3976fd9a73fe547","9176abe739c248019d5c43915bdaf92a","1832fdd1f76b4543b0092889d182b4ba","4e23f5edd4ee475ea58c9b20a30a9cea","8911a31f1c9746a78d18e85a399cc0ec","841b28959799467db6f7f6bc3d689ec4","6143166db7cb44e998e346cff5343fc5","08b7c032bb1e401bb621ab0302241404","79bd5273c95e4147bc33bbfe45b3ed63","8ff8b9d664184555a724cdfdbd02ec11","04f4a155eff14e9fa5587c973069a5e0","280553b2763948eb945a2ca00699352b","98dd7e4ba2834b3d836d05760b2cc7c2","c98f8766f0f84e14a4188d837a9d44d4","299bc8b1e01842f585ff547ce56dd47b","4746390bdf4e44e3afccbe6d6f978b33","b3d53f6fa66c42339cbd04ae2d970709","90f75f42ba77459fb752471c88f3614f","add9711a24794b42a76da2b760bdee3b","cf611ad4c634416f9a2571a64ea8cb2b","85ff16919f8142b699b3bf5f870b0a54","1803f0c8c52b424aaee0061876a0b61d","db0f57f9bf374c0cbb33a7aa23d0e16a","d0cb9fe1b2a44841ab5f5f13b0b02593","b47f51b3241d4265810d78f52b99c3f3","5f432084951a47e58d188eeca647f539","7fcca09badb24b96b2872896b8fdf38f","7e1bf739097b4195b94f6aef57ab6565","050f828d652d48d4b183083b6363c670","fb674b0387f843f9a77fc9ddf9b4778f","767fba6272ec4d37b05c23ba5808ee68","0773fe8bf06d4af9bfa0106609473a23","80db8269c89c47baa9c708424630f9d2","206d82ac9d424ac58e9538d1d7025f59","7f8bb6b5838f4ff6b4324aeb1943c7b0","16f4977e534c4fa1baf947b5e201c7f1","4bbb9375630f440c9b5a71188e3bbdc3","d4c451fa598d4d7393ae7beaf4920211","4ac355b7e85c44328632fef809931e37","16e217e123d94d5c9bf365bf083cc2cb","28a8d33670674a89830e5e920aff2c5d","c7f5384f31ce4c7fa490ef9d5d62e89d"]},"executionInfo":{"elapsed":19909,"status":"ok","timestamp":1688466313874,"user":{"displayName":"korrawiz Chotayapa","userId":"13359737025054536148"},"user_tz":-420},"id":"lv7UeX7RicF0","outputId":"09283a1a-8bc0-4e56-9e30-7e67cd59f0bf"},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5904f1a8809b4701967b1730f39f1c34","version_major":2,"version_minor":0},"text/plain":["Downloading builder script: 0%| | 0.00/4.89k [00:00, 
?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"b7bf7c7897da4196ab900dc4ef0c7204","version_major":2,"version_minor":0},"text/plain":["Downloading metadata: 0%| | 0.00/2.08k [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"4b9942d2cb5244ab9916a0cc3d7c8bad","version_major":2,"version_minor":0},"text/plain":["Downloading readme: 0%| | 0.00/11.0k [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["Downloading and preparing dataset cppe-5/default to /root/.cache/huggingface/datasets/cppe-5/default/1.0.0/dd60c7c8210a67663b06108fb9e23c70acb98e2d3a4f3636f429509b19b74989...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9815cf6d4bdc4511a493c27493699110","version_major":2,"version_minor":0},"text/plain":["Downloading data: 0%| | 0.00/238M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8ff8b9d664184555a724cdfdbd02ec11","version_major":2,"version_minor":0},"text/plain":["Generating train split: 0%| | 0/1000 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"85ff16919f8142b699b3bf5f870b0a54","version_major":2,"version_minor":0},"text/plain":["Generating test split: 0%| | 0/29 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["Dataset cppe-5 downloaded and prepared to /root/.cache/huggingface/datasets/cppe-5/default/1.0.0/dd60c7c8210a67663b06108fb9e23c70acb98e2d3a4f3636f429509b19b74989. 
# ## Preparing Data
# Download the `cppe-5` dataset, used for both training and testing.
cppe5 = load_dataset("cppe-5")
cppe5

# Build id <-> label lookup tables from the dataset's category names.
categories = cppe5["train"].features["objects"].feature["category"].names
id2label = dict(enumerate(categories))
label2id = {name: idx for idx, name in id2label.items()}

# Drop the training images known to contain no annotated objects.
remove_idx = [590, 821, 822, 875, 876, 878, 879]
keep = [i for i in range(len(cppe5["train"])) if i not in remove_idx]
cppe5["train"] = cppe5["train"].select(keep)
# ## Transform Data
# Augmentation pipeline applied before the images reach the model:
#   1. resize to 480x480
#   2. horizontal flip
#   3. random brightness / contrast
# (p is the probability of applying each transform)
transform = albumentations.Compose(
    [
        albumentations.Resize(480, 480),                  # resize to 480x480
        albumentations.HorizontalFlip(p=1.0),             # horizontal flip
        albumentations.RandomBrightnessContrast(p=1.0),   # random brightness and contrast
    ],
    # bounding boxes are in COCO format; the labels travel in "category"
    bbox_params=albumentations.BboxParams(format="coco", label_fields=["category"]),
)


def formatted_anns(image_id, category, area, bbox):
    """Re-pack per-object annotations into COCO-style dicts for one image.

    Assumes category/area/bbox are parallel sequences of equal length
    (one entry per object).
    """
    return [
        {
            "image_id": image_id,
            "category_id": obj_category,
            "isCrowd": 0,
            "area": obj_area,
            "bbox": list(obj_bbox),
        }
        for obj_category, obj_area, obj_bbox in zip(category, area, bbox)
    ]


def transform_aug_ann(examples):
    """Augment a batch of examples and encode it with the image processor."""
    image_ids = examples["image_id"]
    images, bboxes, area, categories = [], [], [], []
    for image, objects in zip(examples["image"], examples["objects"]):
        # PIL -> numpy array, reversing channel order (RGB -> BGR)
        image = np.array(image.convert("RGB"))[:, :, ::-1]
        out = transform(image=image, bboxes=objects["bbox"], category=objects["category"])

        area.append(objects["area"])
        images.append(out["image"])
        bboxes.append(out["bboxes"])
        categories.append(out["category"])

    targets = [
        {"image_id": id_, "annotations": formatted_anns(id_, cat_, ar_, box_)}
        for id_, cat_, ar_, box_ in zip(image_ids, categories, area, bboxes)
    ]

    return image_processor(images=images, annotations=targets, return_tensors="pt")


def collate_fn(batch):
    """Pad a list of processed samples into a single model-ready batch."""
    pixel_values = [item["pixel_values"] for item in batch]
    # pad the images to a common size and build the matching pixel mask
    # NOTE(review): pad_and_create_pixel_mask is deprecated in newer
    # transformers releases in favour of image_processor.pad(...)
    encoding = image_processor.pad_and_create_pixel_mask(pixel_values, return_tensors="pt")
    return {
        "pixel_values": encoding["pixel_values"],
        "pixel_mask": encoding["pixel_mask"],
        "labels": [item["labels"] for item in batch],
    }


# Apply the augmentation + preprocessing lazily to the training split.
cppe5["train"] = cppe5["train"].with_transform(transform_aug_ann)
# ## Create and Train a Model
# Choose the pretrained checkpoint and load its matching image processor.
checkpoint = "facebook/detr-resnet-50"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)
# Build the detection model from the chosen checkpoint.
# ignore_mismatched_sizes=True re-initialises the classification head for the
# CPPE-5 label set instead of the checkpoint's original COCO head (the shape
# mismatch warning above is expected).
model = AutoModelForObjectDetection.from_pretrained(
    checkpoint,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
# [optional] disable wandb logging
os.environ["WANDB_DISABLED"] = "true"

# Hyperparameters for the Trainer.
# FIX: the original passed output_dir="{directory_name}" — an unfilled template
# placeholder that would literally create a folder named "{directory_name}".
# Use a concrete, descriptive directory name instead.
training_args = TrainingArguments(
    output_dir="detr-resnet-50-cppe5",  # folder where trained checkpoints are saved
    per_device_train_batch_size=2,      # batch size per device
    num_train_epochs=10,                # number of training epochs
    fp16=False,                         # mixed precision disabled
    save_steps=200,                     # save a checkpoint every 200 steps
    logging_steps=50,                   # log the loss every 50 steps
    learning_rate=1e-5,                 # learning rate
    weight_decay=1e-4,                  # weight decay
    save_total_limit=2,                 # keep at most 2 checkpoints on disk
    remove_unused_columns=False,        # keep image columns for the data collator
    push_to_hub=False,
)

# Build the Trainer from the model, arguments, collator and dataset above.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=cppe5["train"],
    tokenizer=image_processor,  # saved alongside the model checkpoints
)
Chotayapa","userId":"13359737025054536148"},"user_tz":-420},"id":"kSTW_OVoicF4","outputId":"0b1a9674-d641-487b-85f5-05d838970690"},"outputs":[{"data":{"text/html":["\n","
Step | \n","Training Loss | \n","
---|---|
50 | \n","2.076000 | \n","
100 | \n","2.132500 | \n","
150 | \n","1.933300 | \n","
200 | \n","2.010700 | \n","
250 | \n","1.989500 | \n","
300 | \n","1.859800 | \n","
350 | \n","2.015400 | \n","
400 | \n","1.929600 | \n","
450 | \n","1.872200 | \n","
500 | \n","2.067200 | \n","
550 | \n","1.849000 | \n","
600 | \n","2.023300 | \n","
650 | \n","1.971100 | \n","
700 | \n","1.901000 | \n","
750 | \n","1.849900 | \n","
800 | \n","1.766900 | \n","
850 | \n","1.831400 | \n","
900 | \n","1.822200 | \n","
950 | \n","1.837700 | \n","
1000 | \n","1.681900 | \n","
1050 | \n","1.716500 | \n","
1100 | \n","1.715700 | \n","
1150 | \n","1.739500 | \n","
1200 | \n","1.606000 | \n","
1250 | \n","1.788500 | \n","
1300 | \n","1.709300 | \n","
1350 | \n","1.846800 | \n","
1400 | \n","1.579800 | \n","
1450 | \n","1.547300 | \n","
1500 | \n","1.808600 | \n","
1550 | \n","1.700200 | \n","
1600 | \n","1.689400 | \n","
1650 | \n","1.620700 | \n","
1700 | \n","1.542800 | \n","
1750 | \n","1.704500 | \n","
1800 | \n","1.568800 | \n","
1850 | \n","1.573700 | \n","
1900 | \n","1.532100 | \n","
1950 | \n","1.490700 | \n","
2000 | \n","1.598200 | \n","
2050 | \n","1.516800 | \n","
2100 | \n","1.485000 | \n","
2150 | \n","1.471400 | \n","
2200 | \n","1.589700 | \n","
2250 | \n","1.477100 | \n","
2300 | \n","1.572300 | \n","
2350 | \n","1.630200 | \n","
2400 | \n","1.605500 | \n","
2450 | \n","1.449500 | \n","
2500 | \n","1.529300 | \n","
2550 | \n","1.547800 | \n","
2600 | \n","1.447100 | \n","
2650 | \n","1.405500 | \n","
2700 | \n","1.488200 | \n","
2750 | \n","1.436800 | \n","
2800 | \n","1.550200 | \n","
2850 | \n","1.359300 | \n","
2900 | \n","1.551900 | \n","
2950 | \n","1.542100 | \n","
3000 | \n","1.443800 | \n","
3050 | \n","1.367900 | \n","
3100 | \n","1.531700 | \n","
3150 | \n","1.395000 | \n","
3200 | \n","1.450900 | \n","
3250 | \n","1.421700 | \n","
3300 | \n","1.604900 | \n","
3350 | \n","1.285700 | \n","
3400 | \n","1.496100 | \n","
3450 | \n","1.324000 | \n","
3500 | \n","1.416900 | \n","
3550 | \n","1.453200 | \n","
3600 | \n","1.484600 | \n","
3650 | \n","1.364600 | \n","
3700 | \n","1.384900 | \n","
3750 | \n","1.393500 | \n","
3800 | \n","1.307400 | \n","
3850 | \n","1.436700 | \n","
3900 | \n","1.291500 | \n","
3950 | \n","1.382800 | \n","
4000 | \n","1.318200 | \n","
4050 | \n","1.499000 | \n","
4100 | \n","1.368500 | \n","
4150 | \n","1.450000 | \n","
4200 | \n","1.245900 | \n","
4250 | \n","1.414600 | \n","
4300 | \n","1.418200 | \n","
4350 | \n","1.344600 | \n","
4400 | \n","1.340600 | \n","
4450 | \n","1.309100 | \n","
4500 | \n","1.371900 | \n","
4550 | \n","1.426700 | \n","
4600 | \n","1.318200 | \n","
4650 | \n","1.329600 | \n","
4700 | \n","1.403500 | \n","
4750 | \n","1.306100 | \n","
4800 | \n","1.294000 | \n","
4850 | \n","1.477900 | \n","
4900 | \n","1.272900 | \n","
4950 | \n","1.290900 | \n","
"],"text/plain":["