{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from PIL import Image\n", "import requests\n", "import torch\n", "from torchvision import io\n", "from typing import Dict\n", "from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "29ac356cdb05492d8a2da9bceea03b37", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.20k [00:00