KasKniesmeijer committed on
Commit
3a3e2e6
·
1 Parent(s): 460bccf

code works

Browse files
Files changed (2) hide show
  1. app.py +8 -10
  2. demo.ipynb +104 -0
app.py CHANGED
@@ -13,7 +13,7 @@ try:
13
  processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
14
  model = AutoModelForVision2Seq.from_pretrained(
15
  "HuggingFaceTB/SmolVLM-Instruct",
16
- torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
17
  _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
18
  ).to(DEVICE)
19
  except Exception as e:
@@ -56,23 +56,21 @@ def answer_question(image, question):
56
  except Exception as e:
57
  return f"Error: Failed to prepare inputs. {str(e)}"
58
 
59
- # Generate the output
60
  try:
61
- generated_ids = model.generate(**inputs, max_new_tokens=500)
62
- generated_texts = processor.batch_decode(
63
- generated_ids, skip_special_tokens=True
64
- )
65
- return generated_texts[0]
66
  except Exception as e:
67
- return f"Error: Failed to generate output. {str(e)}"
68
 
69
 
70
  # Create Gradio interface
71
  iface = gr.Interface(
72
  fn=answer_question,
73
  inputs=[
74
- gr.inputs.Image(type="numpy"),
75
- gr.inputs.Textbox(lines=2, placeholder="Enter your question here..."),
76
  ],
77
  outputs="text",
78
  title="Image Question Answering",
 
13
  processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
14
  model = AutoModelForVision2Seq.from_pretrained(
15
  "HuggingFaceTB/SmolVLM-Instruct",
16
+ torch_dtype=torch.bfloat16,
17
  _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
18
  ).to(DEVICE)
19
  except Exception as e:
 
56
  except Exception as e:
57
  return f"Error: Failed to prepare inputs. {str(e)}"
58
 
59
+ # Generate the answer
60
  try:
61
+ outputs = model.generate(**inputs)
62
+ answer = processor.decode(outputs[0], skip_special_tokens=True)
63
+ return answer
 
 
64
  except Exception as e:
65
+ return f"Error: Failed to generate answer. {str(e)}"
66
 
67
 
68
  # Create Gradio interface
69
  iface = gr.Interface(
70
  fn=answer_question,
71
  inputs=[
72
+ gr.Image(type="numpy"),
73
+ gr.Textbox(lines=2, placeholder="Enter your question here..."),
74
  ],
75
  outputs="text",
76
  title="Image Question Answering",
demo.ipynb ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/kask/miniconda3/envs/innovatie-week/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "from transformers import AutoProcessor, AutoModelForVision2Seq\n",
19
+ "from transformers.image_utils import load_image\n",
20
+ "import numpy as np\n",
21
+ "import gradio as gr\n",
22
+ "import torch\n",
23
+ "from PIL import Image"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 3,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ "cpu\n"
36
+ ]
37
+ }
38
+ ],
39
+ "source": [
40
+ "# Set the device (GPU or CPU)\n",
41
+ "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
42
+ "print(DEVICE)"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": null,
48
+ "metadata": {},
49
+ "outputs": [
50
+ {
51
+ "name": "stderr",
52
+ "output_type": "stream",
53
+ "text": [
54
+ "Some kwargs in processor config are unused and will not have any effect: image_seq_len. \n"
55
+ ]
56
+ },
57
+ {
58
+ "ename": "",
59
+ "evalue": "",
60
+ "output_type": "error",
61
+ "traceback": [
62
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
63
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
64
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
65
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
66
+ ]
67
+ }
68
+ ],
69
+ "source": [
70
+ "# Initialize processor and model\n",
71
+ "try:\n",
72
+ " processor = AutoProcessor.from_pretrained(\"HuggingFaceTB/SmolVLM-Instruct\")\n",
73
+ " model = AutoModelForVision2Seq.from_pretrained(\n",
74
+ " \"HuggingFaceTB/SmolVLM-Instruct\",\n",
75
+ " torch_dtype=torch.bfloat16,\n",
76
+ " _attn_implementation=\"flash_attention_2\" if DEVICE == \"cuda\" else \"eager\",).to(DEVICE)\n",
77
+ "except Exception as e:\n",
78
+ " print(f\"Error loading model or processor: {str(e)}\")\n",
79
+ " exit(1)"
80
+ ]
81
+ }
82
+ ],
83
+ "metadata": {
84
+ "kernelspec": {
85
+ "display_name": "innovatie-week",
86
+ "language": "python",
87
+ "name": "python3"
88
+ },
89
+ "language_info": {
90
+ "codemirror_mode": {
91
+ "name": "ipython",
92
+ "version": 3
93
+ },
94
+ "file_extension": ".py",
95
+ "mimetype": "text/x-python",
96
+ "name": "python",
97
+ "nbconvert_exporter": "python",
98
+ "pygments_lexer": "ipython3",
99
+ "version": "3.12.1"
100
+ }
101
+ },
102
+ "nbformat": 4,
103
+ "nbformat_minor": 2
104
+ }