KasKniesmeijer committed on
Commit
460bccf
·
1 Parent(s): ff6b5fc

added logs

Browse files
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +19 -11
  3. src/main.js +21 -12
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.gradio
2
+ *.csv
3
+ *.jpg
app.py CHANGED
@@ -9,12 +9,16 @@ import gradio as gr
9
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  # Initialize processor and model
12
- processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
13
- model = AutoModelForVision2Seq.from_pretrained(
14
- "HuggingFaceTB/SmolVLM-Instruct",
15
- torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
16
- _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
17
- ).to(DEVICE)
 
 
 
 
18
 
19
 
20
  # Define the function to answer questions
@@ -63,13 +67,17 @@ def answer_question(image, question):
63
  return f"Error: Failed to generate output. {str(e)}"
64
 
65
 
66
- interface = gr.Interface(
 
67
  fn=answer_question,
68
- inputs=["image", "text"], # Image and text inputs
 
 
 
69
  outputs="text",
70
- title="SmolVLM - Vision-Language Question Answering",
71
- description="Upload an image and ask a question to get an answer powered by SmolVLM.",
72
  )
73
 
74
  if __name__ == "__main__":
75
- interface.launch()
 
9
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  # Initialize processor and model
12
+ try:
13
+ processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
14
+ model = AutoModelForVision2Seq.from_pretrained(
15
+ "HuggingFaceTB/SmolVLM-Instruct",
16
+ torch_dtype=torch.bfloat16 if DEVICE == "cuda" else torch.float32,
17
+ _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
18
+ ).to(DEVICE)
19
+ except Exception as e:
20
+ print(f"Error loading model or processor: {str(e)}")
21
+ exit(1)
22
 
23
 
24
  # Define the function to answer questions
 
67
  return f"Error: Failed to generate output. {str(e)}"
68
 
69
 
70
+ # Create Gradio interface
71
+ iface = gr.Interface(
72
  fn=answer_question,
73
+ inputs=[
74
+ gr.inputs.Image(type="numpy"),
75
+ gr.inputs.Textbox(lines=2, placeholder="Enter your question here..."),
76
+ ],
77
  outputs="text",
78
+ title="Image Question Answering",
79
+ description="Upload an image and ask a question about it.",
80
  )
81
 
82
  if __name__ == "__main__":
83
+ iface.launch()
src/main.js CHANGED
@@ -19,29 +19,38 @@ async function initializeWebGPU() {
19
  console.log("WebGPU initialized.");
20
  }
21
 
22
- // Submit the image and question to the backend
23
  async function submitQuestion(imageFile, question) {
24
  const formData = new FormData();
25
  formData.append("image", imageFile);
26
  formData.append("text", question);
27
 
28
- const response = await fetch("/predict", {
29
- method: "POST",
30
- body: formData,
31
- });
32
-
33
- if (!response.ok) {
34
- console.error("Failed to get a response:", response.statusText);
35
- return "Error: Unable to fetch the answer.";
 
 
 
 
 
 
 
 
 
36
  }
37
-
38
- const result = await response.json();
39
- return result.data[0];
40
  }
41
 
42
  // Handle user interactions
43
  document.getElementById("submit-btn").addEventListener("click", async () => {
44
  const imageFile = document.getElementById("image-upload").files[0];
 
 
 
 
45
  const question = document.getElementById("question").value;
46
 
47
  const answer = await submitQuestion(imageFile, question);
 
19
  console.log("WebGPU initialized.");
20
  }
21
 
 
22
  async function submitQuestion(imageFile, question) {
23
  const formData = new FormData();
24
  formData.append("image", imageFile);
25
  formData.append("text", question);
26
 
27
+ try {
28
+ const response = await fetch("/predict", {
29
+ method: "POST",
30
+ body: formData,
31
+ });
32
+
33
+ if (!response.ok) {
34
+ const errorText = await response.text();
35
+ console.error("Failed to get a response:", response.status, response.statusText, errorText);
36
+ return `Error: Unable to fetch the answer. Status: ${response.status}, ${response.statusText}`;
37
+ }
38
+
39
+ const result = await response.json();
40
+ return result.data[0];
41
+ } catch (error) {
42
+ console.error("Fetch error:", error);
43
+ return `Error: Unable to fetch the answer. ${error.message}`;
44
  }
 
 
 
45
  }
46
 
47
  // Handle user interactions
48
  document.getElementById("submit-btn").addEventListener("click", async () => {
49
  const imageFile = document.getElementById("image-upload").files[0];
50
+ if (!imageFile) {
51
+ alert("Please upload an image.");
52
+ return;
53
+ }
54
  const question = document.getElementById("question").value;
55
 
56
  const answer = await submitQuestion(imageFile, question);