arssite committed on
Commit
744110a
·
verified ·
1 Parent(s): 8bcbe28

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. app.py +72 -72
  2. app2.py +147 -0
  3. requirements.txt +2 -0
app.py CHANGED
@@ -1,73 +1,73 @@
1
- import gradio as gr
2
- from PIL import Image, ImageDraw, ImageFont
3
-
4
-
5
- # Use a pipeline as a high-level helper
6
- from transformers import pipeline
7
-
8
- # model_path = ("../Models/models--facebook--detr-resnet-50/snapshots"
9
- # "/1d5f47bd3bdd2c4bbfa585418ffe6da5028b4c0b")
10
-
11
- object_detector = pipeline("object-detection",
12
- model="facebook/detr-resnet-50")
13
-
14
- # object_detector = pipeline("object-detection",
15
- # model=model_path)
16
-
17
-
18
- def draw_bounding_boxes(image, detections, font_path=None, font_size=20):
19
- # Make a copy of the image to draw on
20
- draw_image = image.copy()
21
- draw = ImageDraw.Draw(draw_image)
22
-
23
- # Load custom font or default font if path not provided
24
- if font_path:
25
- font = ImageFont.truetype(font_path, font_size)
26
- else:
27
- # When font_path is not provided, load default font but it's size is fixed
28
- font = ImageFont.load_default()
29
- # Increase font size workaround by using a TTF font file, if needed, can download and specify the path
30
-
31
- for detection in detections:
32
- box = detection['box']
33
- xmin = box['xmin']
34
- ymin = box['ymin']
35
- xmax = box['xmax']
36
- ymax = box['ymax']
37
-
38
- # Draw the bounding box
39
- draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)
40
-
41
- # Optionally, you can also draw the label and score
42
- label = detection['label']
43
- score = detection['score']
44
- text = f"{label} {score:.2f}"
45
-
46
- # Draw text with background rectangle for visibility
47
- if font_path: # Use the custom font with increased size
48
- text_size = draw.textbbox((xmin, ymin), text, font=font)
49
- else:
50
- # Calculate text size using the default font
51
- text_size = draw.textbbox((xmin, ymin), text)
52
-
53
- draw.rectangle([(text_size[0], text_size[1]), (text_size[2], text_size[3])], fill="red")
54
- draw.text((xmin, ymin), text, fill="white", font=font)
55
-
56
- return draw_image
57
-
58
-
59
- def detect_object(image):
60
- raw_image = image
61
- lst=[]
62
- output = object_detector(raw_image)
63
- for i in output:
64
- lst.append(i['label'])
65
- processed_image = draw_bounding_boxes(raw_image, output)
66
- return processed_image,lst
67
-
68
- demo = gr.Interface(fn=detect_object,
69
- inputs=[gr.Image(label="Select Image",type="pil")],
70
- outputs=[gr.Image(label="Processed Image", type="pil"),gr.Textbox(label="Objcts", lines=3),],
71
- title="Object Detector",
72
- description="THIS APPLICATION WILL BE USED TO DETECT OBJECTS INSIDE THE PROVIDED INPUT IMAGE / Live FEED .")
73
  demo.launch()
 
1
+ import gradio as gr
2
+ from PIL import Image, ImageDraw, ImageFont
3
+
4
+
5
+ # Use a pipeline as a high-level helper
6
+ from transformers import pipeline
7
+
8
+ # model_path = ("../Models/models--facebook--detr-resnet-50/snapshots"
9
+ # "/1d5f47bd3bdd2c4bbfa585418ffe6da5028b4c0b")
10
+
11
+ object_detector = pipeline("object-detection",
12
+ model="facebook/detr-resnet-50")
13
+
14
+ # object_detector = pipeline("object-detection",
15
+ # model=model_path)
16
+
17
+
18
def draw_bounding_boxes(image, detections, font_path=None, font_size=20):
    """Return a copy of *image* with every detection outlined and labelled.

    :param image: PIL.Image object; it is copied, never modified in place.
    :param detections: Iterable of detection dicts, each with 'score', 'label'
                       and 'box' keys. 'box' is a dict with 'xmin', 'ymin',
                       'xmax' and 'ymax'.
    :param font_path: Optional path to a TrueType font file for the labels.
    :param font_size: Font size, used only when font_path is given.
    :return: New PIL.Image object with the boxes and labels drawn on it.
    """
    draw_image = image.copy()
    draw = ImageDraw.Draw(draw_image)

    # font_size only applies to a TrueType font; without font_path the
    # built-in default font is loaded as-is.
    if font_path:
        font = ImageFont.truetype(font_path, font_size)
    else:
        font = ImageFont.load_default()

    for detection in detections:
        box = detection['box']
        xmin, ymin = box['xmin'], box['ymin']
        xmax, ymax = box['xmax'], box['ymax']

        # Outline the detected object.
        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)

        text = f"{detection['label']} {detection['score']:.2f}"

        # Measure with the very font used for drawing so the red background
        # always matches the rendered label, whichever font was selected.
        left, top, right, bottom = draw.textbbox((xmin, ymin), text, font=font)
        draw.rectangle([(left, top), (right, bottom)], fill="red")
        draw.text((xmin, ymin), text, fill="white", font=font)

    return draw_image
57
+
58
+
59
def detect_object(image):
    """Run object detection on *image* and return the annotated image and labels.

    :param image: PIL.Image object to analyse, or None when no image was given.
    :return: Tuple ``(processed_image, labels)`` where ``processed_image`` is a
             copy of the input with bounding boxes drawn and ``labels`` is the
             list of detected label strings (one entry per detection).
    """
    # NOTE(review): the UI presumably passes None when the user submits
    # without selecting an image; bail out instead of failing in the detector.
    if image is None:
        return None, []

    output = object_detector(image)
    labels = [detection['label'] for detection in output]
    processed_image = draw_bounding_boxes(image, output)
    return processed_image, labels
67
+
68
# Gradio UI: one image in, the annotated image plus the detected labels out.
demo = gr.Interface(
    fn=detect_object,
    inputs=[gr.Image(label="Select Image", type="pil")],
    outputs=[
        gr.Image(label="Processed Image", type="pil"),
        gr.Textbox(label="Objects", lines=3),  # label typo "Objcts" fixed
    ],
    title="@GenAILearniverse Project 6: Object Detector",
    description="THIS APPLICATION WILL BE USED TO DETECT OBJECTS INSIDE THE PROVIDED INPUT IMAGE.",
)
demo.launch()
app2.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image, ImageDraw, ImageFont
3
+ import scipy.io.wavfile as wavfile
4
+
5
+
6
+ # Use a pipeline as a high-level helper
7
+ from transformers import pipeline
8
+
9
+ # model_path = ("../Models/models--facebook--detr-resnet-50/snapshots"
10
+ # "/1d5f47bd3bdd2c4bbfa585418ffe6da5028b4c0b")
11
+ #
12
+ # tts_model_path = ("../Models/models--kakao-enterprise--vits-ljs/snapshots"
13
+ # "/3bcb8321394f671bd948ebf0d086d694dda95464")
14
+
15
+
16
+ narrator = pipeline("text-to-speech",
17
+ model="kakao-enterprise/vits-ljs")
18
+
19
+ object_detector = pipeline("object-detection",
20
+ model="facebook/detr-resnet-50")
21
+
22
+ # object_detector = pipeline("object-detection",
23
+ # model=model_path)
24
+ #
25
+ # narrator = pipeline("text-to-speech",
26
+ # model=tts_model_path)
27
+
28
+ # [{'score': 0.9996405839920044, 'label': 'person', 'box': {'xmin': 435, 'ymin': 282, 'xmax': 636, 'ymax': 927}}, {'score': 0.9995879530906677, 'label': 'dog', 'box': {'xmin': 570, 'ymin': 694, 'xmax': 833, 'ymax': 946}}]
29
+
30
+ # Define the function to generate audio from text
31
def generate_audio(text):
    """Narrate *text* with the text-to-speech pipeline and save it as a WAV file.

    :param text: Sentence to be spoken by the module-level ``narrator`` pipeline.
    :return: Path (str) of the WAV file that was written.
    """
    import tempfile

    narrated_text = narrator(text)

    # Give every call its own file: a fixed "output.wav" is overwritten when
    # two requests are handled at the same time, so one user could receive
    # another user's audio. The file is kept (delete=False) because the caller
    # reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name

    wavfile.write(audio_path, rate=narrated_text["sampling_rate"],
                  data=narrated_text["audio"][0])

    return audio_path
41
+
42
+ # Could you please write me a python code that will take list of detection object as an input and it will give the response that will include all the objects (labels) provided in the input. For example if the input is like this: [{'score': 0.9996405839920044, 'label': 'person', 'box': {'xmin': 435, 'ymin': 282, 'xmax': 636, 'ymax': 927}}, {'score': 0.9995879530906677, 'label': 'dog', 'box': {'xmin': 570, 'ymin': 694, 'xmax': 833, 'ymax': 946}}]
43
+ # The output should be: "This picture contains 1 person and 1 dog." If there are multiple objects, do not put 'and' between every pair of objects; 'and' should appear only before the last one.
44
+
45
+
46
# Irregular nouns likely to appear as detection labels; everything else is
# pluralized by the suffix rules in _pluralize.
_IRREGULAR_PLURALS = {
    "person": "people",
    "knife": "knives",
    "mouse": "mice",
    "sheep": "sheep",
    "skis": "skis",
    "scissors": "scissors",
}


def _pluralize(label):
    """Return the plural form of *label* for use after a count greater than one."""
    if label in _IRREGULAR_PLURALS:
        return _IRREGULAR_PLURALS[label]
    # "bus" -> "buses", "bench" -> "benches", "toothbrush" -> "toothbrushes"
    if label.endswith(("s", "x", "z", "ch", "sh")):
        return label + "es"
    return label + "s"


def read_objects(detection_objects):
    """Describe the detected objects in one English sentence.

    Labels are counted and listed in order of first appearance, separated by
    commas with a single "and" before the last item, e.g.
    "This picture contains 2 dogs, 1 cat and 1 person."

    :param detection_objects: Iterable of detection dicts; only the 'label'
                              key of each entry is read.
    :return: The sentence as a string. When nothing was detected the sentence
             says so instead of ending abruptly after "contains".
    """
    from collections import Counter

    # Counter keeps insertion order, so labels stay in first-seen order.
    object_counts = Counter(detection['label'] for detection in detection_objects)

    if not object_counts:
        return "This picture contains no detected objects."

    phrases = [
        f"{count} {_pluralize(label) if count > 1 else label}"
        for label, count in object_counts.items()
    ]

    if len(phrases) == 1:
        listing = phrases[0]
    else:
        # Commas between all but the last two items, "and" before the last.
        listing = ", ".join(phrases[:-1]) + " and " + phrases[-1]

    return f"This picture contains {listing}."
73
+
74
+
75
+
76
def draw_bounding_boxes(image, detections, font_path=None, font_size=20):
    """
    Draws bounding boxes on a copy of the given image based on the detections.

    :param image: PIL.Image object; it is copied, never modified in place.
    :param detections: List of detection results, where each result is a dictionary containing
                       'score', 'label', and 'box' keys. 'box' itself is a dictionary with 'xmin',
                       'ymin', 'xmax', 'ymax'.
    :param font_path: Path to the TrueType font file to use for text.
    :param font_size: Size of the font to use for text; used only when font_path is given.
    :return: PIL.Image object with bounding boxes drawn.
    """
    draw_image = image.copy()
    draw = ImageDraw.Draw(draw_image)

    # font_size only applies to a TrueType font; without font_path the
    # built-in default font is loaded as-is.
    if font_path:
        font = ImageFont.truetype(font_path, font_size)
    else:
        font = ImageFont.load_default()

    for detection in detections:
        box = detection['box']
        xmin, ymin = box['xmin'], box['ymin']
        xmax, ymax = box['xmax'], box['ymax']

        # Outline the detected object.
        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=3)

        text = f"{detection['label']} {detection['score']:.2f}"

        # Measure with the very font used for drawing so the red background
        # always matches the rendered label, whichever font was selected.
        left, top, right, bottom = draw.textbbox((xmin, ymin), text, font=font)
        draw.rectangle([(left, top), (right, bottom)], fill="red")
        draw.text((xmin, ymin), text, fill="white", font=font)

    return draw_image
125
+
126
+
127
def detect_object(image):
    """Detect objects in *image* and return the annotated image plus narration.

    :param image: PIL.Image object to analyse, or None when no image was given.
    :return: Tuple ``(processed_image, processed_audio)``: a copy of the input
             with bounding boxes drawn, and the value returned by
             ``generate_audio`` for the spoken description of the detections.
             Both are None when no image was supplied.
    """
    # NOTE(review): the UI presumably passes None when the user submits
    # without selecting an image; bail out instead of failing in the detector.
    if image is None:
        return None, None

    output = object_detector(image)
    processed_image = draw_bounding_boxes(image, output)
    natural_text = read_objects(output)
    processed_audio = generate_audio(natural_text)
    return processed_image, processed_audio
134
+
135
+
136
+ demo = gr.Interface(fn=detect_object,
137
+ inputs=[gr.Image(label="Select Image",type="pil")],
138
+ outputs=[gr.Image(label="Processed Image", type="pil"), gr.Audio(label="Generated Audio")],
139
+ title="Object Detector with Audio",
140
+ description="THIS APPLICATION WILL BE USED TO HIGHLIGHT OBJECTS AND GIVES AUDIO DESCRIPTION FOR THE PROVIDED INPUT IMAGE.")
141
+ demo.launch()
142
+
143
+ # print(output)
144
+
145
+
146
+
147
+
requirements.txt CHANGED
@@ -3,3 +3,5 @@ transformers
3
  timm
4
  gradio
5
  torch
 
 
 
3
  timm
4
  gradio
5
  torch
6
+ scipy
7
+ phonemizer