Spaces:

leedoming
/

itda-segment

Sleeping

App Files Community

leedoming committed on Oct 28, 2024

Commit

fd87108

verified ·

1 Parent(s): 55609ac

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -24

app.py CHANGED Viewed

@@ -56,9 +56,10 @@ def extract_color_histogram(image, mask=None):
     try:
         img_array = np.array(image)
         if mask is not None:
-            # Apply mask
-            mask = np.expand_dims(mask, axis=2)
-            img_array = img_array * mask
             # Only consider pixels that are part of the clothing item
             valid_pixels = img_array[mask[:,:,0] > 0]
         else:
@@ -66,24 +67,26 @@ def extract_color_histogram(image, mask=None):
         # Convert to HSV color space for better color representation
         if len(valid_pixels) > 0:
-            img_hsv = Image.fromarray(valid_pixels.reshape(1, -1, 3).astype(np.uint8)).convert('HSV')
             hsv_pixels = np.array(img_hsv)
             # Calculate histogram for each HSV channel
-            h_hist = np.histogram(hsv_pixels[:,:,0], bins=10, range=(0, 256))[0]
-            s_hist = np.histogram(hsv_pixels[:,:,1], bins=10, range=(0, 256))[0]
-            v_hist = np.histogram(hsv_pixels[:,:,2], bins=10, range=(0, 256))[0]
             # Normalize histograms
-            h_hist = h_hist / h_hist.sum() if h_hist.sum() > 0 else h_hist
-            s_hist = s_hist / s_hist.sum() if s_hist.sum() > 0 else s_hist
-            v_hist = v_hist / v_hist.sum() if v_hist.sum() > 0 else v_hist
             return np.concatenate([h_hist, s_hist, v_hist])
-        return np.zeros(30)  # Return zero histogram if no valid pixels
     except Exception as e:
         logger.error(f"Color histogram extraction error: {e}")
-        return np.zeros(30)
 def process_segmentation(image):
     """Segmentation processing"""
@@ -137,7 +140,7 @@ def extract_features(image, mask=None):
         # Extract CLIP features
         if mask is not None:
             img_array = np.array(image)
-            mask = np.expand_dims(mask, axis=2)
             masked_img = img_array * mask
             masked_img[mask[:,:,0] == 0] = 255  # Set background to white
             image = Image.fromarray(masked_img.astype(np.uint8))
@@ -151,19 +154,24 @@ def extract_features(image, mask=None):
         # Extract color features
         color_features = extract_color_histogram(image, mask)
-        # Combine features
-        # Note: We normalize and weight the features to balance their influence
-        clip_features_normalized = clip_features / np.linalg.norm(clip_features)
-        color_features_normalized = color_features / np.linalg.norm(color_features)
-        # Adjust these weights to control the influence of each feature type
-        clip_weight = 0.7  # CLIP features weight
-        color_weight = 0.3  # Color features weight
-        combined_features = np.concatenate([
-            clip_features_normalized * clip_weight,
-            color_features_normalized * color_weight
-        ])
         return combined_features
     except Exception as e:

     try:
         img_array = np.array(image)
         if mask is not None:
+            # Reshape mask to match image dimensions
+            mask = np.expand_dims(mask, axis=-1)  # Add channel dimension
+            img_array = img_array * mask  # Broadcasting will work correctly now
             # Only consider pixels that are part of the clothing item
             valid_pixels = img_array[mask[:,:,0] > 0]
         else:
         # Convert to HSV color space for better color representation
         if len(valid_pixels) > 0:
+            # Reshape to proper dimensions for PIL Image
+            valid_pixels = valid_pixels.reshape(-1, 3)
+            img_hsv = Image.fromarray(valid_pixels.astype(np.uint8)).convert('HSV')
             hsv_pixels = np.array(img_hsv)
             # Calculate histogram for each HSV channel
+            h_hist = np.histogram(hsv_pixels[:,0], bins=8, range=(0, 256))[0]
+            s_hist = np.histogram(hsv_pixels[:,1], bins=8, range=(0, 256))[0]
+            v_hist = np.histogram(hsv_pixels[:,2], bins=8, range=(0, 256))[0]
             # Normalize histograms
+            h_hist = h_hist / (h_hist.sum() + 1e-8)  # Add small epsilon to avoid division by zero
+            s_hist = s_hist / (s_hist.sum() + 1e-8)
+            v_hist = v_hist / (v_hist.sum() + 1e-8)
             return np.concatenate([h_hist, s_hist, v_hist])
+        return np.zeros(24)  # 8bins * 3channels = 24 features
     except Exception as e:
         logger.error(f"Color histogram extraction error: {e}")
+        return np.zeros(24)
 def process_segmentation(image):
     """Segmentation processing"""
         # Extract CLIP features
         if mask is not None:
             img_array = np.array(image)
+            mask = np.expand_dims(mask, axis=-1)
             masked_img = img_array * mask
             masked_img[mask[:,:,0] == 0] = 255  # Set background to white
             image = Image.fromarray(masked_img.astype(np.uint8))
         # Extract color features
         color_features = extract_color_histogram(image, mask)
+        # CLIP features are 768-dimensional, so we'll resize color features
+        # to maintain the same total dimensionality
+        clip_features = clip_features[:744]  # Trim CLIP features to make room for color
+        # Normalize features
+        clip_features_normalized = clip_features / (np.linalg.norm(clip_features) + 1e-8)
+        color_features_normalized = color_features / (np.linalg.norm(color_features) + 1e-8)
+        # Adjust weights (total should be 768 to match collection dimensionality)
+        clip_weight = 0.7
+        color_weight = 0.3
+        combined_features = np.zeros(768)  # Initialize with zeros
+        combined_features[:744] = clip_features_normalized * clip_weight  # First 744 dimensions for CLIP
+        combined_features[744:] = color_features_normalized * color_weight  # Last 24 dimensions for color
+        # Ensure final normalization
+        combined_features = combined_features / (np.linalg.norm(combined_features) + 1e-8)
         return combined_features
     except Exception as e: