Spaces:

govtech
/

Biome

Running

App Files Files Community

pyesonekyaw commited on Dec 10, 2024

Commit

b2f544f

verified ·

1 Parent(s): 4eb017d

Upload 13 files

Browse files

Files changed (13) hide show

.gitattributes +8 -0
README.md +3 -5
app.py +466 -0
examples/bubul.jpg +3 -0
examples/hornbill.jpg +3 -0
examples/lizard.jpg +3 -0
examples/monkey.jpg +3 -0
examples/otter.jpg +3 -0
image.jpg +0 -0
photo_lookup.json +3 -0
requirements.txt +6 -0
species_lookup.json +3 -0
txt_emb_species.json +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/bubul.jpg filter=lfs diff=lfs merge=lfs -text
+examples/hornbill.jpg filter=lfs diff=lfs merge=lfs -text
+examples/lizard.jpg filter=lfs diff=lfs merge=lfs -text
+examples/monkey.jpg filter=lfs diff=lfs merge=lfs -text
+examples/otter.jpg filter=lfs diff=lfs merge=lfs -text
+photo_lookup.json filter=lfs diff=lfs merge=lfs -text
+species_lookup.json filter=lfs diff=lfs merge=lfs -text
+txt_emb_species.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,14 +1,12 @@
 ---
 title: Biome
 emoji: 👀
-colorFrom: indigo
-colorTo: indigo
 sdk: gradio
 sdk_version: 5.8.0
 app_file: app.py
 pinned: false
 license: mit
 short_description: Multimodal search & retrieval-based biodiversity recognition
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Biome
 emoji: 👀
+colorFrom: green
+colorTo: green
 sdk: gradio
 sdk_version: 5.8.0
 app_file: app.py
 pinned: false
 license: mit
 short_description: Multimodal search & retrieval-based biodiversity recognition
+---

app.py ADDED Viewed

	@@ -0,0 +1,466 @@

+import collections
+import heapq
+import json
+import os
+import logging
+import faiss
+import requests
+import gradio as gr
+import numpy as np
+import torch
+import torch.nn.functional as F
+from open_clip import create_model, get_tokenizer
+from torchvision import transforms
+from PIL import Image
+import io
+from pathlib import Path
+from huggingface_hub import hf_hub_download
+log_format = "[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=log_format)
+logger = logging.getLogger()
+hf_token = os.getenv("HF_TOKEN")
+model_str = "hf-hub:imageomics/bioclip"
+tokenizer_str = "ViT-B-16"
+txt_emb_npy = hf_hub_download(repo_id="pyesonekyaw/biome_lfs", filename='txt_emb_species.npy', repo_type="dataset")
+txt_names_json = "txt_emb_species.json"
+min_prob = 1e-9
+k = 5
+ranks = ("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+preprocess_img = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Resize((224, 224), antialias=True),
+        transforms.Normalize(
+            mean=(0.48145466, 0.4578275, 0.40821073),
+            std=(0.26862954, 0.26130258, 0.27577711),
+        ),
+    ]
+)
+MIN_PROB = 1e-9
+TOP_K_PREDICTIONS = 5
+TOP_K_CANDIDATES = 250
+TOP_N_SIMILAR = 22
+SIMILARITY_BOOST = 0.2
+VOTE_THRESHOLD = 3
+SIMILARITY_THRESHOLD = 0.99
+# Add paths for RAG
+PHOTO_LOOKUP_PATH = f"./photo_lookup.json"
+SPECIES_LOOKUP_PATH = f"./species_lookup.json"
+theme = gr.themes.Base(
+    primary_hue=gr.themes.colors.teal,
+    secondary_hue=gr.themes.colors.blue,
+    neutral_hue=gr.themes.colors.gray,
+    text_size=gr.themes.sizes.text_lg,
+).set(
+    button_primary_background_fill="#114A56",
+    button_primary_background_fill_hover="#114A56",
+    block_title_text_weight="600",
+    block_label_text_weight="600",
+    block_label_text_size="*text_md",
+)
+EXAMPLES_DIR = Path("examples")
+example_images = sorted(str(p) for p in EXAMPLES_DIR.glob("*.jpg"))
+def indexed(lst, indices):
+    return [lst[i] for i in indices]
+def format_name(taxon, common):
+    taxon = " ".join(taxon)
+    if not common:
+        return taxon
+    return f"{taxon} ({common})"
+def combine_duplicate_predictions(predictions):
+    """Combine predictions where one name is contained within another."""
+    combined = {}
+    used = set()
+    # Sort by length of name (longer names first) and probability
+    items = sorted(predictions.items(), key=lambda x: (-len(x[0]), -x[1]))
+    for name1, prob1 in items:
+        if name1 in used:
+            continue
+        total_prob = prob1
+        used.add(name1)
+        # Check remaining predictions
+        for name2, prob2 in predictions.items():
+            if name2 in used:
+                continue
+            # Convert to lowercase for comparison
+            name1_lower = name1.lower()
+            name2_lower = name2.lower()
+            # Check if one name contains the other
+            if name1_lower in name2_lower or name2_lower in name1_lower:
+                total_prob += prob2
+                used.add(name2)
+        combined[name1] = total_prob
+    # Normalize probabilities
+    total = sum(combined.values())
+    return {k: v/total for k, v in combined.items()}
+@torch.no_grad()
+def open_domain_classification(img, rank: int, return_all=False):
+    """
+    Predicts from the entire tree of life using RAG approach.
+    """
+    logger.info(f"Starting open domain classification for rank: {rank}")
+    img = preprocess_img(img).to(device)
+    img_features = model.encode_image(img.unsqueeze(0))
+    img_features = F.normalize(img_features, dim=-1)
+    # Get zero-shot predictions
+    logits = (model.logit_scale.exp() * img_features @ txt_emb).squeeze()
+    probs = F.softmax(logits, dim=0)
+    # Get similar images votes and metadata
+    species_votes, similar_images = get_similar_images_metadata(img_features, faiss_index, id_mapping, name_mapping)
+    if rank + 1 == len(ranks):
+        # Species level prediction
+        topk = probs.topk(TOP_K_CANDIDATES)
+        predictions = {
+            format_name(*txt_names[i]): prob.item()
+            for i, prob in zip(topk.indices, topk.values)
+        }
+        # Augment predictions with votes
+        augmented_predictions = predictions.copy()
+        for pred_name in predictions:
+            pred_name_lower = pred_name.lower()
+            for voted_species, vote_count in species_votes.items():
+                if voted_species in pred_name_lower or pred_name_lower in voted_species:
+                    augmented_predictions[pred_name] += SIMILARITY_BOOST * vote_count
+                elif vote_count >= VOTE_THRESHOLD:
+                    augmented_predictions[voted_species] = vote_count * SIMILARITY_BOOST
+        # Sort predictions
+        sorted_predictions = dict(sorted(
+            augmented_predictions.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )[:k])
+        # Normalize and combine duplicates
+        total = sum(sorted_predictions.values())
+        sorted_predictions = {k: v/total for k, v in sorted_predictions.items()}
+        sorted_predictions = combine_duplicate_predictions(sorted_predictions)
+        logger.info(f"Top K predictions after combining duplicates: {sorted_predictions}")
+        return sorted_predictions, similar_images
+    # Higher rank prediction
+    output = collections.defaultdict(float)
+    for i in torch.nonzero(probs > MIN_PROB).squeeze():
+        output[" ".join(txt_names[i][0][: rank + 1])] += probs[i]
+    # Incorporate votes for higher ranks
+    for species, vote_count in species_votes.items():
+        try:
+            # Find matching taxonomy in txt_names
+            for taxonomy, _ in txt_names:
+                if species in " ".join(taxonomy).lower():
+                    higher_rank = " ".join(taxonomy[: rank + 1])
+                    output[higher_rank] += SIMILARITY_BOOST * vote_count
+                    break
+        except Exception as e:
+            logger.error(f"Error processing vote for species {species}: {e}")
+    # Get top-k predictions and normalize
+    topk_names = heapq.nlargest(k, output, key=output.get)
+    prediction_dict = {name: output[name] for name in topk_names}
+    # Normalize probabilities to sum to 1
+    total = sum(prediction_dict.values())
+    prediction_dict = {k: v/total for k, v in prediction_dict.items()}
+    prediction_dict = combine_duplicate_predictions(prediction_dict)
+    logger.info(f"Prediction dictionary after combining duplicates: {prediction_dict}")
+    return prediction_dict, similar_images
+def change_output(choice):
+    return gr.Label(num_top_classes=k, label=ranks[choice], show_label=True, value=None)
+def get_cache_paths(name="demo"):
+    """Get paths for cached FAISS index and ID mapping."""
+    return {
+        'index': hf_hub_download(repo_id="pyesonekyaw/biome_lfs", filename='cache/faiss_cache_demo.index', repo_type="dataset"),
+        'mapping': hf_hub_download(repo_id="pyesonekyaw/biome_lfs", filename='cache/faiss_cache_demo_mapping.json', repo_type="dataset")
+    }
+def build_name_mapping(txt_names):
+    """Build mapping between scientific names and common names."""
+    name_mapping = {}
+    for taxonomy, common_name in txt_names:
+        if not common_name:
+            continue
+        if len(taxonomy) >= 2:
+            scientific_name = f"{taxonomy[-2]} {taxonomy[-1]}".lower()
+            common_name = common_name.lower()
+            name_mapping[scientific_name] = (scientific_name, common_name)
+            name_mapping[common_name] = (scientific_name, common_name)
+    return name_mapping
+def load_faiss_index():
+    """Load FAISS index from cache."""
+    cache_paths = get_cache_paths()
+    logger.info("Loading FAISS index from cache...")
+    index = faiss.read_index(cache_paths['index'])
+    with open(cache_paths['mapping'], 'r') as f:
+        id_mapping = json.load(f)
+    return index, id_mapping
+def get_similar_images_metadata(img_embedding, faiss_index, id_mapping, name_mapping):
+    """Get metadata for similar images using FAISS search."""
+    img_embedding_np = img_embedding.cpu().numpy()
+    if img_embedding_np.ndim == 1:
+        img_embedding_np = img_embedding_np.reshape(1, -1)
+    # Search for more images than needed to account for filtered matches
+    distances, indices = faiss_index.search(img_embedding_np, TOP_N_SIMILAR * 2)
+    # Filter out near-exact matches
+    valid_indices = []
+    valid_distances = []
+    valid_count = 0
+    for dist, idx in zip(distances[0], indices[0]):
+        # For inner product similarity, the distance is already the similarity
+        similarity = dist
+        if similarity > SIMILARITY_THRESHOLD:
+            continue
+        valid_indices.append(idx)
+        valid_distances.append(similarity)
+        valid_count += 1
+        if valid_count >= TOP_N_SIMILAR:
+            break
+    species_votes = {}
+    similar_images = []
+    for idx, similarity in zip(valid_indices[:5], valid_distances[:5]):  # Only process top 5 for display
+        similar_img_id = id_mapping[idx]
+        try:
+            species_names = id_to_species_info.get(similar_img_id)
+            species_names = [name for name in species_names if name]
+            processed_names = set()
+            for species in species_names:
+                if not species:
+                    continue
+                name_tuple = name_mapping.get(species)
+                if name_tuple:
+                    processed_names.add(name_tuple[0])
+                else:
+                    processed_names.add(species)
+            for species in processed_names:
+                species_votes[species] = species_votes.get(species, 0) + 1
+            # Store similar image info if the image file exists
+            # if img_path and os.path.exists(img_path):
+            similar_images.append({
+                'id': similar_img_id,
+                'species': next(iter(processed_names)) if processed_names else 'Unknown',
+                'common_name': species_names[-1],
+                'similarity': similarity  # Add similarity score
+            })
+        except Exception as e:
+            logger.error(f"Error processing JSON for image {similar_img_id}: {e}")
+            continue
+    return species_votes, similar_images
+if __name__ == "__main__":
+    logger.info("Starting.")
+    model = create_model(model_str, output_dict=True, require_pretrained=True)
+    model = model.to(device)
+    logger.info("Created model.")
+    model = torch.compile(model)
+    logger.info("Compiled model.")
+    tokenizer = get_tokenizer(tokenizer_str)
+    id_to_photo_url = json.load(open(PHOTO_LOOKUP_PATH))
+    id_to_species_info = json.load(open(SPECIES_LOOKUP_PATH))
+    logger.info(f"Loaded {len(id_to_photo_url)} photo mappings")
+    logger.info(f"Loaded {len(id_to_species_info)} species mappings")
+    # Load text embeddings and build name mapping
+    txt_emb = torch.from_numpy(np.load(txt_emb_npy, mmap_mode="r")).to(device)
+    with open(txt_names_json) as fd:
+        txt_names = json.load(fd)
+    # Build name mapping
+    name_mapping = build_name_mapping(txt_names)
+    # Build or load FAISS index with test IDs
+    faiss_index, id_mapping = load_faiss_index()
+    # Define process_output function before using it
+    def process_output(img, rank):
+        predictions, similar_imgs = open_domain_classification(img, rank)
+        logger.info(f"Number of similar images found: {len(similar_imgs)}")
+        images = []
+        labels = []
+        for img_info in similar_imgs:
+            img_id = img_info['id']
+            img_url = id_to_photo_url.get(img_id)
+            img_url = img_url.replace("square", "small")
+            logger.info(f"Processing image URL: {img_url}")
+            try:
+                # Try fetching from URL first
+                response = requests.get(img_url)
+                if response.status_code == 200:
+                    try:
+                        img = Image.open(io.BytesIO(response.content))
+                        images.append(img)
+                    except Exception as e:
+                        logger.info(f"Failed to load image from URL: {e}")
+                        images.append(None)
+                else:
+                    logger.info(f"Failed to fetch image from URL: {response}")
+                    images.append(None)
+                # Add label regardless of image load success
+                label = f"**{img_info['species']}**"
+                if img_info['common_name']:
+                    label += f" ({img_info['common_name']})"
+                label += f"\nSimilarity: {img_info['similarity']:.3f}"
+                label += f"\n[View on iNaturalist](https://www.inaturalist.org/observations/{img_id})"
+                labels.append(label)
+            except Exception as e:
+                logger.error(f"Error processing image {img_id}: {e}")
+                images.append(None)
+                labels.append("")
+        # Pad arrays if needed
+        images += [None] * (5 - len(images))
+        labels += [""] * (5 - len(labels))
+        logger.info(f"Final number of images: {len(images)}")
+        logger.info(f"Final number of labels: {len(labels)}")
+        return [predictions] + images + labels
+    with gr.Blocks(theme=theme) as app:
+        # Add header
+        with gr.Row(variant="panel"):
+            with gr.Column(scale=1):
+                gr.Image("image.jpg", elem_id="logo-img",
+                        show_label=False )
+            with gr.Column(scale=30):
+                gr.Markdown("""Biome is a vision foundation model-powered tool customized to identify Singapore's local biodiversity.
+                <br/> <br/>
+                **Developed by**: Pye Sone Kyaw - AI Engineer @ Multimodal AI Team - AI Practice - GovTech SG
+                <br/> <br/>
+                Under the hood, Biome is using [BioCLIP](https://github.com/Imageomics/BioCLIP) augmented with multimodal search and retrieval to enhance its Singapore-specific biodiversity classification capabilities.
+                """)
+        with gr.Row(variant="panel", elem_id="images_panel"):
+            img_input = gr.Image(
+                height=400,
+                sources=["upload"],
+                type="pil"
+            )
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    gr.Examples(
+                        examples=example_images,
+                        inputs=img_input,
+                            label="Example Images"
+                        )
+                rank_dropdown = gr.Dropdown(
+                    label="Taxonomic Rank",
+                    info="Which taxonomic rank to predict. Fine-grained ranks (genus, species) are more challenging.",
+                    choices=ranks,
+                    value="Species",
+                    type="index",
+                )
+                open_domain_btn = gr.Button("Submit", variant="primary")
+            with gr.Column():
+                open_domain_output = gr.Label(
+                    num_top_classes=k,
+                    label="Prediction",
+                    show_label=True,
+                    value=None,
+                )
+        # New section for similar images
+        with gr.Row(variant="panel"):
+            with gr.Column():
+                gr.Markdown("### Most Similar Images from Database")
+        with gr.Row():
+            similar_images = [
+                gr.Image(label="Similar Image 1", height=200, show_label=True),
+                gr.Image(label="Similar Image 2", height=200, show_label=True),
+                gr.Image(label="Similar Image 3", height=200, show_label=True),
+                gr.Image(label="Similar Image 4", height=200, show_label=True),
+                gr.Image(label="Similar Image 5", height=200, show_label=True),
+            ]
+        with gr.Row():
+            similar_labels = [
+                gr.Markdown("Species 1"),
+                gr.Markdown("Species 2"),
+                gr.Markdown("Species 3"),
+                gr.Markdown("Species 4"),
+                gr.Markdown("Species 5"),
+            ]
+        rank_dropdown.change(
+            fn=change_output,
+            inputs=rank_dropdown,
+            outputs=[open_domain_output]
+        )
+        open_domain_btn.click(
+            fn=process_output,
+            inputs=[img_input, rank_dropdown],
+            outputs=[open_domain_output] + similar_images + similar_labels,
+        )
+        with gr.Row(variant="panel"):
+            gr.Markdown("""
+            **Disclaimer**: This is a proof-of-concept demo for non-commercial purposes. No data is stored or used for any form of training, and all data used for retrieval are from [iNaturalist](https://inaturalist.org/).
+            The adage of garbage in, garbage out applies here - uploading images not biodiversity-related will yield unpredictable results.
+            """)
+    app.queue(max_size=20)
+    app.launch(share=False, enable_monitoring=False, allowed_paths=["/app/"])