bawolf committed
Commit 31fc7e1 · 0 Parent(s)

init working

.gitignore ADDED
@@ -0,0 +1,70 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ env/
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ ENV/
+ .env
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .DS_Store
+
+ # Project specific
+ runs/
+ checkpoints/
+ *.pth
+ *.ckpt
+ *.pt
+ wandb/
+ logs/
+ .cog/
+
+ # Data
+ data/
+ *.mp4
+ *.avi
+ *.mov
+ *.jpg
+ *.jpeg
+ *.png
+ *.gif
+ *.h5
+ *.npy
+ *.npz
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ *.ipynb
+
+ # Logs
+ *.log
+ *.csv
+ *.json
+
+ # Keep specific config files
+ !config.json
+ !requirements.txt
README.md ADDED
@@ -0,0 +1,103 @@
+ # CLIP-Based Break Dance Move Classifier
+
+ A deep learning model for classifying break dance moves using CLIP (Contrastive Language-Image Pre-Training) embeddings. The model is fine-tuned on break dance videos to classify different power moves including windmills, halos, swipes, and baby mills.
+
+ ## Features
+
+ - Video-based classification using CLIP embeddings
+ - Multi-frame temporal analysis
+ - Configurable frame sampling and data augmentation
+ - Real-time inference using Cog
+ - Misclassification analysis tools
+ - Hyperparameter tuning support
+
+ ## Setup
+
+ ```bash
+ # Install dependencies
+ pip install -r requirements.txt
+
+ # Install Cog (if not already installed)
+ curl -o /usr/local/bin/cog -L https://github.com/replicate/cog/releases/latest/download/cog_`uname -s`_`uname -m`
+ chmod +x /usr/local/bin/cog
+ ```
+
+ ## Training
+
+ ```bash
+ # Run training with default configuration
+ python scripts/train.py
+
+ # Run hyperparameter tuning
+ python scripts/hyperparameter_tuning.py
+ ```
+
+ ## Inference
+
+ ```bash
+ # Using Cog for inference
+ cog predict -i video=@path/to/your/video.mp4
+
+ # Using standard Python script
+ python scripts/inference.py --video path/to/your/video.mp4
+ ```
+
+ ## Analysis
+
+ ```bash
+ # Generate misclassification report
+ python scripts/visualization/miscalculations_report.py
+
+ # Visualize model performance
+ python scripts/visualization/visualize.py
+ ```
+
+ ## Project Structure
+
+ ```
+ clip/
+ ├── src/                 # Source code
+ │   ├── data/            # Dataset and data processing
+ │   ├── models/          # Model architecture
+ │   └── utils/           # Utility functions
+ ├── scripts/             # Training and inference scripts
+ │   └── visualization/   # Visualization tools
+ ├── config/              # Configuration files
+ ├── runs/                # Training runs and checkpoints
+ ├── cog.yaml             # Cog configuration
+ └── requirements.txt     # Python dependencies
+ ```
+
+ ## Model Architecture
+
+ - Base: CLIP ViT-Large/14
+ - Custom temporal pooling layer
+ - Fine-tuned vision encoder (last 3 layers)
+ - Output: 4-class classifier
+
+ ## Performance
+
+ - Training Accuracy: ~95%
+ - Validation Accuracy: ~92%
+ - Inference Time: ~100ms per video
+
+ ## Configuration
+
+ Key hyperparameters can be modified in `config/default.yaml`:
+ - Frame sampling: 10 frames per video
+ - Image size: 224x224
+ - Learning rate: 2e-6
+ - Weight decay: 0.007
+ - Data augmentation parameters
+
+ ## License
+
+ [Your License Here]
+
+ ## Citation
+
+ If you use this model in your research, please cite:
+
+ ```bibtex
+ [Your Citation Here]
+ ```
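For reference, the Cog endpoint added later in this commit (`predict.py`) returns the predicted class, its confidence, and the per-class confidences. A minimal sketch of the output shape, with made-up placeholder numbers rather than measured results:

```python
# Illustrative shape of the dict returned by Predictor.predict() in predict.py.
# The keys match the code in this commit; the values below are placeholders.
example_output = {
    "class": "windmill",              # top-1 label from config['class_labels']
    "confidence": 0.91,               # softmax probability of the top-1 class
    "all_confidences": {              # probability for every configured class
        "windmill": 0.91,
        "halo": 0.06,
        "swipe": 0.03,
    },
}
```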
cog.yaml ADDED
@@ -0,0 +1,18 @@
+ build:
+   gpu: true
+   cuda: "12.1"
+   python_version: "3.10"
+   system_packages:
+     - "libgl1-mesa-glx"
+     - "libglib2.0-0"
+   python_packages:
+     - "torch==2.3.0"
+     - "torchvision"
+     - "transformers"
+     - "opencv-python"
+     - "pillow"
+     - "numpy"
+     - "scipy"
+     - "huggingface_hub"
+
+ predict: "predict.py:Predictor"
predict.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ from cog import BasePredictor, Input, Path
+ import torch
+ import json
+ from src.models.model import load_model
+ from src.data.video_utils import create_transform, extract_frames
+
+ CHECKPOINT_DIR = "runs/run_20241024-150232_otherpeopleval_large_model/"
+
+ class Predictor(BasePredictor):
+     def setup(self):
+         """Load the model into memory to make running multiple predictions efficient"""
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         print(f"Using device: {self.device}")
+
+         # Load configuration from JSON
+         with open(os.path.join(CHECKPOINT_DIR, "config.json"), 'r') as f:
+             self.config = json.load(f)
+
+         # Create transform
+         self.transform = create_transform(self.config, training=False)
+
+         # Load model
+         self.model = load_model(
+             self.config['num_classes'],
+             os.path.join(CHECKPOINT_DIR, "best_model.pth"),
+             self.device,
+             self.config['clip_model']
+         )
+         self.model.eval()
+
+     def predict(self, video: Path = Input(description="Input video file")) -> dict:
+         """Run a single prediction on the model"""
+         try:
+             # Extract frames using shared function with config
+             frames, success = extract_frames(
+                 str(video),
+                 self.config,
+                 self.transform
+             )
+
+             if not success or frames is None:
+                 raise ValueError(f"Failed to process video: {video}")
+
+             # Now frames is a tensor, not a tuple
+             frames = frames.unsqueeze(0).to(self.device)
+
+             # Get prediction
+             with torch.no_grad():
+                 output = self.model(frames)
+                 probabilities = torch.softmax(output, dim=1)
+                 predicted_class = torch.argmax(probabilities, dim=1).item()
+                 confidence = probabilities[0][predicted_class].item()
+
+             # Get all class confidences
+             all_confidences = {
+                 label: probabilities[0][i].item()
+                 for i, label in enumerate(self.config['class_labels'])
+             }
+
+             return {
+                 "class": self.config['class_labels'][predicted_class],
+                 "confidence": confidence,
+                 "all_confidences": all_confidences
+             }
+
+         except Exception as e:
+             raise ValueError(f"Error processing video: {str(e)}")
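`predict.py` and the scripts below rely on `create_transform` and `extract_frames` from `src/data/video_utils.py`, which is not shown in this part of the commit. Based only on how they are called (a config-driven per-frame transform, and a `(frames_tensor, success)` return value), a hypothetical sketch of their interfaces might look like the following; the real sampling and augmentation logic (e.g. how `max_frames` and `sigma` are used) may differ:

```python
# Hypothetical sketch of src/data/video_utils.py, inferred from the call sites in
# predict.py, scripts/inference.py, and scripts/train.py. Not the actual file.
import cv2
import torch
from PIL import Image
from torchvision import transforms


def create_transform(config, training=False):
    """Build a per-frame transform from the config dict (assumed keys shown)."""
    size = config["image_size"]
    if training:
        return transforms.Compose([
            transforms.RandomResizedCrop(size, scale=(config["crop_scale_min"], config["crop_scale_max"])),
            transforms.RandomHorizontalFlip(p=config["flip_probability"]),
            transforms.ColorJitter(
                brightness=config["brightness_jitter"],
                contrast=config["contrast_jitter"],
                saturation=config["saturation_jitter"],
                hue=config["hue_jitter"],
            ),
            transforms.ToTensor(),
            transforms.Normalize(config["normalization_mean"], config["normalization_std"]),
        ])
    return transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        transforms.Normalize(config["normalization_mean"], config["normalization_std"]),
    ])


def extract_frames(video_path, config, transform):
    """Sample up to config['max_frames'] frames and return (tensor[T, 3, H, W], success)."""
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total <= 0:
        cap.release()
        return None, False

    # Evenly spaced sampling here; the real code may weight sampling with `sigma`.
    indices = torch.linspace(0, total - 1, steps=min(config["max_frames"], total)).long().tolist()
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ok, frame = cap.read()
        if not ok:
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(transform(Image.fromarray(frame)))
    cap.release()

    if not frames:
        return None, False
    return torch.stack(frames), True
```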
requirements.txt ADDED
@@ -0,0 +1,80 @@
+ alembic==1.13.3
+ annotated-types==0.7.0
+ anyio==4.6.2.post1
+ attrs==23.2.0
+ certifi==2024.8.30
+ charset-normalizer==3.4.0
+ click==8.1.7
+ cog==0.12.0
+ colorlog==6.9.0
+ contourpy==1.3.0
+ cycler==0.12.1
+ fastapi==0.110.3
+ filelock==3.16.1
+ fonttools==4.54.1
+ fsspec==2024.10.0
+ greenlet==3.1.1
+ h11==0.14.0
+ httptools==0.6.4
+ huggingface-hub==0.26.2
+ idna==3.10
+ Jinja2==3.1.4
+ joblib==1.4.2
+ kiwisolver==1.4.7
+ Mako==1.3.6
+ MarkupSafe==3.0.2
+ matplotlib==3.9.2
+ mpmath==1.3.0
+ networkx==3.4.2
+ nms_1d_cpu==0.0.0
+ numpy==2.1.2
+ nvidia-cublas-cu12==12.4.5.8
+ nvidia-cuda-cupti-cu12==12.4.127
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ nvidia-cuda-runtime-cu12==12.4.127
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.2.1.3
+ nvidia-curand-cu12==10.3.5.147
+ nvidia-cusolver-cu12==11.6.1.9
+ nvidia-cusparse-cu12==12.3.1.170
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.4.127
+ opencv-python==4.10.0.84
+ optuna==4.0.0
+ packaging==24.1
+ pandas==2.2.3
+ pillow==11.0.0
+ pydantic==2.9.2
+ pydantic_core==2.23.4
+ pyparsing==3.2.0
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ pytz==2024.2
+ PyYAML==6.0.2
+ regex==2024.9.11
+ requests==2.32.3
+ safetensors==0.4.5
+ scikit-learn==1.5.2
+ scipy==1.14.1
+ seaborn==0.13.2
+ six==1.16.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.36
+ starlette==0.37.2
+ structlog==24.4.0
+ sympy==1.13.1
+ threadpoolctl==3.5.0
+ tokenizers==0.20.1
+ torch==2.5.1
+ torchvision==0.20.1
+ tqdm==4.66.6
+ transformers==4.46.1
+ triton==3.1.0
+ typing_extensions==4.12.2
+ tzdata==2024.2
+ urllib3==2.2.3
+ uvicorn==0.32.0
+ uvloop==0.21.0
+ watchfiles==0.24.0
+ websockets==13.1
scripts/hyperparameter_tuning.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ import sys
+
+ import optuna
+
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ from scripts.train import train_and_evaluate
+ from src.utils.utils import create_run_directory
+
+ def objective(trial, hyperparam_run_dir):
+     config = {
+         "clip_model": trial.suggest_categorical("clip_model", ["openai/clip-vit-base-patch32", "openai/clip-vit-large-patch14"]),
+         "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
+         "weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-1, log=True),
+         "unfreeze_layers": trial.suggest_int("unfreeze_layers", 1, 6),
+         "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128]),
+         "gradient_clip_max_norm": trial.suggest_float("gradient_clip_max_norm", 0.1, 1.0),
+         "augmentation_strength": trial.suggest_float("augmentation_strength", 0.0, 1.0),
+         "crop_scale_min": trial.suggest_float("crop_scale_min", 0.6, 0.9),
+         "max_frames": trial.suggest_int("max_frames", 5, 15),
+         "sigma": trial.suggest_float("sigma", 0.1, 0.5),
+     }
+
+     class_labels = ["windmill", "halo", "swipe", "baby_mill"][:3]
+
+     # Fixed configurations
+     config.update({
+         "class_labels": class_labels,
+         "num_classes": len(class_labels),
+         "data_path": '../finetune/3moves_test',
+         "num_epochs": 50,  # Reduced for faster trials
+         "patience": 10,  # Adjusted for faster trials
+         "image_size": 224,
+         "crop_scale_max": 1.0,
+         "normalization_mean": [0.485, 0.456, 0.406],
+         "normalization_std": [0.229, 0.224, 0.225],
+         "overfitting_threshold": 10,
+     })
+
+     # Derive augmentation parameters from augmentation_strength
+     config.update({
+         "flip_probability": 0.5 * config["augmentation_strength"],
+         "rotation_degrees": int(15 * config["augmentation_strength"]),
+         "brightness_jitter": 0.2 * config["augmentation_strength"],
+         "contrast_jitter": 0.2 * config["augmentation_strength"],
+         "saturation_jitter": 0.2 * config["augmentation_strength"],
+         "hue_jitter": 0.1 * config["augmentation_strength"],
+     })
+
+     # Create a unique run directory for this trial
+     config["run_dir"] = create_run_directory(prefix="trial", parent_dir=hyperparam_run_dir)
+
+     # Run training and evaluation
+     val_accuracy = train_and_evaluate(config)
+     return val_accuracy
+
+ def main():
+     # Set up the study and optimize
+     hyperparam_run_dir = create_run_directory(suffix='_hyperparam')
+     study = optuna.create_study(direction="maximize")
+     study.optimize(lambda trial: objective(trial, hyperparam_run_dir), n_trials=100)  # Adjust the number of trials as needed
+
+     # Save the study results
+     study.trials_dataframe().to_csv(os.path.join(hyperparam_run_dir, 'study_results.csv'))
+
+     print("Best trial:")
+     trial = study.best_trial
+     print("  Value: ", trial.value)
+     print("  Params: ")
+     for key, value in trial.params.items():
+         print("    {}: {}".format(key, value))
+
+     # Save the best trial parameters
+     with open(os.path.join(hyperparam_run_dir, 'best_params.txt'), 'w') as f:
+         for key, value in trial.params.items():
+             f.write(f"{key}: {value}\n")
+
+ if __name__ == "__main__":
+     main()
scripts/inference.py ADDED
@@ -0,0 +1,106 @@
+ import torch
+ import numpy as np
+ import os
+ import sys
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+ from src.utils.utils import get_latest_run_dir, get_latest_model_path, get_config
+ from src.models.model import load_model
+ from src.data.video_utils import create_transform, extract_frames
+
+ def setup_model(run_dir=None):
+     """Setup model and configuration"""
+     # Define the device
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     # Get run directory
+     if run_dir is None:
+         run_dir = get_latest_run_dir()
+     print(f"Using run directory: {run_dir}")
+
+     try:
+         # Load configuration
+         config = get_config(run_dir)
+         print(f"Loaded configuration from: {run_dir}")
+
+         # Load the model
+         model_path = get_latest_model_path(run_dir)
+         print(f"Loading model from: {model_path}")
+
+         model = load_model(
+             config['num_classes'],
+             model_path,
+             device,
+             config['clip_model']
+         )
+         model.eval()
+
+         return model, config, device
+
+     except (ValueError, FileNotFoundError) as e:
+         print(f"Error loading model: {str(e)}")
+         sys.exit(1)
+
+ def predict(video_path, model, config, device):
+     """Predict class for a video using the model"""
+     transform = create_transform(config, training=False)
+
+     try:
+         frames, success = extract_frames(video_path, config, transform)
+         if not success:
+             raise ValueError(f"Failed to process video: {video_path}")
+
+         frames = frames.to(device)
+
+         # Add batch dimension at the start
+         frames = frames.unsqueeze(0)
+
+         with torch.no_grad():
+             try:
+                 outputs = model(frames)
+                 probabilities = torch.nn.functional.softmax(outputs, dim=1)
+             except Exception as e:
+                 print(f"Error during model forward pass: {str(e)}")
+                 print(f"Model input shape: {frames.shape}")
+                 raise
+
+         # Get predictions
+         avg_probabilities = probabilities[0].cpu().numpy()
+         predicted_class = np.argmax(avg_probabilities)
+
+         # Create a dictionary of class labels and their probabilities
+         class_probabilities = {
+             label: float(prob)
+             for label, prob in zip(config['class_labels'], avg_probabilities)
+         }
+
+         return config['class_labels'][predicted_class], class_probabilities
+
+     except Exception as e:
+         raise ValueError(f"Error processing video: {str(e)}")
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(description='Run inference on a video file')
+     parser.add_argument('--video', type=str, required=True,
+                         help='Path to the video file')
+     parser.add_argument('--run-dir', type=str,
+                         help='Path to specific run directory (optional)')
+
+     args = parser.parse_args()
+
+     # Setup model and config
+     model, config, device = setup_model(args.run_dir)
+
+     try:
+         predicted_label, class_probabilities = predict(args.video, model, config, device)
+         print(f"\nPredicted label: {predicted_label}")
+         print("\nClass probabilities:")
+         for label, prob in class_probabilities.items():
+             print(f"  {label}: {prob:.4f}")
+     except ValueError as e:
+         print(f"Error: {str(e)}")
scripts/train.py ADDED
@@ -0,0 +1,264 @@
+ import torch
+ from torch.utils.data import DataLoader
+ from torch.nn.utils import clip_grad_norm_
+ from tqdm import tqdm
+ import os
+ import logging
+ import csv
+ import json
+ from torch.optim.lr_scheduler import CosineAnnealingLR
+
+ import sys
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+ from src.utils.utils import create_run_directory
+ from src.data.dataset import VideoDataset
+ from src.models.model import create_model
+ from src.data.video_utils import create_transform
+
+ def train_and_evaluate(config):
+     # Create a run directory if it doesn't exist
+     if "run_dir" not in config:
+         config["run_dir"] = create_run_directory()
+
+     # Update paths based on run_dir
+     config.update({
+         "best_model_path": os.path.join(config["run_dir"], 'best_model.pth'),
+         "final_model_path": os.path.join(config["run_dir"], 'final_model.pth'),
+         "csv_path": os.path.join(config["run_dir"], 'training_log.csv'),
+         "misclassifications_dir": os.path.join(config["run_dir"], 'misclassifications'),
+     })
+
+     config_path = os.path.join(config["run_dir"], 'config.json')
+     with open(config_path, 'w') as f:
+         json.dump(config, f, indent=2)
+
+     # Set up logging
+     logging.basicConfig(level=logging.INFO,
+                         format='%(asctime)s - %(levelname)s - %(message)s',
+                         handlers=[logging.FileHandler(os.path.join(config["run_dir"], 'training.log')),
+                                   logging.StreamHandler()])
+     logger = logging.getLogger(__name__)
+
+     # Set device
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     logger.info(f"Using device: {device}")
+
+     # Initialize variables
+     best_val_loss = float('inf')
+     epochs_without_improvement = 0
+
+     model = create_model(config["num_classes"], config["clip_model"])
+
+     # Unfreeze the last N layers of the vision encoder (N comes from the config)
+     model.unfreeze_vision_encoder(num_layers=config["unfreeze_layers"])
+
+     # Move model to device
+     model = model.to(device)
+     logger.info(f"Model architecture:\n{model}")
+
+     # Load datasets
+     train_dataset = VideoDataset(
+         os.path.join(config['data_path'], 'train.csv'),
+         config=config
+     )
+
+     # For validation, create a new config with training=False for transforms
+     val_config = config.copy()
+     val_dataset = VideoDataset(
+         os.path.join(config['data_path'], 'val.csv'),
+         config=val_config,
+         transform=create_transform(config, training=False)
+     )
+
+     # Create data loaders
+     train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
+     val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)
+
+     # Define optimizer and learning rate scheduler
+     optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
+     scheduler = CosineAnnealingLR(optimizer, T_max=config["num_epochs"])
+
+     # Open a CSV file to log training progress
+     with open(config["csv_path"], 'w', newline='') as file:
+         writer = csv.writer(file)
+         writer.writerow(["epoch", "train_loss", "train_accuracy", "val_loss", "val_accuracy"])
+
+     # Function to calculate accuracy
+     def calculate_accuracy(outputs, labels):
+         _, predicted = torch.max(outputs, 1)
+         correct = (predicted == labels).sum().item()
+         total = labels.size(0)
+         return correct / total
+
+     def log_misclassifications(outputs, labels, video_paths, dataset, misclassified_videos):
+         _, predicted = torch.max(outputs, 1)
+         for pred, label, video_path in zip(predicted, labels, video_paths):
+             if pred != label:
+                 true_label = dataset.label_map[label.item()]
+                 predicted_label = dataset.label_map[pred.item()]
+                 misclassified_videos.append({
+                     'video_path': video_path,
+                     'true_label': true_label,
+                     'predicted_label': predicted_label
+                 })
+
+     # Create a subfolder for misclassification logs
+     os.makedirs(config["misclassifications_dir"], exist_ok=True)
+
+     # Training loop
+     for epoch in range(config["num_epochs"]):
+         model.train()
+         total_loss = 0
+         total_accuracy = 0
+         for frames, labels, video_paths in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config['num_epochs']}"):
+             frames = frames.to(device)
+             labels = labels.to(device)
+
+             logits = model(frames)
+
+             loss = torch.nn.functional.cross_entropy(logits, labels)
+             accuracy = calculate_accuracy(logits, labels)
+
+             optimizer.zero_grad()
+             loss.backward()
+             clip_grad_norm_(model.parameters(), max_norm=config["gradient_clip_max_norm"])
+             optimizer.step()
+
+             total_loss += loss.item()
+             total_accuracy += accuracy
+
+         avg_train_loss = total_loss / len(train_loader)
+         avg_train_accuracy = total_accuracy / len(train_loader)
+
+         # Validation
+         model.eval()
+         val_loss = 0
+         val_accuracy = 0
+         misclassified_videos = []
+         with torch.no_grad():
+             for frames, labels, video_paths in val_loader:
+                 frames = frames.to(device)
+                 labels = labels.to(device)
+
+                 logits = model(frames)
+
+                 loss = torch.nn.functional.cross_entropy(logits, labels)
+                 accuracy = calculate_accuracy(logits, labels)
+
+                 val_loss += loss.item()
+                 val_accuracy += accuracy
+
+                 # Log misclassifications
+                 log_misclassifications(logits, labels, video_paths, val_dataset, misclassified_videos)
+
+         avg_val_loss = val_loss / len(val_loader)
+         avg_val_accuracy = val_accuracy / len(val_loader)
+
+         # Log misclassified videos
+         if misclassified_videos:
+             misclassified_log_path = os.path.join(config["misclassifications_dir"], f'epoch_{epoch+1}.json')
+             with open(misclassified_log_path, 'w') as f:
+                 json.dump(misclassified_videos, f, indent=2)
+             logger.info(f"Logged {len(misclassified_videos)} misclassified videos to {misclassified_log_path}")
+
+         # Log the metrics
+         logger.info(f"Epoch [{epoch+1}/{config['num_epochs']}], "
+                     f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {avg_train_accuracy*100:.2f}%, "
+                     f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {avg_val_accuracy*100:.2f}%")
+
+         # Write to CSV
+         with open(config["csv_path"], 'a', newline='') as file:
+             writer = csv.writer(file)
+             writer.writerow([epoch+1, avg_train_loss, avg_train_accuracy*100, avg_val_loss, avg_val_accuracy*100])
+
+         # Learning rate scheduling
+         scheduler.step()
+
+         # Save the best model and check for early stopping
+         if avg_val_loss < best_val_loss:
+             best_val_loss = avg_val_loss
+             torch.save(model.state_dict(), config["best_model_path"])
+             logger.info(f"Saved best model to {config['best_model_path']}")
+             epochs_without_improvement = 0
+         else:
+             epochs_without_improvement += 1
+
+         # Early stopping check
+         if epochs_without_improvement >= config["patience"]:
+             logger.info(f"Early stopping triggered after {config['patience']} epochs without improvement")
+             break
+
+         # Overfitting detection (accuracies are fractions; the threshold is in percentage points)
+         if (avg_train_accuracy - avg_val_accuracy) * 100 > config["overfitting_threshold"]:
+             logger.warning("Possible overfitting detected")
+
+     logger.info("Training finished!")
+
+     # Save the final model
+     torch.save(model.state_dict(), config["final_model_path"])
+     logger.info(f"Saved final model to {config['final_model_path']}")
+
+     # Save run information
+     with open(os.path.join(config["run_dir"], 'run_info.txt'), 'w') as f:
+         for key, value in config.items():
+             f.write(f"{key}: {value}\n")
+         f.write(f"Device: {device}\n")
+         f.write(f"Model: {model.__class__.__name__}\n")
+         f.write(f"Optimizer: {optimizer.__class__.__name__}\n")
+         f.write(f"Scheduler: {scheduler.__class__.__name__}\n")
+         f.write(f"Loss function: CrossEntropyLoss\n")
+         f.write(f"Data augmentation: RandomHorizontalFlip(p={config['flip_probability']}), "
+                 f"RandomRotation({config['rotation_degrees']}), ColorJitter\n")
+         f.write(f"Mixed precision training: {'Enabled' if 'scaler' in locals() else 'Disabled'}\n")
+         f.write(f"Train dataset size: {len(train_dataset)}\n")
+         f.write(f"Validation dataset size: {len(val_dataset)}\n")
+         f.write(f"Vision encoder frozen: {'Partially' if hasattr(model, 'unfreeze_vision_encoder') else 'Unknown'}\n")
+
+     print("Script finished.")
+
+     return avg_val_accuracy
+
+ def main():
+     # Create run directory
+     run_dir = create_run_directory()
+     class_labels = ["windmill", "halo", "swipe", "baby_mill"][:3]
+
+     # Write configuration
+     config = {
+         "class_labels": class_labels,
+         "num_classes": len(class_labels),
+         "data_path": '../finetune/3moves_otherpeopletrain',
+         "batch_size": 32,
+         "learning_rate": 2e-6,
+         "weight_decay": 0.007,
+         "num_epochs": 1,
+         "patience": 10,  # for early stopping
+         "max_frames": 10,
+         "sigma": 0.3,
+         "image_size": 224,
+         "flip_probability": 0.5,
+         "rotation_degrees": 15,
+         "brightness_jitter": 0.2,
+         "contrast_jitter": 0.2,
+         "saturation_jitter": 0.2,
+         "hue_jitter": 0.1,
+         "crop_scale_min": 0.8,
+         "crop_scale_max": 1.0,
+         "normalization_mean": [0.485, 0.456, 0.406],
+         "normalization_std": [0.229, 0.224, 0.225],
+         "unfreeze_layers": 3,
+         "clip_model": "openai/clip-vit-large-patch14",
+         # "clip_model": "openai/clip-vit-base-patch32",
+         "gradient_clip_max_norm": 1.0,
+         "overfitting_threshold": 10,
+         "run_dir": run_dir,
+         "best_model_path": os.path.join(run_dir, 'best_model.pth'),
+         "final_model_path": os.path.join(run_dir, 'final_model.pth'),
+         "csv_path": os.path.join(run_dir, 'training_log.csv'),
+         "misclassifications_dir": os.path.join(run_dir, 'misclassifications'),
+     }
+     train_and_evaluate(config)
+
+ if __name__ == "__main__":
+     main()
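`train.py` iterates over `VideoDataset` from `src/data/dataset.py`, which is likewise not included in this excerpt of the commit. From the way the loaders unpack batches as `(frames, labels, video_paths)` and the misclassification logger reads `dataset.label_map`, a hypothetical sketch of the expected interface could be (CSV layout, fallback behavior, and any frame padding are assumptions):

```python
# Hypothetical sketch of src/data/dataset.py based on how train.py consumes it;
# the actual CSV format and sampling logic in the repository may differ.
import csv
import torch
from torch.utils.data import Dataset

# Assumed helpers; the project imports these from src.data.video_utils elsewhere.
from src.data.video_utils import create_transform, extract_frames


class VideoDataset(Dataset):
    def __init__(self, csv_path, config, transform=None):
        self.config = config
        self.transform = transform or create_transform(config, training=True)
        # Assumed CSV layout: one row per clip with columns video_path,label
        with open(csv_path, newline='') as f:
            self.samples = [(row["video_path"], row["label"]) for row in csv.DictReader(f)]
        # Map integer ids back to label strings (used by log_misclassifications)
        self.label_map = dict(enumerate(config["class_labels"]))
        self.label_to_id = {label: i for i, label in self.label_map.items()}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        video_path, label = self.samples[idx]
        frames, success = extract_frames(video_path, self.config, self.transform)
        if not success:
            # Fall back to a zero tensor so one unreadable clip doesn't kill the epoch
            size = self.config["image_size"]
            frames = torch.zeros(self.config["max_frames"], 3, size, size)
        return frames, self.label_to_id[label], video_path
```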
scripts/visualization/miscalculations_report.py ADDED
@@ -0,0 +1,93 @@
+ import os
+ import json
+ from collections import Counter
+ import matplotlib.pyplot as plt
+ from pathlib import Path
+ import sys
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+ from src.utils.utils import get_latest_run_dir
+
+ def analyze_misclassifications(run_dir=None):
+     if run_dir is None:
+         # run_dir = "/home/bawolf/workspace/break/clip/runs/run_20241022-122939_3moves_balanced"
+         run_dir = get_latest_run_dir()
+
+     misclassifications_dir = os.path.join(run_dir, 'misclassifications')
+     all_misclassifications = {}
+
+     # Collect all misclassifications across epochs
+     for file in os.listdir(misclassifications_dir):
+         if file.endswith('.json'):
+             with open(os.path.join(misclassifications_dir, file), 'r') as f:
+                 epoch_misclassifications = json.load(f)
+             for item in epoch_misclassifications:
+                 video_path = item['video_path']
+                 if video_path not in all_misclassifications:
+                     all_misclassifications[video_path] = []
+                 all_misclassifications[video_path].append(item)
+
+     # Determine the total number of epochs from the files
+     epoch_files = [f for f in os.listdir(misclassifications_dir) if f.startswith('epoch_') and f.endswith('.json')]
+     total_epochs = len(epoch_files)
+
+     # Count misclassifications per video
+     misclassification_counts = {video: len(misclassifications)
+                                 for video, misclassifications in all_misclassifications.items()}
+
+     # Calculate percentage of epochs each video was misclassified
+     misclassification_percentages = {video: (count / total_epochs) * 100
+                                      for video, count in misclassification_counts.items()}
+
+     # Sort videos by misclassification percentage
+     sorted_videos = sorted(misclassification_percentages.items(), key=lambda x: x[1], reverse=True)
+
+     # Prepare report
+     report = "Misclassification Analysis Report\n"
+     report += "=================================\n\n"
+
+     # Top N most misclassified videos
+     N = 20
+     report += f"Top {N} Most Misclassified Videos:\n"
+     for video, percentage in sorted_videos[:N]:
+         report += f"{Path(video).name}: Misclassified in {percentage:.2f}% of epochs ({misclassification_counts[video]} out of {total_epochs})\n"
+         misclassifications = all_misclassifications[video]
+         true_label = misclassifications[0]['true_label']
+         predicted_labels = Counter(m['predicted_label'] for m in misclassifications)
+         report += f"  True Label: {true_label}\n"
+         report += f"  Predicted Labels: {dict(predicted_labels)}\n\n"
+
+     # Overall statistics
+     total_misclassifications = sum(misclassification_counts.values())
+     total_videos = len(misclassification_counts)
+     report += "Overall Statistics:\n"
+     report += f"Total misclassified videos: {total_videos}\n"
+     report += f"Total misclassifications: {total_misclassifications}\n"
+     report += f"Average misclassification percentage per video: {sum(misclassification_percentages.values()) / total_videos:.2f}%\n"
+     report += f"Total epochs: {total_epochs}\n"
+
+     # Save report
+     report_path = os.path.join(run_dir, 'misclassification_report.txt')
+     with open(report_path, 'w') as f:
+         f.write(report)
+
+     # Create visualization
+     plt.figure(figsize=(12, 6))
+     plt.bar(range(len(sorted_videos)), [percentage for _, percentage in sorted_videos])
+     plt.title(f'Videos Ranked by Misclassification Percentage (Total Epochs: {total_epochs})')
+     plt.xlabel('Video Rank')
+     plt.ylabel('Misclassification Percentage')
+     plt.ylim(0, 100)  # Set y-axis limit to 0-100%
+     plt.tight_layout()
+     plt.savefig(os.path.join(run_dir, 'misclassification_distribution.png'))
+
+     print(f"Analysis complete. Report saved to {report_path}")
+     print(f"Visualization saved to {os.path.join(run_dir, 'misclassification_distribution.png')}")
+
+ if __name__ == "__main__":
+     if len(sys.argv) > 2:
+         print("Usage: python miscalculations_report.py [path_to_run_directory]")
+         sys.exit(1)
+
+     run_dir = sys.argv[1] if len(sys.argv) == 2 else None
+     analyze_misclassifications(run_dir)
scripts/visualization/visualize.py ADDED
@@ -0,0 +1,163 @@
+ import torch
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.metrics import confusion_matrix, precision_recall_curve, average_precision_score, roc_curve, auc
+ from torch.utils.data import DataLoader
+ import pandas as pd
+ import numpy as np
+ import os
+ import sys
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+
+ from src.data.dataset import VideoDataset
+ from src.utils.utils import get_latest_model_path, get_latest_run_dir, get_config
+ from src.models.model import load_model
+ import json
+
+ def plot_training_curves(log_file, output_dir):
+     data = pd.read_csv(log_file)
+
+     plt.figure(figsize=(12, 5))
+
+     # Plot loss curves
+     plt.subplot(1, 2, 1)
+     plt.plot(data['epoch'], data['train_loss'], label='Train Loss')
+     plt.plot(data['epoch'], data['val_loss'], label='Validation Loss')
+     plt.xlabel('Epochs')
+     plt.ylabel('Loss')
+     plt.title('Training and Validation Loss')
+     plt.legend()
+
+     # Plot accuracy curves
+     plt.subplot(1, 2, 2)
+     plt.plot(data['epoch'], data['train_accuracy'], label='Train Accuracy')
+     plt.plot(data['epoch'], data['val_accuracy'], label='Validation Accuracy')
+     plt.xlabel('Epochs')
+     plt.ylabel('Accuracy')
+     plt.title('Training and Validation Accuracy')
+     plt.legend()
+
+     plt.tight_layout()
+     plt.savefig(os.path.join(output_dir, 'training_curves.png'))
+     plt.close()
+
+ def generate_evaluation_metrics(model, data_loader, device, output_dir, class_labels, data_info):
+     model.eval()
+     all_preds = []
+     all_labels = []
+     all_probs = []
+
+     with torch.no_grad():
+         for frames, labels, _ in data_loader:
+             frames = frames.to(device)
+             labels = labels.to(device)
+
+             outputs = model(frames)
+             probs = torch.softmax(outputs, dim=1)
+             _, predicted = outputs.max(1)
+
+             all_preds.extend(predicted.cpu().numpy())
+             all_labels.extend(labels.cpu().numpy())
+             all_probs.extend(probs.cpu().numpy())
+
+     all_labels = np.array(all_labels)
+     all_preds = np.array(all_preds)
+     all_probs = np.array(all_probs)
+
+     # Compute and plot confusion matrix
+     cm = confusion_matrix(all_labels, all_preds)
+     plt.figure(figsize=(10, 8))
+     sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+     plt.xlabel('Predicted Value')
+     plt.ylabel('Actual Value')
+     plt.title(f'Confusion Matrix\n{data_info}')
+     plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
+     plt.close()
+
+     colors = ['blue', 'red', 'green', 'yellow', 'purple', 'orange', 'pink', 'cyan']
+
+     # Precision-Recall Curve
+     plt.figure(figsize=(10, 8))
+     for i, class_label in enumerate(class_labels):
+         precision, recall, _ = precision_recall_curve(all_labels == i, all_probs[:, i])
+         average_precision = average_precision_score(all_labels == i, all_probs[:, i])
+         plt.plot(recall, precision, color=colors[i], lw=2,
+                  label=f'{class_label} (AP = {average_precision:.2f})')
+
+     plt.xlabel('Recall')
+     plt.ylabel('Precision')
+     plt.title(f'Precision-Recall Curve\n{data_info}')
+     plt.legend(loc="lower left")
+     plt.savefig(f'{output_dir}/precision_recall_curve.png')
+     plt.close()
+
+     # ROC Curve
+     plt.figure(figsize=(10, 8))
+     for i, class_label in enumerate(class_labels):
+         fpr, tpr, _ = roc_curve(all_labels == i, all_probs[:, i])
+         roc_auc = auc(fpr, tpr)
+         plt.plot(fpr, tpr, color=colors[i], lw=2,
+                  label=f'{class_label} (AUC = {roc_auc:.2f})')
+
+     plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+     plt.xlim([0.0, 1.0])
+     plt.ylim([0.0, 1.05])
+     plt.xlabel('False Positive Rate')
+     plt.ylabel('True Positive Rate')
+     plt.title(f'Receiver Operating Characteristic (ROC) Curve\n{data_info}')
+     plt.legend(loc="lower right")
+     plt.savefig(f'{output_dir}/roc_curve.png')
+     plt.close()
+
+     return cm
+
+ if __name__ == "__main__":
+     # Find the most recent run directory
+     run_dir = get_latest_run_dir()
+     # run_dir = "/home/bawolf/workspace/break/clip/runs/run_20241024-150232_otherpeopleval_large_model"
+     # run_dir = "/home/bawolf/workspace/break/clip/runs/run_20241022-122939_3moves_balanced"
+
+     # Load configuration
+     config = get_config(run_dir)
+
+     class_labels = config['class_labels']
+     num_classes = config['num_classes']
+     data_path = config['data_path']
+     # data_path = '../finetune/3moves_otherpeopleval'
+     # data_path = '../finetune/otherpeople3moves'
+
+     # Paths
+     log_file = os.path.join(run_dir, 'training_log.csv')
+     model_path = get_latest_model_path(run_dir)
+     test_csv = os.path.join(data_path, 'test.csv')
+     # test_csv = os.path.join(data_path, 'val.csv')
+     # test_csv = os.path.join(data_path, 'train.csv')
+
+     # Get the last directory of data_path and the file name
+     last_dir = os.path.basename(os.path.normpath(data_path))
+     file_name = os.path.basename(test_csv)
+
+     # Create a directory for visualization outputs
+     vis_dir = os.path.join(run_dir, f'visualization_{last_dir}_{file_name.split(".")[0]}')
+     os.makedirs(vis_dir, exist_ok=True)
+
+     # Create data_info string for chart headers
+     data_info = f'Data: {last_dir}, File: {file_name}'
+
+     # Plot training curves
+     plot_training_curves(log_file, vis_dir)
+
+     # Load model
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     model = load_model(num_classes, model_path, device, config['clip_model'])
+     model.eval()
+
+     # Create test dataset and dataloader
+     test_dataset = VideoDataset(test_csv, config)
+     test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)
+
+     # Generate evaluation metrics
+     cm = generate_evaluation_metrics(model, test_loader, device, vis_dir, class_labels, data_info)
+
+     print(f"Visualization complete! Check the output directory: {vis_dir}")
src/models/__init__.py ADDED
File without changes
src/models/model.py ADDED
@@ -0,0 +1,42 @@
+ import torch
+ import torch.nn as nn
+ from transformers import CLIPModel
+
+ class VariableLengthCLIP(nn.Module):
+     def __init__(self, clip_model, num_classes):
+         super().__init__()
+         self.clip_model = clip_model
+         self.visual_projection = nn.Linear(clip_model.visual_projection.in_features, num_classes)
+
+     def forward(self, x):
+         batch_size, num_frames, c, h, w = x.shape
+         x = x.view(batch_size * num_frames, c, h, w)
+         features = self.clip_model.vision_model(x).pooler_output
+         features = features.view(batch_size, num_frames, -1)
+         features = torch.mean(features, dim=1)  # Average over frames (temporal pooling)
+         return self.visual_projection(features)
+
+     def unfreeze_vision_encoder(self, num_layers=2):
+         # Freeze the entire vision encoder
+         for param in self.clip_model.vision_model.parameters():
+             param.requires_grad = False
+         # Unfreeze the last few layers of the vision encoder
+         for param in self.clip_model.vision_model.encoder.layers[-num_layers:].parameters():
+             param.requires_grad = True
+
+ def create_model(num_classes, pretrained_model_name="openai/clip-vit-base-patch32"):
+     clip_model = CLIPModel.from_pretrained(pretrained_model_name)
+     return VariableLengthCLIP(clip_model, num_classes)
+
+ def load_model(num_classes, model_path, device, pretrained_model_name="openai/clip-vit-base-patch32"):
+     # Create the model
+     model = create_model(num_classes, pretrained_model_name)
+
+     # Load the state dict
+     state_dict = torch.load(model_path, map_location=device, weights_only=True)
+
+     # Load the state dict, ignoring mismatched keys
+     model.load_state_dict(state_dict, strict=False)
+
+     model.to(device)  # Move the model to the appropriate device
+     return model
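The forward pass above expects a 5-D batch of shape (batch, frames, channels, height, width) and mean-pools the per-frame CLIP features before the classification head. A quick smoke test of that contract, not part of the original commit (it uses the smaller ViT-B/32 checkpoint to keep the download light):

```python
import torch
from src.models.model import create_model

# Dummy batch: 2 clips, 10 frames each, 3x224x224 (matches the config's max_frames/image_size)
model = create_model(num_classes=3, pretrained_model_name="openai/clip-vit-base-patch32")
model.eval()
dummy = torch.randn(2, 10, 3, 224, 224)

with torch.no_grad():
    logits = model(dummy)              # per-frame features are averaged over the 10 frames
    probs = torch.softmax(logits, dim=1)

print(logits.shape)  # torch.Size([2, 3]) - one logit per class per clip
```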
src/utils/utils.py ADDED
@@ -0,0 +1,82 @@
+ import os
+ import json
+ from datetime import datetime
+
+ def create_run_directory(base_dir='runs', prefix='run', suffix='', parent_dir=None):
+     timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+     dir_name = f"{prefix}_{timestamp}{suffix}"
+
+     if parent_dir:
+         run_dir = os.path.join(parent_dir, dir_name)
+     else:
+         run_dir = os.path.join(base_dir, dir_name)
+     os.makedirs(run_dir, exist_ok=True)
+     return run_dir
+
+ # Find the most recent run directory
+ def get_latest_run_dir(base_dir='runs', include_hyperparam=True):
+     all_dirs = []
+
+     for d in os.listdir(base_dir):
+         if d.startswith('run_'):
+             full_path = os.path.join(base_dir, d)
+             all_dirs.append(full_path)
+
+             if d.endswith('_hyperparam') and include_hyperparam:
+                 # If it's a hyperparam directory, add its trial subdirectories
+                 trial_dirs = [os.path.join(full_path, td) for td in os.listdir(full_path) if td.startswith('trial_')]
+                 all_dirs.extend(trial_dirs)
+
+     if not all_dirs:
+         raise ValueError(f"No run directories found in {base_dir}")
+
+     # Sort directories by timestamp in the directory name
+     return max(all_dirs, key=get_dir_timestamp)
+
+ def get_run_file(filename, run_dir=None, required=True):
+     """Get a file from a run directory
+
+     Args:
+         filename: Name of file to get (e.g., 'best_model.pth', 'config.json')
+         run_dir: Run directory path (uses latest if None)
+         required: Whether to raise an error if file not found
+
+     Returns:
+         str: Path to the file
+         dict: Loaded JSON data if file ends with .json
+     """
+     if run_dir is None:
+         run_dir = get_latest_run_dir()
+
+     file_path = os.path.join(run_dir, filename)
+
+     if not os.path.exists(file_path):
+         if required:
+             raise FileNotFoundError(f"{filename} not found in {run_dir}")
+         return None
+
+     # Load JSON files automatically
+     if filename.endswith('.json'):
+         with open(file_path, 'r') as f:
+             return json.load(f)
+
+     return file_path
+
+ def get_latest_model_path(run_dir=None):
+     """Get path to best_model.pth"""
+     return get_run_file('best_model.pth', run_dir)
+
+ def get_config(run_dir=None):
+     """Get config from run directory"""
+     return get_run_file('config.json', run_dir)
+
+ # Helper function to parse directory name and get timestamp
+ def get_dir_timestamp(dir_path):
+     dir_name = os.path.basename(dir_path)
+     try:
+         # Extract timestamp from directory name
+         timestamp_str = dir_name.split('_')[1]  # Assumes format is always prefix_timestamp or prefix_timestamp_suffix
+         return datetime.strptime(timestamp_str, "%Y%m%d-%H%M%S")
+     except (IndexError, ValueError):
+         # If parsing fails, return the earliest possible date
+         return datetime.min
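A short usage sketch tying these helpers together; the directory name shown in the comment is illustrative only, and it assumes a completed run has already written `config.json` and `best_model.pth`:

```python
# Illustrative use of the run-directory helpers above; paths are examples, not real runs.
from src.utils.utils import create_run_directory, get_latest_run_dir, get_config, get_latest_model_path

run_dir = create_run_directory(suffix='_demo')   # e.g. runs/run_20241024-150232_demo
print("Created:", run_dir)

latest = get_latest_run_dir()                    # newest runs/run_* (or trial_* inside a *_hyperparam run)
config = get_config(latest)                      # config.json parsed into a dict
model_path = get_latest_model_path(latest)       # path to best_model.pth in that run
print(latest, model_path, config["class_labels"])
```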