Spaces · Runtime error

HungNP committed · cb80c28

Commit message: "New single commit message"

Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- .github/workflows/push-huggingface.yml +22 -0
- .gitignore +4 -0
- Dockerfile +30 -0
- LICENSE +43 -0
- README.md +248 -0
- convs/__init__.py +0 -0
- convs/cifar_resnet.py +207 -0
- convs/conv_cifar.py +77 -0
- convs/conv_imagenet.py +82 -0
- convs/linears.py +167 -0
- convs/memo_cifar_resnet.py +164 -0
- convs/memo_resnet.py +322 -0
- convs/modified_represnet.py +177 -0
- convs/resnet.py +395 -0
- convs/resnet_cbam.py +267 -0
- convs/ucir_cifar_resnet.py +204 -0
- convs/ucir_resnet.py +299 -0
- download_dataset.sh +8 -0
- download_file_from_s3.py +49 -0
- download_s3_path.py +58 -0
- entrypoint.sh +8 -0
- eval.py +133 -0
- exps/beef.json +28 -0
- exps/bic.json +14 -0
- exps/coil.json +18 -0
- exps/der.json +14 -0
- exps/ewc.json +14 -0
- exps/fetril.json +21 -0
- exps/finetune.json +14 -0
- exps/foster.json +31 -0
- exps/foster_general.json +31 -0
- exps/gem.json +14 -0
- exps/icarl.json +15 -0
- exps/il2a.json +24 -0
- exps/lwf.json +14 -0
- exps/memo.json +33 -0
- exps/pass.json +23 -0
- exps/podnet.json +14 -0
- exps/replay.json +14 -0
- exps/rmm-foster.json +31 -0
- exps/rmm-icarl.json +15 -0
- exps/rmm-pretrain.json +10 -0
- exps/simplecil.json +23 -0
- exps/simplecil_general.json +22 -0
- exps/simplecil_resume.json +24 -0
- exps/ssre.json +25 -0
- exps/wa.json +14 -0
- inference.py +115 -0
- install_awscli.sh +7 -0
- load.sh +5 -0
.github/workflows/push-huggingface.yml
ADDED
@@ -0,0 +1,22 @@
name: Push to Hugging Face

on:
  push:
    branches: [ "master" ]

jobs:
  push:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Push repository to Hugging Face
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "phuochungus@gmail.com"
          git config --global user.name "HungNP"
          git remote add space https://huggingface.co/spaces/phuochungus/PyCIL_Stanford_Car
          git checkout -b main
          git reset $(git commit-tree HEAD^{tree} -m "New single commit message")
          git push --force https://phuochungus:$HF_TOKEN@huggingface.co/spaces/phuochungus/PyCIL_Stanford_Car main
          git push --force https://phuochungus:$HF_TOKEN@huggingface.co/spaces/DevSecOpAI/PyCIL main
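The interesting step in this workflow is `git commit-tree HEAD^{tree} -m "..."`: it writes a brand-new commit object that wraps the current tree but has no parent, and the following `git reset` moves the branch onto it, so the force pushes upload the whole Space as a single commit rather than the full GitHub history. A minimal local sketch of the same squash-and-push flow, assuming `git` is on PATH, the script runs inside a checkout, and `HF_TOKEN` is set (the `git`/`squash_to_single_commit` helpers are illustrative, not part of this repository):

```python
# Sketch of the workflow's squash-to-one-commit trick, run from a Git checkout.
import os
import subprocess

def git(*args):
    """Run a git command, fail loudly, and return its stdout."""
    result = subprocess.run(["git", *args], check=True,
                            capture_output=True, text=True)
    return result.stdout.strip()

def squash_to_single_commit(message):
    # commit-tree writes a parentless commit holding the current tree ...
    new_commit = git("commit-tree", "HEAD^{tree}", "-m", message)
    # ... and reset points the current branch at it, dropping all history.
    git("reset", new_commit)

squash_to_single_commit("New single commit message")
token = os.environ["HF_TOKEN"]
git("push", "--force",
    f"https://phuochungus:{token}@huggingface.co/spaces/phuochungus/PyCIL_Stanford_Car",
    "HEAD:main")
```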
.gitignore
ADDED
@@ -0,0 +1,4 @@
data/
__pycache__/
logs/
.env
Dockerfile
ADDED
@@ -0,0 +1,30 @@
FROM python:3.8.5

RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME

RUN apt-get update && apt-get install -y unzip

RUN pip install --no-cache-dir --upgrade pip
RUN pip install Cython
RUN pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html

COPY --chown=user requirements.txt requirements.txt

RUN pip install -r requirements.txt

COPY --chown=user download_dataset.sh download_dataset.sh

RUN chmod +x download_dataset.sh

RUN ./download_dataset.sh

COPY --chown=user . .

RUN chmod +x install_awscli.sh && ./install_awscli.sh

RUN chmod +x entrypoint.sh upload_s3.sh simple_train.sh train_from_working.sh

ENTRYPOINT [ "./entrypoint.sh" ]
LICENSE
ADDED
@@ -0,0 +1,43 @@
MIT License

Copyright (c) 2020 Changhong Zhong

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

MIT License

Copyright (c) 2021 Fu-Yun Wang.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
ADDED
@@ -0,0 +1,248 @@
---
title: Pycil
emoji: 🍳
colorFrom: red
colorTo: red
sdk: docker
pinned: false
---
# PyCIL: A Python Toolbox for Class-Incremental Learning

---

<p align="center">
  <a href="#Introduction">Introduction</a> •
  <a href="#Methods-Reproduced">Methods Reproduced</a> •
  <a href="#Reproduced-Results">Reproduced Results</a> •
  <a href="#how-to-use">How To Use</a> •
  <a href="#license">License</a> •
  <a href="#Acknowledgments">Acknowledgments</a> •
  <a href="#Contact">Contact</a>
</p>

<div align="center">
<img src="./resources/logo.png" width="200px">
</div>

---

<div align="center">

[![LICENSE](https://img.shields.io/badge/license-MIT-green?style=flat-square)](https://github.com/yaoyao-liu/class-incremental-learning/blob/master/LICENSE) [![Python](https://img.shields.io/badge/python-3.8-blue.svg?style=flat-square&logo=python&color=3776AB&logoColor=3776AB)](https://www.python.org/) [![PyTorch](https://img.shields.io/badge/pytorch-1.8-%237732a8?style=flat-square&logo=PyTorch&color=EE4C2C)](https://pytorch.org/) [![method](https://img.shields.io/badge/Reproduced-20-success)]() [![CIL](https://img.shields.io/badge/ClassIncrementalLearning-SOTA-success??style=for-the-badge&logo=appveyor)](https://paperswithcode.com/task/incremental-learning)
![visitors](https://visitor-badge.laobi.icu/badge?page_id=LAMDA.PyCIL&left_color=green&right_color=red)

</div>

Welcome to PyCIL, perhaps the toolbox for class-incremental learning with the **most** implemented methods. This is the code repository for "PyCIL: A Python Toolbox for Class-Incremental Learning" [[paper]](https://arxiv.org/abs/2112.12533) in PyTorch. If you use any content of this repo for your work, please cite the following bib entry:

    @article{zhou2023pycil,
        author = {Da-Wei Zhou and Fu-Yun Wang and Han-Jia Ye and De-Chuan Zhan},
        title = {PyCIL: a Python toolbox for class-incremental learning},
        journal = {SCIENCE CHINA Information Sciences},
        year = {2023},
        volume = {66},
        number = {9},
        pages = {197101-},
        doi = {https://doi.org/10.1007/s11432-022-3600-y}
    }

    @article{zhou2023class,
        author = {Zhou, Da-Wei and Wang, Qi-Wei and Qi, Zhi-Hong and Ye, Han-Jia and Zhan, De-Chuan and Liu, Ziwei},
        title = {Deep Class-Incremental Learning: A Survey},
        journal = {arXiv preprint arXiv:2302.03648},
        year = {2023}
    }

## What's New
- [2024-03]🌟 Check out our [latest work](https://arxiv.org/abs/2403.12030) on pre-trained model-based class-incremental learning!
- [2024-01]🌟 Check out our [latest survey](https://arxiv.org/abs/2401.16386) on pre-trained model-based continual learning!
- [2023-09]🌟 We have released the [PILOT](https://github.com/sun-hailong/LAMDA-PILOT) toolbox for class-incremental learning with pre-trained models. Have a try!
- [2023-07]🌟 Add [MEMO](https://openreview.net/forum?id=S07feAlQHgM), [BEEF](https://openreview.net/forum?id=iP77_axu0h3), and [SimpleCIL](https://arxiv.org/abs/2303.07338). State-of-the-art methods of 2023!
- [2023-05]🌟 Check out our recent work about [class-incremental learning with vision-language models](https://arxiv.org/abs/2305.19270)!
- [2023-02]🌟 Check out our [rigorous and unified survey](https://arxiv.org/abs/2302.03648) about class-incremental learning, which introduces some memory-agnostic measures with holistic evaluations from multiple aspects!
- [2022-12]🌟 Add FeTrIL, PASS, IL2A, and SSRE.
- [2022-10]🌟 PyCIL has been published in [SCIENCE CHINA Information Sciences](https://link.springer.com/article/10.1007/s11432-022-3600-y). Check out the [official introduction](https://mp.weixin.qq.com/s/h1qu2LpdvjeHAPLOnG478A)!
- [2022-08]🌟 Add RMM.
- [2022-07]🌟 Add [FOSTER](https://arxiv.org/abs/2204.04662). State-of-the-art method with a single backbone!
- [2021-12]🌟 **Call For Feedback**: We add a <a href="#Awesome-Papers-using-PyCIL">section</a> to introduce awesome works using PyCIL. If you are using PyCIL to publish your work in top-tier conferences/journals, feel free to [contact us](mailto:zhoudw@lamda.nju.edu.cn) for details!

## Introduction

Traditional machine learning systems are deployed under the closed-world setting, which requires the entire training data before the offline training process. However, real-world applications often face incoming new classes, and a model should incorporate them continually. This learning paradigm is called Class-Incremental Learning (CIL). We propose a Python toolbox that implements several key algorithms for class-incremental learning to ease the burden of researchers in the machine learning community. The toolbox contains implementations of a number of founding works of CIL, such as EWC and iCaRL, but also provides current state-of-the-art algorithms that can be used for conducting novel fundamental research. This toolbox, named PyCIL for Python Class-Incremental Learning, is open source with an MIT license.

For more information about incremental learning, you can refer to these reading materials:
- A brief introduction (in Chinese) about CIL is available [here](https://zhuanlan.zhihu.com/p/490308909).
- A PyTorch Tutorial to Class-Incremental Learning (with explicit codes and detailed explanations) is available [here](https://github.com/G-U-N/a-PyTorch-Tutorial-to-Class-Incremental-Learning).

## Methods Reproduced

- `FineTune`: Baseline method which simply updates parameters on new tasks.
- `EWC`: Overcoming catastrophic forgetting in neural networks. PNAS2017 [[paper](https://arxiv.org/abs/1612.00796)]
- `LwF`: Learning without Forgetting. ECCV2016 [[paper](https://arxiv.org/abs/1606.09282)]
- `Replay`: Baseline method with exemplar replay.
- `GEM`: Gradient Episodic Memory for Continual Learning. NIPS2017 [[paper](https://arxiv.org/abs/1706.08840)]
- `iCaRL`: Incremental Classifier and Representation Learning. CVPR2017 [[paper](https://arxiv.org/abs/1611.07725)]
- `BiC`: Large Scale Incremental Learning. CVPR2019 [[paper](https://arxiv.org/abs/1905.13260)]
- `WA`: Maintaining Discrimination and Fairness in Class Incremental Learning. CVPR2020 [[paper](https://arxiv.org/abs/1911.07053)]
- `PODNet`: PODNet: Pooled Outputs Distillation for Small-Tasks Incremental Learning. ECCV2020 [[paper](https://arxiv.org/abs/2004.13513)]
- `DER`: DER: Dynamically Expandable Representation for Class Incremental Learning. CVPR2021 [[paper](https://arxiv.org/abs/2103.16788)]
- `PASS`: Prototype Augmentation and Self-Supervision for Incremental Learning. CVPR2021 [[paper](https://openaccess.thecvf.com/content/CVPR2021/papers/Zhu_Prototype_Augmentation_and_Self-Supervision_for_Incremental_Learning_CVPR_2021_paper.pdf)]
- `RMM`: RMM: Reinforced Memory Management for Class-Incremental Learning. NeurIPS2021 [[paper](https://proceedings.neurips.cc/paper/2021/hash/1cbcaa5abbb6b70f378a3a03d0c26386-Abstract.html)]
- `IL2A`: Class-Incremental Learning via Dual Augmentation. NeurIPS2021 [[paper](https://proceedings.neurips.cc/paper/2021/file/77ee3bc58ce560b86c2b59363281e914-Paper.pdf)]
- `SSRE`: Self-Sustaining Representation Expansion for Non-Exemplar Class-Incremental Learning. CVPR2022 [[paper](https://arxiv.org/abs/2203.06359)]
- `FeTrIL`: Feature Translation for Exemplar-Free Class-Incremental Learning. WACV2023 [[paper](https://arxiv.org/abs/2211.13131)]
- `Coil`: Co-Transport for Class-Incremental Learning. ACM MM2021 [[paper](https://arxiv.org/abs/2107.12654)]
- `FOSTER`: Feature Boosting and Compression for Class-incremental Learning. ECCV 2022 [[paper](https://arxiv.org/abs/2204.04662)]
- `MEMO`: A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning. ICLR 2023 Spotlight [[paper](https://openreview.net/forum?id=S07feAlQHgM)]
- `BEEF`: BEEF: Bi-Compatible Class-Incremental Learning via Energy-Based Expansion and Fusion. ICLR 2023 [[paper](https://openreview.net/forum?id=iP77_axu0h3)]
- `SimpleCIL`: Revisiting Class-Incremental Learning with Pre-Trained Models: Generalizability and Adaptivity are All You Need. arXiv 2023 [[paper](https://arxiv.org/abs/2303.07338)]

> Intended authors are welcome to contact us to reproduce your methods in our repo. Feel free to merge your algorithm into PyCIL if you are using our codebase!

## Reproduced Results

#### CIFAR-100

<div align="center">
<img src="./resources/cifar100.png" width="900px">
</div>

#### ImageNet-100

<div align="center">
<img src="./resources/ImageNet100.png" width="900px">
</div>

#### ImageNet-100 (Top-5 Accuracy)

<div align="center">
<img src="./resources/imagenet20st5.png" width="500px">
</div>

> More experimental details and results can be found in our [survey](https://arxiv.org/abs/2302.03648).

## How To Use

### Clone

Clone this GitHub repository:

```
git clone https://github.com/G-U-N/PyCIL.git
cd PyCIL
```

### Dependencies

1. [torch 1.8.1](https://github.com/pytorch/pytorch)
2. [torchvision 0.6.0](https://github.com/pytorch/vision)
3. [tqdm](https://github.com/tqdm/tqdm)
4. [numpy](https://github.com/numpy/numpy)
5. [scipy](https://github.com/scipy/scipy)
6. [quadprog](https://github.com/quadprog/quadprog)
7. [POT](https://github.com/PythonOT/POT)

### Run experiment

1. Edit the `[MODEL NAME].json` file for global settings.
2. Edit the hyperparameters in the corresponding `[MODEL NAME].py` file (e.g., `models/icarl.py`).
3. Run:

```bash
python main.py --config=./exps/[MODEL NAME].json
```

where `[MODEL NAME]` should be chosen from `finetune`, `ewc`, `lwf`, `replay`, `gem`, `icarl`, `bic`, `wa`, `podnet`, `der`, etc.

4. `hyper-parameters`

When using PyCIL, you can edit the global parameters and algorithm-specific hyper-parameters in the corresponding json file.

These parameters include:

- **memory-size**: The total exemplar number in the incremental learning process. Assuming there are $K$ classes at the current stage, the model will preserve $\left[\frac{\text{memory-size}}{K}\right]$ exemplars per class.
- **init-cls**: The number of classes in the first incremental stage. Since there are different settings in CIL with a different number of classes in the first stage, our framework enables different choices to define the initial stage.
- **increment**: The number of classes in each incremental stage $i$, $i$ > 1. By default, the number of classes per incremental stage is equivalent per stage.
- **convnet-type**: The backbone network for the incremental model. According to the benchmark setting, `ResNet32` is utilized for `CIFAR100`, and `ResNet18` is used for `ImageNet`.
- **seed**: The random seed adopted for shuffling the class order. According to the benchmark setting, it is set to 1993 by default.

Other parameters in terms of model optimization, e.g., batch size, optimization epoch, learning rate, learning rate decay, weight decay, milestone, and temperature, can be modified in the corresponding Python file.

### Datasets

We have implemented the pre-processing of `CIFAR100`, `imagenet100`, and `imagenet1000`. When training on `CIFAR100`, this framework will automatically download it. When training on `imagenet100/1000`, you should specify the folder of your dataset in `utils/data.py`.

```python
def download_data(self):
    assert 0, "You should specify the folder of your dataset"
    train_dir = '[DATA-PATH]/train/'
    test_dir = '[DATA-PATH]/val/'
```
[Here](https://drive.google.com/drive/folders/1RBrPGrZzd1bHU5YG8PjdfwpHANZR_lhJ?usp=sharing) is the file list of ImageNet100 (or say ImageNet-Sub).

## Awesome Papers using PyCIL

### Our Papers
- Expandable Subspace Ensemble for Pre-Trained Model-Based Class-Incremental Learning (**CVPR 2024**) [[paper](https://arxiv.org/abs/2403.12030)] [[code](https://github.com/sun-hailong/CVPR24-Ease)]

- Continual Learning with Pre-Trained Models: A Survey (**arXiv 2024**) [[paper](https://arxiv.org/abs/2401.16386)] [[code](https://github.com/sun-hailong/LAMDA-PILOT)]

- Deep Class-Incremental Learning: A Survey (**arXiv 2023**) [[paper](https://arxiv.org/abs/2302.03648)] [[code](https://github.com/zhoudw-zdw/CIL_Survey/)]

- Learning without Forgetting for Vision-Language Models (**arXiv 2023**) [[paper](https://arxiv.org/abs/2305.19270)]

- Revisiting Class-Incremental Learning with Pre-Trained Models: Generalizability and Adaptivity are All You Need (**arXiv 2023**) [[paper](https://arxiv.org/abs/2303.07338)] [[code](https://github.com/zhoudw-zdw/RevisitingCIL)]

- PILOT: A Pre-Trained Model-Based Continual Learning Toolbox (**arXiv 2023**) [[paper](https://arxiv.org/abs/2309.07117)] [[code](https://github.com/sun-hailong/LAMDA-PILOT)]

- Few-Shot Class-Incremental Learning via Training-Free Prototype Calibration (**NeurIPS 2023**) [[paper](https://arxiv.org/abs/2312.05229)] [[code](https://github.com/wangkiw/TEEN)]

- BEEF: Bi-Compatible Class-Incremental Learning via Energy-Based Expansion and Fusion (**ICLR 2023**) [[paper](https://openreview.net/forum?id=iP77_axu0h3)] [[code](https://github.com/G-U-N/ICLR23-BEEF/)]

- A Model or 603 Exemplars: Towards Memory-Efficient Class-Incremental Learning (**ICLR 2023**) [[paper](https://arxiv.org/abs/2205.13218)] [[code](https://github.com/wangkiw/ICLR23-MEMO/)]

- Few-Shot Class-Incremental Learning by Sampling Multi-Phase Tasks (**TPAMI 2022**) [[paper](https://arxiv.org/pdf/2203.17030.pdf)] [[code](https://github.com/zhoudw-zdw/TPAMI-Limit)]

- FOSTER: Feature Boosting and Compression for Class-Incremental Learning (**ECCV 2022**) [[paper](https://arxiv.org/abs/2204.04662)] [[code](https://github.com/G-U-N/ECCV22-FOSTER/)]

- Forward Compatible Few-Shot Class-Incremental Learning (**CVPR 2022**) [[paper](https://openaccess.thecvf.com/content/CVPR2022/papers/Zhou_Forward_Compatible_Few-Shot_Class-Incremental_Learning_CVPR_2022_paper.pdf)] [[code](https://github.com/zhoudw-zdw/CVPR22-Fact)]

- Co-Transport for Class-Incremental Learning (**ACM MM 2021**) [[paper](https://arxiv.org/abs/2107.12654)] [[code](https://github.com/zhoudw-zdw/MM21-Coil)]

### Other Awesome Works

- Towards Realistic Evaluation of Industrial Continual Learning Scenarios with an Emphasis on Energy Consumption and Computational Footprint (**ICCV 2023**) [[paper](https://openaccess.thecvf.com/content/ICCV2023/papers/Chavan_Towards_Realistic_Evaluation_of_Industrial_Continual_Learning_Scenarios_with_an_ICCV_2023_paper.pdf)] [[code](https://github.com/Vivek9Chavan/RECIL)]

- Dynamic Residual Classifier for Class Incremental Learning (**ICCV 2023**) [[paper](https://openaccess.thecvf.com/content/ICCV2023/papers/Chen_Dynamic_Residual_Classifier_for_Class_Incremental_Learning_ICCV_2023_paper.pdf)] [[code](https://github.com/chen-xw/DRC-CIL)]

- S-Prompts Learning with Pre-trained Transformers: An Occam's Razor for Domain Incremental Learning (**NeurIPS 2022**) [[paper](https://openreview.net/forum?id=ZVe_WeMold)] [[code](https://github.com/iamwangyabin/S-Prompts)]

## License

Please check the MIT [license](./LICENSE) that is listed in this repository.

## Acknowledgments

We thank the following repos for providing helpful components/functions used in our work:

- [Continual-Learning-Reproduce](https://github.com/zhchuu/continual-learning-reproduce)
- [GEM](https://github.com/hursung1/GradientEpisodicMemory)
- [FACIL](https://github.com/mmasana/FACIL)

The training flow and data configurations are based on Continual-Learning-Reproduce. The original information of the repo is available in the base branch.

## Contact

If there are any questions, please feel free to propose new features by opening an issue or contact the authors: **Da-Wei Zhou** ([zhoudw@lamda.nju.edu.cn](mailto:zhoudw@lamda.nju.edu.cn)) and **Fu-Yun Wang** (wangfuyun@smail.nju.edu.cn). Enjoy the code.

## Star History 🚀

[![Star History Chart](https://api.star-history.com/svg?repos=G-U-N/PyCIL&type=Date)](https://star-history.com/#G-U-N/PyCIL&Date)
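To make the parameter list in the README above concrete, here is a small sketch of a config in the spirit of the `exps/*.json` files, plus the per-class exemplar budget it implies. The exact keys and values are illustrative assumptions; consult the shipped JSON files (e.g. `exps/icarl.json`) for the real schema.

```python
# Illustrative config mirroring the global parameters documented above;
# the real configs live under exps/ and may use different keys/values.
config = {
    "memory_size": 2000,         # total exemplar budget over all seen classes
    "init_cls": 10,              # classes in the first incremental stage
    "increment": 10,             # classes added at each later stage
    "convnet_type": "resnet32",  # resnet32 for CIFAR100, resnet18 for ImageNet
    "seed": 1993,                # benchmark default for class-order shuffling
}

# With K classes seen so far, the model keeps floor(memory_size / K)
# exemplars per class, so the total memory stays constant as K grows.
for k in (10, 50, 100):
    print(f"{k:3d} classes -> {config['memory_size'] // k} exemplars/class")
```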
convs/__init__.py
ADDED
File without changes
convs/cifar_resnet.py
ADDED
@@ -0,0 +1,207 @@
'''
Reference:
https://github.com/khurramjaved96/incremental-learning/blob/autoencoders/model/resnet32.py
'''
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class DownsampleA(nn.Module):
    def __init__(self, nIn, nOut, stride):
        super(DownsampleA, self).__init__()
        assert stride == 2
        self.avg = nn.AvgPool2d(kernel_size=1, stride=stride)

    def forward(self, x):
        x = self.avg(x)
        return torch.cat((x, x.mul(0)), 1)


class DownsampleB(nn.Module):
    def __init__(self, nIn, nOut, stride):
        super(DownsampleB, self).__init__()
        self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(nOut)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DownsampleC(nn.Module):
    def __init__(self, nIn, nOut, stride):
        super(DownsampleC, self).__init__()
        assert stride != 1 or nIn != nOut
        self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        x = self.conv(x)
        return x


class DownsampleD(nn.Module):
    def __init__(self, nIn, nOut, stride):
        super(DownsampleD, self).__init__()
        assert stride == 2
        self.conv = nn.Conv2d(nIn, nOut, kernel_size=2, stride=stride, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(nOut)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class ResNetBasicblock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ResNetBasicblock, self).__init__()

        self.conv_a = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn_a = nn.BatchNorm2d(planes)

        self.conv_b = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_b = nn.BatchNorm2d(planes)

        self.downsample = downsample

    def forward(self, x):
        residual = x

        basicblock = self.conv_a(x)
        basicblock = self.bn_a(basicblock)
        basicblock = F.relu(basicblock, inplace=True)

        basicblock = self.conv_b(basicblock)
        basicblock = self.bn_b(basicblock)

        if self.downsample is not None:
            residual = self.downsample(x)

        return F.relu(residual + basicblock, inplace=True)


class CifarResNet(nn.Module):
    """
    ResNet optimized for the Cifar Dataset, as specified in
    https://arxiv.org/abs/1512.03385.pdf
    """

    def __init__(self, block, depth, channels=3):
        super(CifarResNet, self).__init__()

        # Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
        assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
        layer_blocks = (depth - 2) // 6

        self.conv_1_3x3 = nn.Conv2d(channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(16)

        self.inplanes = 16
        self.stage_1 = self._make_layer(block, 16, layer_blocks, 1)
        self.stage_2 = self._make_layer(block, 32, layer_blocks, 2)
        self.stage_3 = self._make_layer(block, 64, layer_blocks, 2)
        self.avgpool = nn.AvgPool2d(8)
        self.out_dim = 64 * block.expansion
        self.fc = nn.Linear(64 * block.expansion, 10)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = DownsampleA(self.inplanes, planes * block.expansion, stride)

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_1_3x3(x)  # [bs, 16, 32, 32]
        x = F.relu(self.bn_1(x), inplace=True)

        x_1 = self.stage_1(x)    # [bs, 16, 32, 32]
        x_2 = self.stage_2(x_1)  # [bs, 32, 16, 16]
        x_3 = self.stage_3(x_2)  # [bs, 64, 8, 8]

        pooled = self.avgpool(x_3)  # [bs, 64, 1, 1]
        features = pooled.view(pooled.size(0), -1)  # [bs, 64]

        return {
            'fmaps': [x_1, x_2, x_3],
            'features': features
        }

    @property
    def last_conv(self):
        return self.stage_3[-1].conv_b


def resnet20mnist():
    """Constructs a ResNet-20 model for MNIST."""
    model = CifarResNet(ResNetBasicblock, 20, 1)
    return model


def resnet32mnist():
    """Constructs a ResNet-32 model for MNIST."""
    model = CifarResNet(ResNetBasicblock, 32, 1)
    return model


def resnet20():
    """Constructs a ResNet-20 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 20)
    return model


def resnet32():
    """Constructs a ResNet-32 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 32)
    return model


def resnet44():
    """Constructs a ResNet-44 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 44)
    return model


def resnet56():
    """Constructs a ResNet-56 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 56)
    return model


def resnet110():
    """Constructs a ResNet-110 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 110)
    return model

# for auc
def resnet14():
    model = CifarResNet(ResNetBasicblock, 14)
    return model

def resnet26():
    model = CifarResNet(ResNetBasicblock, 26)
    return model
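A quick smoke test for the backbone above (a sketch; it assumes the file is importable as `convs.cifar_resnet` from the repo root):

```python
# Exercise the CIFAR ResNet-32 defined above on a fake batch; the shapes
# follow the comments in CifarResNet.forward.
import torch
from convs.cifar_resnet import resnet32

net = resnet32()
out = net(torch.randn(2, 3, 32, 32))
print(out["features"].shape)                   # torch.Size([2, 64])
print([tuple(f.shape) for f in out["fmaps"]])  # stage maps: 16x32x32, 32x16x16, 64x8x8
```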
convs/conv_cifar.py
ADDED
@@ -0,0 +1,77 @@
'''
For MEMO implementations of CIFAR-ConvNet
Reference:
https://github.com/wangkiw/ICLR23-MEMO/blob/main/convs/conv_cifar.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

# for cifar
def conv_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

class ConvNet2(nn.Module):
    def __init__(self, x_dim=3, hid_dim=64, z_dim=64):
        super().__init__()
        self.out_dim = 64
        self.avgpool = nn.AvgPool2d(8)
        self.encoder = nn.Sequential(
            conv_block(x_dim, hid_dim),
            conv_block(hid_dim, z_dim),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.avgpool(x)
        features = x.view(x.shape[0], -1)
        return {
            "features": features
        }

class GeneralizedConvNet2(nn.Module):
    def __init__(self, x_dim=3, hid_dim=64, z_dim=64):
        super().__init__()
        self.encoder = nn.Sequential(
            conv_block(x_dim, hid_dim),
        )

    def forward(self, x):
        base_features = self.encoder(x)
        return base_features

class SpecializedConvNet2(nn.Module):
    def __init__(self, hid_dim=64, z_dim=64):
        super().__init__()
        self.feature_dim = 64
        self.avgpool = nn.AvgPool2d(8)
        self.AdaptiveBlock = conv_block(hid_dim, z_dim)

    def forward(self, x):
        base_features = self.AdaptiveBlock(x)
        pooled = self.avgpool(base_features)
        features = pooled.view(pooled.size(0), -1)
        return features

def conv2():
    return ConvNet2()

def get_conv_a2fc():
    basenet = GeneralizedConvNet2()
    adaptivenet = SpecializedConvNet2()
    return basenet, adaptivenet

if __name__ == '__main__':
    a, b = get_conv_a2fc()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"conv :{_base+_adap}")

    conv2 = conv2()
    conv2_sum = sum(p.numel() for p in conv2.parameters())
    print(f"conv2 :{conv2_sum}")
convs/conv_imagenet.py
ADDED
@@ -0,0 +1,82 @@
'''
For MEMO implementations of ImageNet-ConvNet
Reference:
https://github.com/wangkiw/ICLR23-MEMO/blob/main/convs/conv_imagenet.py
'''
import torch.nn as nn
import torch

# for imagenet
def first_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

def conv_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

class ConvNet(nn.Module):
    def __init__(self, x_dim=3, hid_dim=128, z_dim=512):
        super().__init__()
        self.block1 = first_block(x_dim, hid_dim)
        self.block2 = conv_block(hid_dim, hid_dim)
        self.block3 = conv_block(hid_dim, hid_dim)
        self.block4 = conv_block(hid_dim, z_dim)
        self.avgpool = nn.AvgPool2d(7)
        self.out_dim = 512

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)

        x = self.avgpool(x)
        features = x.view(x.shape[0], -1)

        return {
            "features": features
        }

class GeneralizedConvNet(nn.Module):
    def __init__(self, x_dim=3, hid_dim=128, z_dim=512):
        super().__init__()
        self.block1 = first_block(x_dim, hid_dim)
        self.block2 = conv_block(hid_dim, hid_dim)
        self.block3 = conv_block(hid_dim, hid_dim)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        return x

class SpecializedConvNet(nn.Module):
    def __init__(self, hid_dim=128, z_dim=512):
        super().__init__()
        self.block4 = conv_block(hid_dim, z_dim)
        self.avgpool = nn.AvgPool2d(7)
        self.feature_dim = 512

    def forward(self, x):
        x = self.block4(x)
        x = self.avgpool(x)
        features = x.view(x.shape[0], -1)
        return features

def conv4():
    model = ConvNet()
    return model

def conv_a2fc_imagenet():
    _base = GeneralizedConvNet()
    _adaptive_net = SpecializedConvNet()
    return _base, _adaptive_net
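As a sanity check on the spatial arithmetic above (224 → 112 via the stride-2 stem, then one halving per max-pool down to 7, then a 7×7 average pool), a sketch assuming the module is importable as `convs.conv_imagenet`:

```python
import torch
from convs.conv_imagenet import conv4, conv_a2fc_imagenet

# Full four-block ConvNet: a 224x224 input ends as a 512-d feature vector.
net = conv4()
print(net(torch.randn(2, 3, 224, 224))["features"].shape)  # torch.Size([2, 512])

# MEMO split: shared blocks 1-3, then the specialized block 4 + pooling.
base, adaptive = conv_a2fc_imagenet()
mid = base(torch.randn(2, 3, 224, 224))  # [2, 128, 14, 14]
print(adaptive(mid).shape)               # torch.Size([2, 512])
```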
convs/linears.py
ADDED
@@ -0,0 +1,167 @@
'''
Reference:
https://github.com/hshustc/CVPR19_Incremental_Learning/blob/master/cifar100-class-incremental/modified_linear.py
'''
import math
import torch
from torch import nn
from torch.nn import functional as F


class SimpleLinear(nn.Module):
    '''
    Reference:
    https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py
    '''
    def __init__(self, in_features, out_features, bias=True):
        super(SimpleLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, nonlinearity='linear')
        nn.init.constant_(self.bias, 0)

    def forward(self, input):
        return {'logits': F.linear(input, self.weight, self.bias)}


class CosineLinear(nn.Module):
    def __init__(self, in_features, out_features, nb_proxy=1, to_reduce=False, sigma=True):
        super(CosineLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features * nb_proxy
        self.nb_proxy = nb_proxy
        self.to_reduce = to_reduce
        self.weight = nn.Parameter(torch.Tensor(self.out_features, in_features))
        if sigma:
            self.sigma = nn.Parameter(torch.Tensor(1))
        else:
            self.register_parameter('sigma', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.sigma is not None:
            self.sigma.data.fill_(1)

    def forward(self, input):
        out = F.linear(F.normalize(input, p=2, dim=1), F.normalize(self.weight, p=2, dim=1))

        if self.to_reduce:
            # Reduce_proxy
            out = reduce_proxies(out, self.nb_proxy)

        if self.sigma is not None:
            out = self.sigma * out

        return {'logits': out}


class SplitCosineLinear(nn.Module):
    def __init__(self, in_features, out_features1, out_features2, nb_proxy=1, sigma=True):
        super(SplitCosineLinear, self).__init__()
        self.in_features = in_features
        self.out_features = (out_features1 + out_features2) * nb_proxy
        self.nb_proxy = nb_proxy
        self.fc1 = CosineLinear(in_features, out_features1, nb_proxy, False, False)
        self.fc2 = CosineLinear(in_features, out_features2, nb_proxy, False, False)
        if sigma:
            self.sigma = nn.Parameter(torch.Tensor(1))
            self.sigma.data.fill_(1)
        else:
            self.register_parameter('sigma', None)

    def forward(self, x):
        out1 = self.fc1(x)
        out2 = self.fc2(x)

        out = torch.cat((out1['logits'], out2['logits']), dim=1)  # concatenate along the channel

        # Reduce_proxy
        out = reduce_proxies(out, self.nb_proxy)

        if self.sigma is not None:
            out = self.sigma * out

        return {
            'old_scores': reduce_proxies(out1['logits'], self.nb_proxy),
            'new_scores': reduce_proxies(out2['logits'], self.nb_proxy),
            'logits': out
        }


def reduce_proxies(out, nb_proxy):
    if nb_proxy == 1:
        return out
    bs = out.shape[0]
    nb_classes = out.shape[1] / nb_proxy
    assert nb_classes.is_integer(), 'Shape error'
    nb_classes = int(nb_classes)

    simi_per_class = out.view(bs, nb_classes, nb_proxy)
    attentions = F.softmax(simi_per_class, dim=-1)

    return (attentions * simi_per_class).sum(-1)


'''
class CosineLinear(nn.Module):
    def __init__(self, in_features, out_features, sigma=True):
        super(CosineLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        if sigma:
            self.sigma = nn.Parameter(torch.Tensor(1))
        else:
            self.register_parameter('sigma', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.sigma is not None:
            self.sigma.data.fill_(1)

    def forward(self, input):
        out = F.linear(F.normalize(input, p=2, dim=1), F.normalize(self.weight, p=2, dim=1))
        if self.sigma is not None:
            out = self.sigma * out
        return {'logits': out}


class SplitCosineLinear(nn.Module):
    def __init__(self, in_features, out_features1, out_features2, sigma=True):
        super(SplitCosineLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features1 + out_features2
        self.fc1 = CosineLinear(in_features, out_features1, False)
        self.fc2 = CosineLinear(in_features, out_features2, False)
        if sigma:
            self.sigma = nn.Parameter(torch.Tensor(1))
            self.sigma.data.fill_(1)
        else:
            self.register_parameter('sigma', None)

    def forward(self, x):
        out1 = self.fc1(x)
        out2 = self.fc2(x)

        out = torch.cat((out1['logits'], out2['logits']), dim=1)  # concatenate along the channel
        if self.sigma is not None:
            out = self.sigma * out

        return {
            'old_scores': out1['logits'],
            'new_scores': out2['logits'],
            'logits': out
        }
'''
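`reduce_proxies` is the piece doing the work above: with `nb_proxy` similarity scores per class, it folds them into a single score per class via a softmax-weighted average over each class's proxies. A minimal shape check (a sketch, assuming the module imports as `convs.linears`):

```python
import torch
from convs.linears import CosineLinear

# 5 classes with 3 proxies each: the layer holds 15 normalized weight rows,
# and to_reduce=True collapses the 15 similarities back to 5 class scores.
head = CosineLinear(in_features=64, out_features=5, nb_proxy=3, to_reduce=True)
logits = head(torch.randn(4, 64))["logits"]
print(logits.shape)  # torch.Size([4, 5])
```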
convs/memo_cifar_resnet.py
ADDED
@@ -0,0 +1,164 @@
'''
For MEMO implementations of CIFAR-ResNet
Reference:
https://github.com/khurramjaved96/incremental-learning/blob/autoencoders/model/resnet32.py
'''
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

class DownsampleA(nn.Module):
    def __init__(self, nIn, nOut, stride):
        super(DownsampleA, self).__init__()
        assert stride == 2
        self.avg = nn.AvgPool2d(kernel_size=1, stride=stride)

    def forward(self, x):
        x = self.avg(x)
        return torch.cat((x, x.mul(0)), 1)

class ResNetBasicblock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(ResNetBasicblock, self).__init__()

        self.conv_a = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn_a = nn.BatchNorm2d(planes)

        self.conv_b = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_b = nn.BatchNorm2d(planes)

        self.downsample = downsample

    def forward(self, x):
        residual = x

        basicblock = self.conv_a(x)
        basicblock = self.bn_a(basicblock)
        basicblock = F.relu(basicblock, inplace=True)

        basicblock = self.conv_b(basicblock)
        basicblock = self.bn_b(basicblock)

        if self.downsample is not None:
            residual = self.downsample(x)

        return F.relu(residual + basicblock, inplace=True)



class GeneralizedResNet_cifar(nn.Module):
    def __init__(self, block, depth, channels=3):
        super(GeneralizedResNet_cifar, self).__init__()
        assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
        layer_blocks = (depth - 2) // 6
        self.conv_1_3x3 = nn.Conv2d(channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(16)

        self.inplanes = 16
        self.stage_1 = self._make_layer(block, 16, layer_blocks, 1)
        self.stage_2 = self._make_layer(block, 32, layer_blocks, 2)

        self.out_dim = 64 * block.expansion

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = DownsampleA(self.inplanes, planes * block.expansion, stride)

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_1_3x3(x)  # [bs, 16, 32, 32]
        x = F.relu(self.bn_1(x), inplace=True)

        x_1 = self.stage_1(x)    # [bs, 16, 32, 32]
        x_2 = self.stage_2(x_1)  # [bs, 32, 16, 16]
        return x_2

class SpecializedResNet_cifar(nn.Module):
    def __init__(self, block, depth, inplanes=32, feature_dim=64):
        super(SpecializedResNet_cifar, self).__init__()
        self.inplanes = inplanes
        self.feature_dim = feature_dim
        layer_blocks = (depth - 2) // 6
        self.final_stage = self._make_layer(block, 64, layer_blocks, 2)
        self.avgpool = nn.AvgPool2d(8)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                # m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=2):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = DownsampleA(self.inplanes, planes * block.expansion, stride)
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, base_feature_map):
        final_feature_map = self.final_stage(base_feature_map)
        pooled = self.avgpool(final_feature_map)
        features = pooled.view(pooled.size(0), -1)  # bs x 64
        return features

# For cifar & MEMO
def get_resnet8_a2fc():
    basenet = GeneralizedResNet_cifar(ResNetBasicblock, 8)
    adaptivenet = SpecializedResNet_cifar(ResNetBasicblock, 8)
    return basenet, adaptivenet

def get_resnet14_a2fc():
    basenet = GeneralizedResNet_cifar(ResNetBasicblock, 14)
    adaptivenet = SpecializedResNet_cifar(ResNetBasicblock, 14)
    return basenet, adaptivenet

def get_resnet20_a2fc():
    basenet = GeneralizedResNet_cifar(ResNetBasicblock, 20)
    adaptivenet = SpecializedResNet_cifar(ResNetBasicblock, 20)
    return basenet, adaptivenet

def get_resnet26_a2fc():
    basenet = GeneralizedResNet_cifar(ResNetBasicblock, 26)
    adaptivenet = SpecializedResNet_cifar(ResNetBasicblock, 26)
    return basenet, adaptivenet

def get_resnet32_a2fc():
    basenet = GeneralizedResNet_cifar(ResNetBasicblock, 32)
    adaptivenet = SpecializedResNet_cifar(ResNetBasicblock, 32)
    return basenet, adaptivenet
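The generalized/specialized pair above is MEMO's decomposition of a CIFAR ResNet into shared early stages and a per-task final stage; a sketch of wiring the two halves together (assuming import as `convs.memo_cifar_resnet`):

```python
import torch
from convs.memo_cifar_resnet import get_resnet32_a2fc

base, adaptive = get_resnet32_a2fc()
x = torch.randn(2, 3, 32, 32)
mid = base(x)          # shared stem + stages 1-2: [2, 32, 16, 16]
feats = adaptive(mid)  # task-specific stage 3 + 8x8 avg-pool: [2, 64]
print(mid.shape, feats.shape)
```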
convs/memo_resnet.py
ADDED
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''
For MEMO implementations of ImageNet-ResNet
Reference:
https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
'''
import torch
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except:
    from torch.hub import load_state_dict_from_url

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class GeneralizedResNet_imagenet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(GeneralizedResNet_imagenet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,  # stride=2 -> stride=1 for cifar
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # Removed in _forward_impl for cifar
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.out_dim = 512 * block.expansion

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))
        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x_1 = self.layer1(x)
        x_2 = self.layer2(x_1)
        x_3 = self.layer3(x_2)
        return x_3

    def forward(self, x):
        return self._forward_impl(x)


class SpecializedResNet_imagenet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(SpecializedResNet_imagenet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.feature_dim = 512 * block.expansion
        self.inplanes = 256 * block.expansion
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.out_dim = 512 * block.expansion

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x_4 = self.layer4(x)                 # [bs, 512, 4, 4]
        pooled = self.avgpool(x_4)           # [bs, 512, 1, 1]
        features = torch.flatten(pooled, 1)  # [bs, 512]
        return features


def get_resnet10_imagenet():
    basenet = GeneralizedResNet_imagenet(BasicBlock, [1, 1, 1, 1])
    adaptivenet = SpecializedResNet_imagenet(BasicBlock, [1, 1, 1, 1])
    return basenet, adaptivenet


def get_resnet18_imagenet():
    basenet = GeneralizedResNet_imagenet(BasicBlock, [2, 2, 2, 2])
    adaptivenet = SpecializedResNet_imagenet(BasicBlock, [2, 2, 2, 2])
    return basenet, adaptivenet


def get_resnet26_imagenet():
    basenet = GeneralizedResNet_imagenet(Bottleneck, [2, 2, 2, 2])
    adaptivenet = SpecializedResNet_imagenet(Bottleneck, [2, 2, 2, 2])
    return basenet, adaptivenet


def get_resnet34_imagenet():
    basenet = GeneralizedResNet_imagenet(BasicBlock, [3, 4, 6, 3])
    adaptivenet = SpecializedResNet_imagenet(BasicBlock, [3, 4, 6, 3])
    return basenet, adaptivenet


def get_resnet50_imagenet():
    basenet = GeneralizedResNet_imagenet(Bottleneck, [3, 4, 6, 3])
    adaptivenet = SpecializedResNet_imagenet(Bottleneck, [3, 4, 6, 3])
    return basenet, adaptivenet


if __name__ == '__main__':
    model2imagenet = 3 * 224 * 224

    a, b = get_resnet10_imagenet()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"resnet10 #params:{_base+_adap}")

    a, b = get_resnet18_imagenet()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"resnet18 #params:{_base+_adap}")

    a, b = get_resnet26_imagenet()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"resnet26 #params:{_base+_adap}")

    a, b = get_resnet34_imagenet()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"resnet34 #params:{_base+_adap}")

    a, b = get_resnet50_imagenet()
    _base = sum(p.numel() for p in a.parameters())
    _adap = sum(p.numel() for p in b.parameters())
    print(f"resnet50 #params:{_base+_adap}")
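A minimal smoke test (not part of the commit; batch size, input resolution, and the shape comments are illustrative assumptions) showing how the generalized and specialized halves above compose into one forward pass:

import torch
from convs.memo_resnet import get_resnet18_imagenet

base, adaptive = get_resnet18_imagenet()
x = torch.randn(2, 3, 224, 224)    # assumed ImageNet-sized input
fmap = base(x)                     # shared layers 1-3 -> [2, 256, 14, 14]
features = adaptive(fmap)          # task-specific layer4 + pooling -> [2, 512]
assert features.shape == (2, adaptive.out_dim)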
convs/modified_represnet.py
ADDED
@@ -0,0 +1,177 @@
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F

__all__ = ['ResNet', 'resnet18_rep', 'resnet34_rep']

# NOTE: model_urls was missing from the original file, so the pretrained=True
# branches below raised a NameError; the standard torchvision checkpoint URLs
# are restored here.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=True)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)


class conv_block(nn.Module):

    def __init__(self, in_planes, planes, mode, stride=1):
        super(conv_block, self).__init__()
        self.conv = conv3x3(in_planes, planes, stride)
        self.mode = mode
        if mode == 'parallel_adapters':
            self.adapter = conv1x1(in_planes, planes, stride)

    def re_init_conv(self):
        nn.init.kaiming_normal_(self.adapter.weight, mode='fan_out', nonlinearity='relu')
        return

    def forward(self, x):
        y = self.conv(x)
        if self.mode == 'parallel_adapters':
            y = y + self.adapter(x)

        return y


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, mode, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv_block(inplanes, planes, mode, stride)
        self.norm1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv_block(planes, planes, mode)
        self.norm2 = nn.BatchNorm2d(planes)
        self.mode = mode

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.norm1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.norm2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=100, args=None):
        self.inplanes = 64
        super(ResNet, self).__init__()
        assert args is not None
        self.mode = args["mode"]

        if 'cifar' in args["dataset"]:
            self.conv1 = nn.Sequential(nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(self.inplanes), nn.ReLU(inplace=True))
            print("use cifar")
        elif 'imagenet' in args["dataset"] or 'stanfordcar' in args["dataset"]:
            if args["init_cls"] == args["increment"]:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
            else:
                # Following PODNet implementation
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.feature = nn.AvgPool2d(4, stride=1)
        self.out_dim = 512

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=True),
            )
        layers = []
        layers.append(block(self.inplanes, planes, self.mode, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, self.mode))

        return nn.Sequential(*layers)

    def switch(self, mode='normal'):
        for name, module in self.named_modules():
            if hasattr(module, 'mode'):
                module.mode = mode

    def re_init_params(self):
        for name, module in self.named_modules():
            if hasattr(module, 're_init_conv'):
                module.re_init_conv()

    def forward(self, x):
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        dim = x.size()[-1]
        pool = nn.AvgPool2d(dim, stride=1)
        x = pool(x)
        x = x.view(x.size(0), -1)
        return {"features": x}


def resnet18_rep(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


def resnet34_rep(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model
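A usage sketch for the adapter-based backbone above, including its mode switch and adapter re-initialization hooks; the args dict is a hypothetical config whose keys mirror those read in __init__, and the input is assumed CIFAR-sized:

import torch
from convs.modified_represnet import resnet18_rep

args = {"mode": "parallel_adapters", "dataset": "cifar100"}  # hypothetical config
net = resnet18_rep(args=args)
feats = net(torch.randn(2, 3, 32, 32))["features"]           # expected shape [2, 512]
net.re_init_params()   # re-initialize every 1x1 parallel adapter
net.switch("normal")   # route through the plain 3x3 convs only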
convs/resnet.py
ADDED
@@ -0,0 +1,395 @@
'''
Reference:
https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
'''
import torch
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except:
    from torch.hub import load_state_dict_from_url

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None, args=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        assert args is not None, "you should pass args to resnet"
        if 'cifar' in args["dataset"]:
            if args["model_name"] == "memo":
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
            else:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True))
        elif 'imagenet' in args["dataset"] or 'stanfordcar' in args['dataset'] or 'general_dataset' in args['dataset']:
            if args["init_cls"] == args["increment"]:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
            else:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.out_dim = 512 * block.expansion
        # self.fc = nn.Linear(512 * block.expansion, num_classes)  # Removed in _forward_impl

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = self.conv1(x)  # [bs, 64, 32, 32]

        x_1 = self.layer1(x)    # [bs, 128, 32, 32]
        x_2 = self.layer2(x_1)  # [bs, 256, 16, 16]
        x_3 = self.layer3(x_2)  # [bs, 512, 8, 8]
        x_4 = self.layer4(x_3)  # [bs, 512, 4, 4]

        pooled = self.avgpool(x_4)           # [bs, 512, 1, 1]
        features = torch.flatten(pooled, 1)  # [bs, 512]
        # x = self.fc(x)

        return {
            'fmaps': [x_1, x_2, x_3, x_4],
            'features': features
        }

    def forward(self, x):
        return self._forward_impl(x)

    @property
    def last_conv(self):
        if hasattr(self.layer4[-1], 'conv3'):
            return self.layer4[-1].conv3
        else:
            return self.layer4[-1].conv2


def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model


def resnet10(pretrained=False, progress=True, **kwargs):
    """
    For MEMO implementations of ResNet-10
    """
    return _resnet('resnet10', BasicBlock, [1, 1, 1, 1], pretrained, progress,
                   **kwargs)


def resnet26(pretrained=False, progress=True, **kwargs):
    """
    For MEMO implementations of ResNet-26
    """
    return _resnet('resnet26', Bottleneck, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)


def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)


def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet101(pretrained=False, progress=True, **kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
                   **kwargs)


def resnet152(pretrained=False, progress=True, **kwargs):
    r"""ResNet-152 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
                   **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-50 32x4d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-101 32x8d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)


def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
    r"""Wide ResNet-50-2 model from
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
    The model is the same as ResNet except for the bottleneck number of channels
    which is twice larger in every block. The number of channels in outer 1x1
    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)


def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
    r"""Wide ResNet-101-2 model from
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
    The model is the same as ResNet except for the bottleneck number of channels
    which is twice larger in every block. The number of channels in outer 1x1
    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)
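A short sketch of the dict-style forward pass above, which exposes both the intermediate feature maps (the kind of signal distillation methods such as PODNet consume) and the pooled embedding; the args dict is a hypothetical config matching the keys this constructor reads:

import torch
from convs.resnet import resnet18

args = {"dataset": "imagenet100", "init_cls": 10, "increment": 10, "model_name": "icarl"}
net = resnet18(args=args)
out = net(torch.randn(2, 3, 224, 224))
print([tuple(f.shape) for f in out["fmaps"]])  # four stage outputs
print(tuple(out["features"].shape))            # (2, net.out_dim)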
convs/resnet_cbam.py
ADDED
@@ -0,0 +1,267 @@
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F

__all__ = ['ResNet', 'resnet18_cbam', 'resnet34_cbam', 'resnet50_cbam', 'resnet101_cbam',
           'resnet152_cbam']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc1 = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()

        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1

        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        self.ca = ChannelAttention(planes)
        self.sa = SpatialAttention()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.ca = ChannelAttention(planes * 4)
        self.sa = SpatialAttention()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out = self.ca(out) * out
        out = self.sa(out) * out
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=100, args=None):
        self.inplanes = 64
        super(ResNet, self).__init__()
        assert args is not None, "you should pass args to resnet"
        if 'cifar' in args["dataset"]:
            self.conv1 = nn.Sequential(nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(self.inplanes), nn.ReLU(inplace=True))
        elif 'imagenet' in args["dataset"] or 'stanfordcar' in args['dataset']:
            if args["init_cls"] == args["increment"]:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
            else:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.feature = nn.AvgPool2d(4, stride=1)
        # self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.out_dim = 512 * block.expansion

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        dim = x.size()[-1]
        pool = nn.AvgPool2d(dim, stride=1)
        x = pool(x)
        x = x.view(x.size(0), -1)
        return {"features": x}


def resnet18_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet18'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


def resnet34_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet34'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


def resnet50_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet50'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


def resnet101_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet101'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model


def resnet152_cbam(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        pretrained_state_dict = model_zoo.load_url(model_urls['resnet152'])
        now_state_dict = model.state_dict()
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
    return model
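A minimal check (the input size is an illustrative assumption) of how the two CBAM attention modules defined above rescale a feature map; note that, as in the Bottleneck block, they multiply into the features rather than replace them:

import torch
from convs.resnet_cbam import ChannelAttention, SpatialAttention

x = torch.randn(2, 64, 16, 16)
ca, sa = ChannelAttention(64), SpatialAttention()
x = ca(x) * x   # per-channel gates from pooled descriptors, shape [2, 64, 1, 1]
x = sa(x) * x   # per-position gate from channel statistics, shape [2, 1, 16, 16]
print(x.shape)  # torch.Size([2, 64, 16, 16])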
convs/ucir_cifar_resnet.py
ADDED
@@ -0,0 +1,204 @@
1 |
+
'''
|
2 |
+
Reference:
|
3 |
+
https://github.com/khurramjaved96/incremental-learning/blob/autoencoders/model/resnet32.py
|
4 |
+
https://github.com/hshustc/CVPR19_Incremental_Learning/blob/master/cifar100-class-incremental/modified_resnet_cifar.py
|
5 |
+
'''
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
# from convs.modified_linear import CosineLinear
|
10 |
+
|
11 |
+
|
12 |
+
class DownsampleA(nn.Module):
|
13 |
+
def __init__(self, nIn, nOut, stride):
|
14 |
+
super(DownsampleA, self).__init__()
|
15 |
+
assert stride == 2
|
16 |
+
self.avg = nn.AvgPool2d(kernel_size=1, stride=stride)
|
17 |
+
|
18 |
+
def forward(self, x):
|
19 |
+
x = self.avg(x)
|
20 |
+
return torch.cat((x, x.mul(0)), 1)
|
21 |
+
|
22 |
+
|
23 |
+
class DownsampleB(nn.Module):
|
24 |
+
def __init__(self, nIn, nOut, stride):
|
25 |
+
super(DownsampleB, self).__init__()
|
26 |
+
self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride, padding=0, bias=False)
|
27 |
+
self.bn = nn.BatchNorm2d(nOut)
|
28 |
+
|
29 |
+
def forward(self, x):
|
30 |
+
x = self.conv(x)
|
31 |
+
x = self.bn(x)
|
32 |
+
return x
|
33 |
+
|
34 |
+
|
35 |
+
class DownsampleC(nn.Module):
|
36 |
+
def __init__(self, nIn, nOut, stride):
|
37 |
+
super(DownsampleC, self).__init__()
|
38 |
+
assert stride != 1 or nIn != nOut
|
39 |
+
self.conv = nn.Conv2d(nIn, nOut, kernel_size=1, stride=stride, padding=0, bias=False)
|
40 |
+
|
41 |
+
def forward(self, x):
|
42 |
+
x = self.conv(x)
|
43 |
+
return x
|
44 |
+
|
45 |
+
|
46 |
+
class DownsampleD(nn.Module):
|
47 |
+
def __init__(self, nIn, nOut, stride):
|
48 |
+
super(DownsampleD, self).__init__()
|
49 |
+
assert stride == 2
|
50 |
+
self.conv = nn.Conv2d(nIn, nOut, kernel_size=2, stride=stride, padding=0, bias=False)
|
51 |
+
self.bn = nn.BatchNorm2d(nOut)
|
52 |
+
|
53 |
+
def forward(self, x):
|
54 |
+
x = self.conv(x)
|
55 |
+
x = self.bn(x)
|
56 |
+
return x
|
57 |
+
|
58 |
+
|
59 |
+
class ResNetBasicblock(nn.Module):
|
60 |
+
expansion = 1
|
61 |
+
|
62 |
+
def __init__(self, inplanes, planes, stride=1, downsample=None, last=False):
|
63 |
+
super(ResNetBasicblock, self).__init__()
|
64 |
+
|
65 |
+
self.conv_a = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
66 |
+
self.bn_a = nn.BatchNorm2d(planes)
|
67 |
+
|
68 |
+
self.conv_b = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
|
69 |
+
self.bn_b = nn.BatchNorm2d(planes)
|
70 |
+
|
71 |
+
self.downsample = downsample
|
72 |
+
self.last = last
|
73 |
+
|
74 |
+
def forward(self, x):
|
75 |
+
residual = x
|
76 |
+
|
77 |
+
basicblock = self.conv_a(x)
|
78 |
+
basicblock = self.bn_a(basicblock)
|
79 |
+
basicblock = F.relu(basicblock, inplace=True)
|
80 |
+
|
81 |
+
basicblock = self.conv_b(basicblock)
|
82 |
+
basicblock = self.bn_b(basicblock)
|
83 |
+
|
84 |
+
if self.downsample is not None:
|
85 |
+
residual = self.downsample(x)
|
86 |
+
|
87 |
+
out = residual + basicblock
|
88 |
+
if not self.last:
|
89 |
+
out = F.relu(out, inplace=True)
|
90 |
+
|
91 |
+
return out
|
92 |
+
|
93 |
+
|
94 |
+
class CifarResNet(nn.Module):
|
95 |
+
"""
|
96 |
+
ResNet optimized for the Cifar Dataset, as specified in
|
97 |
+
    https://arxiv.org/abs/1512.03385.pdf
    """

    def __init__(self, block, depth, channels=3):
        super(CifarResNet, self).__init__()

        # Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
        assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
        layer_blocks = (depth - 2) // 6

        self.conv_1_3x3 = nn.Conv2d(channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(16)

        self.inplanes = 16
        self.stage_1 = self._make_layer(block, 16, layer_blocks, 1)
        self.stage_2 = self._make_layer(block, 32, layer_blocks, 2)
        self.stage_3 = self._make_layer(block, 64, layer_blocks, 2, last_phase=True)
        self.avgpool = nn.AvgPool2d(8)
        self.out_dim = 64 * block.expansion
        # self.fc = CosineLinear(64*block.expansion, 10)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, last_phase=False):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = DownsampleB(self.inplanes, planes * block.expansion, stride)  # DownsampleA => DownsampleB

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        if last_phase:
            for i in range(1, blocks - 1):
                layers.append(block(self.inplanes, planes))
            layers.append(block(self.inplanes, planes, last=True))
        else:
            for i in range(1, blocks):
                layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_1_3x3(x)  # [bs, 16, 32, 32]
        x = F.relu(self.bn_1(x), inplace=True)

        x_1 = self.stage_1(x)    # [bs, 16, 32, 32]
        x_2 = self.stage_2(x_1)  # [bs, 32, 16, 16]
        x_3 = self.stage_3(x_2)  # [bs, 64, 8, 8]

        pooled = self.avgpool(x_3)                  # [bs, 64, 1, 1]
        features = pooled.view(pooled.size(0), -1)  # [bs, 64]
        # out = self.fc(vector)

        return {
            'fmaps': [x_1, x_2, x_3],
            'features': features
        }

    @property
    def last_conv(self):
        return self.stage_3[-1].conv_b


def resnet20mnist():
    """Constructs a ResNet-20 model for MNIST."""
    model = CifarResNet(ResNetBasicblock, 20, 1)
    return model


def resnet32mnist():
    """Constructs a ResNet-32 model for MNIST."""
    model = CifarResNet(ResNetBasicblock, 32, 1)
    return model


def resnet20():
    """Constructs a ResNet-20 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 20)
    return model


def resnet32():
    """Constructs a ResNet-32 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 32)
    return model


def resnet44():
    """Constructs a ResNet-44 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 44)
    return model


def resnet56():
    """Constructs a ResNet-56 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 56)
    return model


def resnet110():
    """Constructs a ResNet-110 model for CIFAR-10."""
    model = CifarResNet(ResNetBasicblock, 110)
    return model
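For reference, a minimal sketch of exercising this backbone's output contract (assuming torch is installed; the random CIFAR-sized batch is purely illustrative):

import torch
from convs.ucir_cifar_resnet import resnet32

net = resnet32()
out = net(torch.randn(4, 3, 32, 32))    # arbitrary batch of CIFAR-sized inputs
print(out['features'].shape)            # torch.Size([4, 64])
print([f.shape for f in out['fmaps']])  # the three stage feature maps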
convs/ucir_resnet.py
ADDED
@@ -0,0 +1,299 @@
'''
Reference:
https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
'''
import torch
import torch.nn as nn
try:
    from torchvision.models.utils import load_state_dict_from_url
except ImportError:
    from torch.hub import load_state_dict_from_url

__all__ = ['resnet50']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None, last=False):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.last = last

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        if not self.last:
            out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None, last=False):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.last = last

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        if not self.last:
            out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None, args=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group

        assert args is not None, "you should pass args to resnet"
        if 'cifar' in args["dataset"]:
            self.conv1 = nn.Sequential(nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                                       nn.BatchNorm2d(self.inplanes), nn.ReLU(inplace=True))
        elif 'imagenet' in args["dataset"] or 'stanfordcar' in args["dataset"] or 'general_dataset' in args['dataset']:
            if args["init_cls"] == args["increment"]:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )
            else:
                self.conv1 = nn.Sequential(
                    nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False),
                    nn.BatchNorm2d(self.inplanes),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                )

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2], last_phase=True)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.out_dim = 512 * block.expansion
        self.fc = nn.Linear(512 * block.expansion, num_classes)  # Removed in _forward_impl

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False, last_phase=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        if last_phase:
            for _ in range(1, blocks - 1):
                layers.append(block(self.inplanes, planes, groups=self.groups,
                                    base_width=self.base_width, dilation=self.dilation,
                                    norm_layer=norm_layer))
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer, last=True))
        else:
            for _ in range(1, blocks):
                layers.append(block(self.inplanes, planes, groups=self.groups,
                                    base_width=self.base_width, dilation=self.dilation,
                                    norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = self.conv1(x)  # [bs, 64, 32, 32]

        x_1 = self.layer1(x)    # [bs, 128, 32, 32]
        x_2 = self.layer2(x_1)  # [bs, 256, 16, 16]
        x_3 = self.layer3(x_2)  # [bs, 512, 8, 8]
        x_4 = self.layer4(x_3)  # [bs, 512, 4, 4]

        pooled = self.avgpool(x_4)           # [bs, 512, 1, 1]
        features = torch.flatten(pooled, 1)  # [bs, 512]
        # x = self.fc(x)

        return {
            'fmaps': [x_1, x_2, x_3, x_4],
            'features': features
        }

    def forward(self, x):
        return self._forward_impl(x)

    @property
    def last_conv(self):
        if hasattr(self.layer4[-1], 'conv3'):
            return self.layer4[-1].conv3
        else:
            return self.layer4[-1].conv2


def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model


def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)


def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)
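Because this ResNet asserts on an args dict to pick its stem, constructing it directly needs at least dataset, init_cls, and increment. A minimal sketch with illustrative values (not a recommended configuration):

from convs.ucir_resnet import resnet18

args = {"dataset": "stanfordcar", "init_cls": 20, "increment": 20}  # illustrative values
net = resnet18(pretrained=False, args=args)  # init_cls == increment selects the 7x7 stem with max-pool
print(net.out_dim)  # 512 for the BasicBlock-based ResNet-18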
download_dataset.sh
ADDED
@@ -0,0 +1,8 @@
#!/bin/sh
kaggle datasets download -d senemanu/stanfordcarsfcs

unzip -qq stanfordcarsfcs.zip

rm -rf ./car_data/car_data/train/models

mv ./car_data/car_data/test ./car_data/car_data/val
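The resulting layout (./car_data/car_data/train and ./car_data/car_data/val) matches the "data" path ./car_data/car_data referenced in exps/simplecil_resume.json.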
download_file_from_s3.py
ADDED
@@ -0,0 +1,49 @@
import os
import boto3
from botocore.exceptions import NoCredentialsError


def download_from_s3(bucket_name, s3_key, local_path, is_directory=False):
    """
    Download a file or directory from S3 to a local path.

    :param bucket_name: str. The name of the S3 bucket.
    :param s3_key: str. The S3 key (path to the file or directory).
    :param local_path: str. The local file path or directory to download to.
    :param is_directory: bool. Set to True if s3_key is a directory.
    """
    s3 = boto3.client("s3")

    if is_directory:
        # Ensure the local directory exists
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        # List all objects in the specified S3 directory
        result = s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_key)

        if "Contents" in result:
            for obj in result["Contents"]:
                s3_object_key = obj["Key"]
                # Remove the directory prefix to get the relative file path
                relative_path = os.path.relpath(s3_object_key, s3_key)
                local_file_path = os.path.join(local_path, relative_path)

                # Ensure the local directory for the file exists
                local_file_dir = os.path.dirname(local_file_path)
                if not os.path.exists(local_file_dir):
                    os.makedirs(local_file_dir)

                # Download the file
                s3.download_file(bucket_name, s3_object_key, local_file_path)
                print(f"Downloaded {s3_object_key} to {local_file_path}")
    else:
        # Download a single file
        s3.download_file(bucket_name, s3_key, local_path)
        print(f"Downloaded {s3_key} to {local_path}")


# Example usage:
# download_from_s3('my-bucket', 'path/to/myfile.txt', 'local/path/to/myfile.txt')
# download_from_s3('my-bucket', 'path/to/mydirectory/', 'local/path/to/mydirectory', is_directory=True)
download_s3_path.py
ADDED
@@ -0,0 +1,58 @@
import os
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError

def download_s3_folder(bucket_name, s3_folder, local_dir):
    # Convert local_dir to an absolute path
    local_dir = os.path.abspath(local_dir)

    # Ensure local directory exists
    if not os.path.exists(local_dir):
        os.makedirs(local_dir, exist_ok=True)

    s3 = boto3.client('s3')

    try:
        # List objects within the specified folder
        objects = s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_folder)
        if 'Contents' not in objects:
            print(f"The folder '{s3_folder}' does not contain any files.")
            return

        for obj in objects['Contents']:
            # Formulate the local file path
            s3_file_path = obj['Key']
            if s3_file_path.endswith('/'):
                # Skip directories
                continue

            local_file_path = os.path.join(local_dir, os.path.relpath(s3_file_path, s3_folder))

            # Create local directories if they do not exist
            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

            # Download the file
            s3.download_file(bucket_name, s3_file_path, local_file_path)
            print(f'Downloaded {s3_file_path} to {local_file_path}')

    except KeyError:
        print(f"The folder '{s3_folder}' does not contain any files.")
    except NoCredentialsError:
        print("Credentials not available.")
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
    except PermissionError as e:
        print(f"Permission error: {e}. Please check your directory permissions.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Download an S3 folder to a local directory.')
    parser.add_argument('-bucket', type=str, required=True, help='The S3 bucket name.')
    parser.add_argument('-s3_folder', type=str, required=True, help='The folder path within the S3 bucket.')
    parser.add_argument('-local_dir', type=str, required=True, help='The local directory to download the files to.')
    args = parser.parse_args()

    download_s3_folder(args.bucket, args.s3_folder, args.local_dir)
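A minimal sketch of calling the helper directly instead of through the CLI (the bucket name and prefix are placeholders, not real resources):

from download_s3_path import download_s3_folder

# Hypothetical bucket/prefix, shown only to illustrate the call signature.
download_s3_folder("my-bucket", "checkpoints/simplecil/", "./models/simplecil")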
entrypoint.sh
ADDED
@@ -0,0 +1,8 @@
#!/bin/sh
set -e

chmod +x train.sh install_awscli.sh

mkdir -p upload

python server.py
eval.py
ADDED
@@ -0,0 +1,133 @@
import sys
import logging
import copy
import torch
from PIL import Image
import torchvision.transforms as transforms
from utils import factory
from utils.data_manager import DataManager
from torch.utils.data import DataLoader
from utils.toolkit import count_parameters
import os
import numpy as np
import json
import argparse
import torch.multiprocessing

torch.multiprocessing.set_sharing_strategy('file_system')


def _set_device(args):
    device_type = args["device"]
    gpus = []

    for device in device_type:
        if device == -1:
            device = torch.device("cpu")
        else:
            device = torch.device("cuda:{}".format(device))

        gpus.append(device)

    args["device"] = gpus


def get_methods(object, spacing=20):
    methodList = []
    for method_name in dir(object):
        try:
            if callable(getattr(object, method_name)):
                methodList.append(str(method_name))
        except Exception:
            methodList.append(str(method_name))
    processFunc = (lambda s: ' '.join(s.split())) or (lambda s: s)
    for method in methodList:
        try:
            print(str(method.ljust(spacing)) + ' ' +
                  processFunc(str(getattr(object, method).__doc__)[0:90]))
        except Exception:
            print(method.ljust(spacing) + ' ' + ' getattr() failed')


def load_model(args):
    _set_device(args)
    model = factory.get_model(args["model_name"], args)
    model.load_checkpoint(args["checkpoint"])
    return model


def evaluate(args):
    logs_name = "logs/{}/{}_{}/{}/{}".format(
        args["model_name"], args["dataset"], args["data"], args["init_cls"], args["increment"])
    if not os.path.exists(logs_name):
        os.makedirs(logs_name)

    logfilename = "logs/{}/{}_{}/{}/{}/{}_{}_{}".format(
        args["model_name"],
        args["dataset"],
        args["data"],
        args["init_cls"],
        args["increment"],
        args["prefix"],
        args["seed"],
        args["convnet_type"],
    )
    args['logfilename'] = logs_name
    args['csv_name'] = "{}_{}_{}".format(
        args["prefix"],
        args["seed"],
        args["convnet_type"],
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(filename)s] => %(message)s",
        handlers=[
            logging.FileHandler(filename=logfilename + ".log"),
            logging.StreamHandler(sys.stdout),
        ],
    )
    _set_random()
    print_args(args)
    model = load_model(args)

    data_manager = DataManager(
        args["dataset"],
        False,
        args["seed"],
        args["init_cls"],
        args["increment"],
        path=args["data"]
    )
    loader = DataLoader(data_manager.get_dataset(model.class_list, source="test", mode="test"),
                        batch_size=args['batch_size'], shuffle=True, num_workers=8)

    cnn_acc, nme_acc = model.eval_task(loader, group=1, mode="test")
    print(cnn_acc, nme_acc)


def main():
    args = setup_parser().parse_args()
    param = load_json(args.config)
    args = vars(args)  # Converting argparse Namespace to a dict.
    args.update(param)  # Add parameters from json
    evaluate(args)


def load_json(settings_path):
    with open(settings_path) as data_file:
        param = json.load(data_file)

    return param


def _set_random():
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def setup_parser():
    parser = argparse.ArgumentParser(description='Reproduce of multiple continual learning algorithms.')
    parser.add_argument('--config', type=str, default='./exps/finetune.json',
                        help='Json file of settings.')
    parser.add_argument('-d', '--data', type=str, help='Path of the data folder')
    parser.add_argument('-c', '--checkpoint', type=str, help='Path of checkpoint file if resume training')
    return parser


def print_args(args):
    for key, value in args.items():
        logging.info("{}: {}".format(key, value))


if __name__ == '__main__':
    main()
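Note that args.update(param) runs after argument parsing, so values in the JSON config override the CLI flags; configs that omit "data" and "checkpoint" take them from -d/-c. A typical invocation (paths illustrative): python eval.py --config ./exps/icarl.json -d ./car_data/car_data -c ./models/icarl/icarl_0.pkl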
exps/beef.json
ADDED
@@ -0,0 +1,28 @@
{
    "prefix": "fusion-energy-0.01-1.7-fixed",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "beefiso",
    "convnet_type": "resnet18",
    "device": ["0", "1"],
    "seed": [2003],
    "logits_alignment": 1.7,
    "energy_weight": 0.01,
    "is_compress": false,
    "reduce_batch_size": false,
    "init_epochs": 1,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "expansion_epochs": 1,
    "fusion_epochs": 1,
    "lr": 0.1,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "num_workers": 8,
    "T": 2
}
exps/bic.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "cifar100",
    "memory_size": 2000,
    "memory_per_class": 20,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 10,
    "increment": 10,
    "model_name": "bic",
    "convnet_type": "resnet32",
    "device": ["0", "1", "2", "3"],
    "seed": [1993]
}
exps/coil.json
ADDED
@@ -0,0 +1,18 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 2000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "sinkhorn": 0.464,
    "calibration_term": 1.5,
    "norm_term": 3.0,
    "reg_term": 1e-3,
    "model_name": "coil",
    "convnet_type": "cosine_resnet18",
    "device": ["0", "1"],
    "seed": [2003]
}
exps/der.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "der",
    "convnet_type": "resnet18",
    "device": ["0", "1"],
    "seed": [1993]
}
exps/ewc.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "cifar100",
    "memory_size": 2000,
    "memory_per_class": 20,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 10,
    "increment": 10,
    "model_name": "ewc",
    "convnet_type": "resnet32",
    "device": ["0", "1", "2", "3"],
    "seed": [1993]
}
exps/fetril.json
ADDED
@@ -0,0 +1,21 @@
{
    "prefix": "train",
    "dataset": "stanfordcar",
    "memory_size": 0,
    "shuffle": true,
    "init_cls": 40,
    "increment": 1,
    "model_name": "fetril",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003],
    "init_epochs": 100,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "epochs": 80,
    "lr": 0.05,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "num_workers": 8,
    "T": 2
}
exps/finetune.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "finetune",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003]
}
exps/foster.json
ADDED
@@ -0,0 +1,31 @@
{
    "prefix": "cil",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "foster",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003],
    "beta1": 0.96,
    "beta2": 0.97,
    "oofc": "ft",
    "is_teacher_wa": false,
    "is_student_wa": false,
    "lambda_okd": 1,
    "wa_value": 1,
    "init_epochs": 100,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "boosting_epochs": 80,
    "compression_epochs": 50,
    "lr": 0.1,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "num_workers": 8,
    "T": 2
}
exps/foster_general.json
ADDED
@@ -0,0 +1,31 @@
{
    "prefix": "cil",
    "dataset": "general_dataset",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "foster",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003],
    "beta1": 0.96,
    "beta2": 0.97,
    "oofc": "ft",
    "is_teacher_wa": false,
    "is_student_wa": false,
    "lambda_okd": 1,
    "wa_value": 1,
    "init_epochs": 100,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "boosting_epochs": 80,
    "compression_epochs": 50,
    "lr": 0.1,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "num_workers": 8,
    "T": 2
}
exps/gem.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "gem",
    "convnet_type": "resnet18",
    "device": ["0", "1"],
    "seed": [2003]
}
exps/icarl.json
ADDED
@@ -0,0 +1,15 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "icarl",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003]
}
exps/il2a.json
ADDED
@@ -0,0 +1,24 @@
{
    "prefix": "cil",
    "dataset": "stanfordcar",
    "memory_size": 0,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "il2a",
    "convnet_type": "resnet18_cbam",
    "device": ["0", "1"],
    "seed": [2003],
    "lambda_fkd": 10,
    "lambda_proto": 10,
    "temp": 0.1,
    "epochs": 1,
    "lr": 0.001,
    "batch_size": 32,
    "weight_decay": 2e-4,
    "step_size": 45,
    "gamma": 0.1,
    "num_workers": 8,
    "ratio": 2.5,
    "T": 2
}
exps/lwf.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 10,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "lwf",
    "convnet_type": "resnet18",
    "device": ["0", "1"],
    "seed": [2003]
}
exps/memo.json
ADDED
@@ -0,0 +1,33 @@
{
    "prefix": "benchmark",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "memo",
    "convnet_type": "memo_resnet18",
    "train_base": true,
    "train_adaptive": true,
    "debug": false,
    "skip": false,
    "device": ["0", "1"],
    "seed": [2003],
    "scheduler": "steplr",
    "init_epoch": 100,
    "t_max": null,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "init_lr_decay": 0.1,
    "init_milestones": [40, 60, 80],
    "milestones": [30, 50, 70],
    "epochs": 80,
    "lrate": 0.1,
    "batch_size": 32,
    "weight_decay": 2e-4,
    "lrate_decay": 0.1,
    "alpha_aux": 1.0,
    "backbone": "models/finetune/reproduce_2003_resnet18_9.pkl"
}
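Note the "backbone" entry: this config appears to warm-start MEMO from a checkpoint written by an earlier finetune run (models/finetune/reproduce_2003_resnet18_9.pkl), so that run must have completed before this config is usable.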
exps/pass.json
ADDED
@@ -0,0 +1,23 @@
{
    "prefix": "train",
    "dataset": "stanfordcar",
    "memory_size": 0,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "pass",
    "convnet_type": "resnet18_cbam",
    "device": ["0"],
    "seed": [2003],
    "lambda_fkd": 10,
    "lambda_proto": 10,
    "temp": 0.1,
    "epochs": 100,
    "lr": 0.001,
    "batch_size": 16,
    "weight_decay": 2e-4,
    "step_size": 45,
    "gamma": 0.1,
    "num_workers": 8,
    "T": 2
}
exps/podnet.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "increment",
    "dataset": "stanfordcar",
    "memory_size": 2000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "podnet",
    "convnet_type": "cosine_resnet18",
    "device": ["0", "1"],
    "seed": [2003]
}
exps/replay.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "stanfordcar",
    "memory_size": 4000,
    "memory_per_class": 20,
    "fixed_memory": true,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "replay",
    "convnet_type": "resnet18",
    "device": ["0"],
    "seed": [2003]
}
exps/rmm-foster.json
ADDED
@@ -0,0 +1,31 @@
{
    "prefix": "rmm-foster",
    "dataset": "stanfordcar",
    "memory_size": 2000,
    "m_rate_list": [0.3, 0.3, 0.3, 0.4, 0.4, 0.4],
    "c_rate_list": [0.0, 0.0, 0.1, 0.1, 0.1, 0.0],
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "rmm-foster",
    "convnet_type": "resnet18",
    "device": ["0", "1"],
    "seed": [2003],
    "beta1": 0.97,
    "beta2": 0.97,
    "oofc": "ft",
    "is_teacher_wa": false,
    "is_student_wa": false,
    "lambda_okd": 1,
    "wa_value": 1,
    "init_epochs": 1,
    "init_lr": 0.1,
    "init_weight_decay": 5e-4,
    "boosting_epochs": 1,
    "compression_epochs": 1,
    "lr": 0.1,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "num_workers": 8,
    "T": 2
}
exps/rmm-icarl.json
ADDED
@@ -0,0 +1,15 @@
{
    "prefix": "reproduce",
    "dataset": "cifar100",
    "m_rate_list": [0.8, 0.8, 0.6, 0.6, 0.6, 0.6],
    "c_rate_list": [0.0, 0.0, 0.1, 0.1, 0.1, 0.0],
    "memory_size": 2000,
    "shuffle": true,
    "init_cls": 50,
    "increment": 10,
    "model_name": "rmm-icarl",
    "convnet_type": "resnet32",
    "device": ["0"],
    "seed": [1993]
}
exps/rmm-pretrain.json
ADDED
@@ -0,0 +1,10 @@
{
    "prefix": "pretrain-rmm",
    "dataset": "cifar100",
    "memory_size": 2000,
    "shuffle": true,
    "model_name": "rmm-icarl",
    "convnet_type": "resnet32",
    "device": ["0"],
    "seed": [1993]
}
exps/simplecil.json
ADDED
@@ -0,0 +1,23 @@
{
    "prefix": "simplecil",
    "dataset": "stanfordcar",
    "memory_size": 0,
    "memory_per_class": 0,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 50,
    "increment": 20,
    "model_name": "simplecil",
    "convnet_type": "cosine_resnet18",
    "device": ["0"],
    "seed": [2003],
    "checkpoint": "./models/simplecil/stanfordcar/0/20/simplecil_0.pkl",
    "init_epoch": 1,
    "init_lr": 0.01,
    "batch_size": 32,
    "weight_decay": 0.05,
    "init_lr_decay": 0.1,
    "init_weight_decay": 5e-4,
    "min_lr": 0
}
exps/simplecil_general.json
ADDED
@@ -0,0 +1,22 @@
{
    "prefix": "simplecil",
    "dataset": "general_dataset",
    "memory_size": 0,
    "memory_per_class": 0,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "simplecil",
    "convnet_type": "cosine_resnet18",
    "device": [-1],
    "seed": [2003],
    "init_epoch": 1,
    "init_lr": 0.01,
    "batch_size": 32,
    "weight_decay": 0.05,
    "init_lr_decay": 0.1,
    "init_weight_decay": 5e-4,
    "min_lr": 0
}
exps/simplecil_resume.json
ADDED
@@ -0,0 +1,24 @@
{
    "prefix": "simplecil",
    "dataset": "general_dataset",
    "memory_size": 0,
    "memory_per_class": 0,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 50,
    "increment": 20,
    "model_name": "simplecil",
    "convnet_type": "cosine_resnet18",
    "device": ["0"],
    "seed": [2003],
    "checkpoint": "./models/simplecil/stanfordcar/50/20/simplecil_0.pkl",
    "data": "./car_data/car_data",
    "init_epoch": 1,
    "init_lr": 0.01,
    "batch_size": 32,
    "weight_decay": 0.05,
    "init_lr_decay": 0.1,
    "init_weight_decay": 5e-4,
    "min_lr": 0
}
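Unlike most of the other configs, this one carries "checkpoint" and "data" keys directly, so eval.py can resume without the -c/-d flags; since the JSON is merged after argument parsing, these values also take precedence over any flags that are passed.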
exps/ssre.json
ADDED
@@ -0,0 +1,25 @@
{
    "prefix": "ssre",
    "dataset": "stanfordcar",
    "memory_size": 0,
    "shuffle": true,
    "init_cls": 20,
    "increment": 20,
    "model_name": "ssre",
    "convnet_type": "resnet18_rep",
    "device": ["0"],
    "seed": [2003],
    "lambda_fkd": 1,
    "lambda_proto": 10,
    "temp": 0.1,
    "mode": "parallel_adapters",
    "epochs": 1,
    "lr": 0.0001,
    "batch_size": 32,
    "weight_decay": 5e-4,
    "step_size": 45,
    "gamma": 0.1,
    "threshold": 0.8,
    "num_workers": 8,
    "T": 2
}
exps/wa.json
ADDED
@@ -0,0 +1,14 @@
{
    "prefix": "reproduce",
    "dataset": "cifar100",
    "memory_size": 2000,
    "memory_per_class": 20,
    "fixed_memory": false,
    "shuffle": true,
    "init_cls": 10,
    "increment": 10,
    "model_name": "wa",
    "convnet_type": "resnet32",
    "device": ["0", "1", "2", "3"],
    "seed": [1993]
}
inference.py
ADDED
@@ -0,0 +1,115 @@
import sys
import logging
import copy
import torch
from PIL import Image
import torchvision.transforms as transforms
from torchvision.transforms.functional import pil_to_tensor
from utils import factory
from utils.data_manager import DataManager
from utils.toolkit import count_parameters
from utils.data_manager import pil_loader
import os
import numpy as np
import json
import argparse
import imghdr
import time


def is_image_imghdr(path):
    """
    Checks if a path points to a valid image using imghdr.

    Args:
        path: The path to the file.

    Returns:
        True if the path is a valid image, False otherwise.
    """
    if not os.path.isfile(path):
        return False
    return imghdr.what(path) in ['jpeg', 'png']


def _set_device(args):
    device_type = args["device"]
    gpus = []

    for device in device_type:
        if device == -1:
            device = torch.device("cpu")
        else:
            device = torch.device("cuda:{}".format(device))

        gpus.append(device)

    args["device"] = gpus


def get_methods(object, spacing=20):
    methodList = []
    for method_name in dir(object):
        try:
            if callable(getattr(object, method_name)):
                methodList.append(str(method_name))
        except Exception:
            methodList.append(str(method_name))
    processFunc = (lambda s: ' '.join(s.split())) or (lambda s: s)
    for method in methodList:
        try:
            print(str(method.ljust(spacing)) + ' ' +
                  processFunc(str(getattr(object, method).__doc__)[0:90]))
        except Exception:
            print(method.ljust(spacing) + ' ' + ' getattr() failed')


def load_model(args):
    _set_device(args)
    model = factory.get_model(args["model_name"], args)
    model.load_checkpoint(args["checkpoint"])
    return model


def main():
    args = setup_parser().parse_args()
    param = load_json(args.config)
    args = vars(args)  # Converting argparse Namespace to a dict.
    args.update(param)  # Add parameters from json
    assert args['output'].split(".")[-1] == "json" or os.path.isdir(args['output'])
    model = load_model(args)
    result = []
    if is_image_imghdr(args['input']):
        img = pil_to_tensor(pil_loader(args['input']))
        img = img.unsqueeze(0)
        predictions = model.inference(img)
        out = {"img": args['input'].split("/")[-1]}
        out.update({"predictions": [{"confident": confident, "index": pred, "label": label}
                                    for pred, label, confident in zip(predictions[0], predictions[1], predictions[2])]})
        result.append(out)
    else:
        image_list = filter(lambda x: is_image_imghdr(os.path.join(args['input'], x)), os.listdir(args['input']))
        for image in image_list:
            print("Inference on image", image)
            img = pil_to_tensor(pil_loader(os.path.join(args['input'], image)))
            img = img.unsqueeze(0)
            predictions = model.inference(img)
            out = {"img": image.split("/")[-1]}
            out.update({"predictions": [{"confident": confident, "index": pred, "label": label}
                                        for pred, label, confident in zip(predictions[0], predictions[1], predictions[2])]})
            result.append(out)
    if args['output'].split(".")[-1] == "json":
        with open(args['output'], "w+") as f:
            json.dump(result, f, indent=4)
    else:
        with open(os.path.join(args['output'], "output_model_{}.json".format(time.time())), "w+") as f:
            json.dump(result, f, indent=4)


def load_json(settings_path):
    with open(settings_path) as data_file:
        param = json.load(data_file)
    return param


def setup_parser():
    parser = argparse.ArgumentParser(description='Reproduce of multiple continual learning algorithms.')
    parser.add_argument('--config', type=str, help='Json file of settings.')
    parser.add_argument('--checkpoint', type=str, help="path to checkpoint file. File must be a .pth format file")
    parser.add_argument('--input', type=str, help="Path to input. This could be a folder or an image file")
    parser.add_argument('--output', type=str, help="Output path to save prediction")
    return parser


if __name__ == '__main__':
    main()
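An example invocation (paths illustrative; the config's own "checkpoint" key, if present, overrides the flag because the JSON is merged last): python inference.py --config ./exps/simplecil.json --checkpoint ./models/simplecil/stanfordcar/0/20/simplecil_0.pkl --input ./samples --output ./predictions.json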
install_awscli.sh
ADDED
@@ -0,0 +1,7 @@
#!/bin/sh

curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"

unzip awscliv2.zip

./aws/install
load.sh
ADDED
@@ -0,0 +1,5 @@
#! /bin/sh
# Run load_model.py once per config file passed on the command line.
for arg in "$@"; do
    python ./load_model.py --config="$arg"
done