jinyin_chen committed
Commit e8b0040 · 1 Parent(s): 719eddc
.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *.idea
2
+ .DS_Store
3
+ *.pth
4
+ *.pyc
5
+ *.ipynb
6
+ __pycache__
7
+ vision_rush_image*
Dockerfile ADDED
@@ -0,0 +1,35 @@
1
+ # Base image: CUDA 11.3.1 + cuDNN 8 runtime on Ubuntu 20.04
2
+ FROM nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04
3
+
4
+ # Set the working directory
5
+ WORKDIR /code
6
+
7
+ RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone
8
+ RUN apt-get update -y
9
+ RUN apt-get install software-properties-common -y && add-apt-repository ppa:deadsnakes/ppa
10
+ RUN apt-get install python3.8 python3-pip curl libgl1 libglib2.0-0 ffmpeg libsm6 libxext6 -y && apt-get clean && rm -rf /var/lib/apt/lists/*
11
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 0
12
+ RUN update-alternatives --set python3 /usr/bin/python3.8
13
+
14
+ # Copy ./requirements.txt into the working directory and install the Python dependencies.
15
+ ADD ./requirements.txt /code/requirements.txt
16
+ RUN pip3 install pip --upgrade -i https://pypi.mirrors.ustc.edu.cn/simple/
17
+ RUN pip3 install -r requirements.txt -i https://pypi.mirrors.ustc.edu.cn/simple/ && rm -rf `pip3 cache dir`
18
+
19
+ # Copy the models and code into the working directory
20
+ ADD ./core /code/core
21
+ ADD ./dataset /code/dataset
22
+ ADD ./model /code/model
23
+ ADD ./pre_model /code/pre_model
24
+ ADD ./final_model_csv /code/final_model_csv
25
+ ADD ./toolkit /code/toolkit
26
+ ADD ./infer_api.py /code/infer_api.py
27
+ ADD ./main_infer.py /code/main_infer.py
28
+ ADD ./main_train.py /code/main_train.py
29
+ ADD ./merge.py /code/merge.py
30
+ ADD ./main.sh /code/main.sh
31
+ ADD ./README.md /code/README.md
32
+ ADD ./Dockerfile /code/Dockerfile
33
+
34
+ # Run the inference API
35
+ ENTRYPOINT ["python3","infer_api.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README_zh.md ADDED
@@ -0,0 +1,91 @@
1
+ <h2 align="center"> <a href="">DeepFake Defenders</a></h2>
2
+ <h5 align="center"> If you like our project, please give us a star ⭐ on GitHub for the latest updates. </h5>
3
+
4
+ <h5 align="center">
5
+
6
+ <!-- PROJECT SHIELDS -->
7
+ [![License](https://img.shields.io/badge/License-Apache%202.0-yellow)](https://github.com/VisionRush/DeepFakeDefenders/blob/main/LICENSE)
8
+ ![GitHub contributors](https://img.shields.io/github/contributors/VisionRush/DeepFakeDefenders)
9
+ [![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FVisionRush%2FDeepFakeDefenders&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=Visitors&edge_flat=false)](https://hits.seeyoufarm.com)
10
+ ![GitHub Repo stars](https://img.shields.io/github/stars/VisionRush/DeepFakeDefenders)
11
+ [![GitHub issues](https://img.shields.io/github/issues/VisionRush/DeepFakeDefenders?color=critical&label=Issues)](https://github.com/VisionRush/DeepFakeDefenders/issues?q=is%3Aopen+is%3Aissue)
12
+ [![GitHub closed issues](https://img.shields.io/github/issues-closed/VisionRush/DeepFakeDefenders?color=success&label=Issues)](https://github.com/VisionRush/DeepFakeDefenders/issues?q=is%3Aissue+is%3Aclosed) <br>
13
+
14
+ </h5>
15
+
16
+ <p align='center'>
17
+ <img src='./images/competition_title.png' width='850'/>
18
+ </p>
19
+
20
+ 💡 An English version of this document is provided here: [[ENGLISH DOC](README.md)]. Suggestions and contributions to the project are very welcome and much appreciated.
21
+
22
+ ## 📣 News
23
+
24
+ * **[2024.09.05]** 🔥 We officially released the initial version of DeepFake Defenders, which won third prize in the Deepfake challenge at the
25
+ [[INCLUSION·Conference on the Bund](https://www.atecup.cn/deepfake)].
26
+
27
+ ## 🚀 Quick Start
28
+ ### 1. Pretrained model preparation
29
+ Before getting started, place the ImageNet-1K pretrained weights for the models in the `./pre_model` directory. The download links are:
30
+ ```
31
+ RepLKNet: https://drive.google.com/file/d/1vo-P3XB6mRLUeDzmgv90dOu73uCeLfZN/view?usp=sharing
32
+ ConvNeXt: https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_384.pth
33
+ ```
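The ConvNeXt weights above can be fetched directly over HTTP; the RepLKNet file is hosted on Google Drive and is easiest to download through a browser. As a minimal sketch (the local file name below is an assumption, not dictated by the training code):

```python
# Sketch: download the ConvNeXt ImageNet-1K weights into ./pre_model.
# Adjust the local file name to whatever the training/toolkit code expects.
import os
import urllib.request

os.makedirs('./pre_model', exist_ok=True)
url = 'https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_384.pth'
urllib.request.urlretrieve(url, './pre_model/convnext_base_1k_384.pth')
```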
34
+
35
+ ### 2. Training
36
+
37
+ #### 1. Change the dataset paths
38
+ Place the training-set txt file, validation-set txt file, and label txt file needed for training in the `dataset` folder, keeping the same file names as the examples already provided there (sample txt files are included under `dataset`).
39
+ #### 2. Change the hyperparameters
40
+ For the two models used, the following parameters need to be set in main_train.py:
41
+ ```python
42
+ RepLKNet---cfg.network.name = 'replknet'; cfg.train.batch_size = 16
43
+ ConvNeXt---cfg.network.name = 'convnext'; cfg.train.batch_size = 24
44
+ ```
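For reference, these two settings live in the configuration block near the top of main_train.py included in this commit; a ConvNeXt run would use an excerpt like the following (RepLKNet values shown in the comments):

```python
# Excerpt of the relevant config lines from main_train.py, with ConvNeXt values.
from toolkit.yacs import CfgNode as CN

cfg = CN(new_allowed=True)

cfg.network = CN(new_allowed=True)
cfg.network.name = 'convnext'      # 'replknet' for the RepLKNet run
cfg.network.class_num = 2
cfg.network.input_size = 384

cfg.train = CN(new_allowed=True)
cfg.train.batch_size = 24          # 16 for the RepLKNet run
```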
45
+
46
+ #### 3. Start training
47
+ ##### Single-node multi-GPU training (8 GPUs):
48
+ ```shell
49
+ bash main.sh
50
+ ```
51
+ ##### Single-node single-GPU training:
52
+ ```shell
53
+ CUDA_VISIBLE_DEVICES=0 python main_train_single_gpu.py
54
+ ```
55
+
56
+ #### 4. Model merging
57
+ Set the ConvNeXt and RepLKNet checkpoint paths in merge.py, then run `python merge.py` to obtain the final model used for inference and testing, as sketched below.
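merge.py itself is not part of this commit, so the following is only an illustrative sketch of such a merging step: it assumes both training checkpoints store their weights under a 'state_dict' key (as core/mengine.py's save_checkpoint does), and the branch prefixes for the fused model are hypothetical.

```python
# Illustrative sketch only -- not the actual merge.py from this repository.
# Assumes {'state_dict': ...} checkpoints (see core/mengine.py); the branch
# prefixes for the fused model loaded by load_model('all', 2) are hypothetical.
import torch

convnext_ckpt = torch.load('path/to/convnext_checkpoint.pth', map_location='cpu')
replknet_ckpt = torch.load('path/to/replknet_checkpoint.pth', map_location='cpu')

merged_state = {}
for key, value in convnext_ckpt['state_dict'].items():
    merged_state['convnext_branch.' + key] = value
for key, value in replknet_ckpt['state_dict'].items():
    merged_state['replknet_branch.' + key] = value

# Save in the same {'state_dict': ...} layout that infer_api.py / main_infer.py load.
torch.save({'state_dict': merged_state}, './final_model_csv/final_model.pth')
```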
58
+
59
+ #### 5. Inference
60
+
61
+ The example below calls the API with a POST request; the request parameter is an image path, and the response is the deepfake score predicted by the model.
62
+
63
+ ```python
64
+ #!/usr/bin/env python
65
+ # -*- coding:utf-8 -*-
66
+ import requests
67
+ import json
70
+
71
+ header = {
72
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
73
+ }
74
+
75
+ url = 'http://ip:10005/inter_api'
76
+ image_path = './dataset/val_dataset/51aa9b8d0da890cd1d0c5029e3d89e3c.jpg'
77
+ data_map = {'img_path':image_path}
78
+ response = requests.post(url, json=data_map, headers=header)
79
+ content = response.content
80
+ print(json.loads(content))
81
+ ```
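The endpoint returns a single floating-point deepfake score, computed by INFER_API.test in main_infer.py and served through infer_api.py in this commit.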
82
+
83
+ ### 3. Docker
84
+ #### 1. Build the image
85
+ sudo docker build -t vision-rush-image:1.0.1 --network host .
86
+ #### 2. Start the container
87
+ sudo docker run -d --name vision_rush_image --gpus=all --net host vision-rush-image:1.0.1
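With host networking, the container runs the FastAPI service from infer_api.py (started by the Dockerfile's ENTRYPOINT) on port 10005, so the request example from step 5 above can be used to verify that it is up.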
88
+
89
+ ## Star History
90
+
91
+ [![Star History Chart](https://api.star-history.com/svg?repos=VisionRush/DeepFakeDefenders&type=Date)](https://star-history.com/#VisionRush/DeepFakeDefenders&Date)
app.py CHANGED
@@ -1,7 +1,20 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
  return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
1
  import gradio as gr
2
 
3
+ def greet(name, image):
4
+ # Only the text input is used here; the image is ignored
5
  return "Hello " + name + "!!"
6
 
7
+ # Define the inputs: a textbox and an image upload box
8
+ inputs = [
9
+ gr.Textbox(label="Your Name"),
10
+ gr.Image(label="Your Image")
11
+ ]
12
+
13
+ # Define the output: a textbox
14
+ outputs = gr.Textbox()
15
+
16
+ # Create the Interface object; live=False adds a submit button
17
+ demo = gr.Interface(fn=greet, inputs=inputs, outputs=outputs, live=False)
18
+
19
+ # Launch the interface
20
+ demo.launch()
core/dsproc_mcls.py ADDED
@@ -0,0 +1,167 @@
1
+ import os
2
+ import torch
3
+ from PIL import Image
4
+ from collections import OrderedDict
5
+ from toolkit.dhelper import traverse_recursively
6
+ import numpy as np
7
+ import einops
8
+
9
+ from torch import nn
10
+ import timm
11
+ import torch.nn.functional as F
12
+
13
+
14
+ class SRMConv2d_simple(nn.Module):
15
+ def __init__(self, inc=3):
16
+ super(SRMConv2d_simple, self).__init__()
17
+ self.truc = nn.Hardtanh(-3, 3)
18
+ self.kernel = torch.from_numpy(self._build_kernel(inc)).float()
19
+
20
+ def forward(self, x):
21
+ out = F.conv2d(x, self.kernel, stride=1, padding=2)
22
+ out = self.truc(out)
23
+
24
+ return out
25
+
26
+ def _build_kernel(self, inc):
27
+ # filter1: KB
28
+ filter1 = [[0, 0, 0, 0, 0],
29
+ [0, -1, 2, -1, 0],
30
+ [0, 2, -4, 2, 0],
31
+ [0, -1, 2, -1, 0],
32
+ [0, 0, 0, 0, 0]]
33
+ # filter2:KV
34
+ filter2 = [[-1, 2, -2, 2, -1],
35
+ [2, -6, 8, -6, 2],
36
+ [-2, 8, -12, 8, -2],
37
+ [2, -6, 8, -6, 2],
38
+ [-1, 2, -2, 2, -1]]
39
+ # filter3:hor 2rd
40
+ filter3 = [[0, 0, 0, 0, 0],
41
+ [0, 0, 0, 0, 0],
42
+ [0, 1, -2, 1, 0],
43
+ [0, 0, 0, 0, 0],
44
+ [0, 0, 0, 0, 0]]
45
+
46
+ filter1 = np.asarray(filter1, dtype=float) / 4.
47
+ filter2 = np.asarray(filter2, dtype=float) / 12.
48
+ filter3 = np.asarray(filter3, dtype=float) / 2.
49
+ # stack the filters
50
+ filters = [[filter1], # , filter1, filter1],
51
+ [filter2], # , filter2, filter2],
52
+ [filter3]] # , filter3, filter3]]
53
+ filters = np.array(filters)
54
+ filters = np.repeat(filters, inc, axis=1)
55
+ return filters
56
+
57
+
58
+ class MultiClassificationProcessor(torch.utils.data.Dataset):
59
+
60
+ def __init__(self, transform=None):
61
+ self.transformer_ = transform
62
+ self.extension_ = '.jpg .jpeg .png .bmp .webp .tif .eps'
63
+ # load category info
64
+ self.ctg_names_ = [] # ctg_idx to ctg_name
65
+ self.ctg_name2idx_ = OrderedDict() # ctg_name to ctg_idx
66
+ # load image infos
67
+ self.img_names_ = [] # img_idx to img_name
68
+ self.img_paths_ = [] # img_idx to img_path
69
+ self.img_labels_ = [] # img_idx to img_label
70
+
71
+ self.srm = SRMConv2d_simple()
72
+
73
+ def load_data_from_dir(self, dataset_list):
74
+ """Load image from folder.
75
+
76
+ Args:
77
+ dataset_list: dataset list, each folder is a category, format is [file_root].
78
+ """
79
+ # load sample
80
+ for img_root in dataset_list:
81
+ ctg_name = os.path.basename(img_root)
82
+ self.ctg_name2idx_[ctg_name] = len(self.ctg_names_)
83
+ self.ctg_names_.append(ctg_name)
84
+ img_paths = []
85
+ traverse_recursively(img_root, img_paths, self.extension_)
86
+ for img_path in img_paths:
87
+ img_name = os.path.basename(img_path)
88
+ self.img_names_.append(img_name)
89
+ self.img_paths_.append(img_path)
90
+ self.img_labels_.append(self.ctg_name2idx_[ctg_name])
91
+ print('log: category is %d(%s), image num is %d' % (self.ctg_name2idx_[ctg_name], ctg_name, len(img_paths)))
92
+
93
+ def load_data_from_txt(self, img_list_txt, ctg_list_txt):
94
+ """Load image from txt.
95
+
96
+ Args:
97
+ img_list_txt: image txt, format is [file_path, ctg_idx].
98
+ ctg_list_txt: category txt, format is [ctg_name, ctg_idx].
99
+ """
100
+ # check
101
+ assert os.path.exists(img_list_txt), 'log: does not exist: {}'.format(img_list_txt)
102
+ assert os.path.exists(ctg_list_txt), 'log: does not exist: {}'.format(ctg_list_txt)
103
+
104
+ # load category
105
+ # : open category info file
106
+ with open(ctg_list_txt) as f:
107
+ ctg_infos = [line.strip() for line in f.readlines()]
108
+ # :load category name & category index
109
+ for ctg_info in ctg_infos:
110
+ tmp = ctg_info.split(' ')
111
+ ctg_name = tmp[0]
112
+ ctg_idx = int(tmp[-1])
113
+ self.ctg_name2idx_[ctg_name] = ctg_idx
114
+ self.ctg_names_.append(ctg_name)
115
+
116
+ # load sample
117
+ # : open image info file
118
+ with open(img_list_txt) as f:
119
+ img_infos = [line.strip() for line in f.readlines()]
120
+ # : load image path & category index
121
+ for img_info in img_infos:
122
+ tmp = img_info.split(' ')
123
+
124
+ img_path = ' '.join(tmp[:-1])
125
+ img_name = img_path.split('/')[-1]
126
+ ctg_idx = int(tmp[-1])
127
+ self.img_names_.append(img_name)
128
+ self.img_paths_.append(img_path)
129
+ self.img_labels_.append(ctg_idx)
130
+
131
+ for ctg_name in self.ctg_names_:
132
+ print('log: category is %d(%s), image num is %d' % (self.ctg_name2idx_[ctg_name], ctg_name, self.img_labels_.count(self.ctg_name2idx_[ctg_name])))
133
+
134
+ def _add_new_channels_worker(self, image):
135
+ new_channels = []
136
+
137
+ image = einops.rearrange(image, "h w c -> c h w")
138
+ image = (image- torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_MEAN).view(-1, 1, 1)) / torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_STD).view(-1, 1, 1)
139
+ srm = self.srm(image.unsqueeze(0)).squeeze(0)
140
+ new_channels.append(einops.rearrange(srm, "c h w -> h w c").numpy())
141
+
142
+ new_channels = np.concatenate(new_channels, axis=2)
143
+ return torch.from_numpy(new_channels).float()
144
+
145
+ def add_new_channels(self, images):
146
+ images_copied = einops.rearrange(images, "c h w -> h w c")
147
+ new_channels = self._add_new_channels_worker(images_copied)
148
+ images_copied = torch.concatenate([images_copied, new_channels], dim=-1)
149
+ images_copied = einops.rearrange(images_copied, "h w c -> c h w")
150
+
151
+ return images_copied
152
+
153
+ def __getitem__(self, index):
154
+ img_path = self.img_paths_[index]
155
+ img_label = self.img_labels_[index]
156
+
157
+ img_data = Image.open(img_path).convert('RGB')
158
+ img_size = img_data.size[::-1] # [h, w]
159
+
160
+ if self.transformer_ is not None:
161
+ img_data = self.transformer_[img_label](img_data)
162
+ img_data = self.add_new_channels(img_data)
163
+
164
+ return img_data, img_label, img_path, img_size
165
+
166
+ def __len__(self):
167
+ return len(self.img_names_)
core/dsproc_mclsmfolder.py ADDED
@@ -0,0 +1,194 @@
1
+ import os
2
+ import torch
3
+ from PIL import Image
4
+ from collections import OrderedDict
5
+ from toolkit.dhelper import traverse_recursively
6
+ import random
7
+ from torch import nn
8
+ import numpy as np
9
+ import timm
10
+ import einops
11
+ import torch.nn.functional as F
12
+
13
+
14
+ class SRMConv2d_simple(nn.Module):
15
+ def __init__(self, inc=3):
16
+ super(SRMConv2d_simple, self).__init__()
17
+ self.truc = nn.Hardtanh(-3, 3)
18
+ self.kernel = torch.from_numpy(self._build_kernel(inc)).float()
19
+
20
+ def forward(self, x):
21
+ out = F.conv2d(x, self.kernel, stride=1, padding=2)
22
+ out = self.truc(out)
23
+
24
+ return out
25
+
26
+ def _build_kernel(self, inc):
27
+ # filter1: KB
28
+ filter1 = [[0, 0, 0, 0, 0],
29
+ [0, -1, 2, -1, 0],
30
+ [0, 2, -4, 2, 0],
31
+ [0, -1, 2, -1, 0],
32
+ [0, 0, 0, 0, 0]]
33
+ # filter2:KV
34
+ filter2 = [[-1, 2, -2, 2, -1],
35
+ [2, -6, 8, -6, 2],
36
+ [-2, 8, -12, 8, -2],
37
+ [2, -6, 8, -6, 2],
38
+ [-1, 2, -2, 2, -1]]
39
+ # filter3:hor 2rd
40
+ filter3 = [[0, 0, 0, 0, 0],
41
+ [0, 0, 0, 0, 0],
42
+ [0, 1, -2, 1, 0],
43
+ [0, 0, 0, 0, 0],
44
+ [0, 0, 0, 0, 0]]
45
+
46
+ filter1 = np.asarray(filter1, dtype=float) / 4.
47
+ filter2 = np.asarray(filter2, dtype=float) / 12.
48
+ filter3 = np.asarray(filter3, dtype=float) / 2.
49
+ # stack the filters
50
+ filters = [[filter1], # , filter1, filter1],
51
+ [filter2], # , filter2, filter2],
52
+ [filter3]] # , filter3, filter3]]
53
+ filters = np.array(filters)
54
+ filters = np.repeat(filters, inc, axis=1)
55
+ return filters
56
+
57
+
58
+ class MultiClassificationProcessor_mfolder(torch.utils.data.Dataset):
59
+ def __init__(self, transform=None):
60
+ self.transformer_ = transform
61
+ self.extension_ = '.jpg .jpeg .png .bmp .webp .tif .eps'
62
+ # load category info
63
+ self.ctg_names_ = [] # ctg_idx to ctg_name
64
+ self.ctg_name2idx_ = OrderedDict() # ctg_name to ctg_idx
65
+ # load image infos
66
+ self.img_names_ = [] # img_idx to img_name
67
+ self.img_paths_ = [] # img_idx to img_path
68
+ self.img_labels_ = [] # img_idx to img_label
69
+
70
+ self.srm = SRMConv2d_simple()
71
+
72
+ def load_data_from_dir_test(self, folders):
73
+
74
+ # Load image from folder.
75
+
76
+ # Args:
77
+ # dataset_list: dictionary where key is a label and value is a list of folder paths.
78
+ print(folders)
79
+ img_paths = []
80
+ traverse_recursively(folders, img_paths, self.extension_)
81
+
82
+ for img_path in img_paths:
83
+ img_name = os.path.basename(img_path)
84
+ self.img_names_.append(img_name)
85
+ self.img_paths_.append(img_path)
86
+
87
+ length = len(img_paths)
88
+ print('log: {} image num is {}'.format(folders, length))
89
+
90
+ def load_data_from_dir(self, dataset_list):
91
+
92
+ # Load image from folder.
93
+
94
+ # Args:
95
+ # dataset_list: dictionary where key is a label and value is a list of folder paths.
96
+
97
+ for ctg_name, folders in dataset_list.items():
98
+
99
+ if ctg_name not in self.ctg_name2idx_:
100
+ self.ctg_name2idx_[ctg_name] = len(self.ctg_names_)
101
+ self.ctg_names_.append(ctg_name)
102
+
103
+ for img_root in folders:
104
+ img_paths = []
105
+ traverse_recursively(img_root, img_paths, self.extension_)
106
+
107
+ print(img_root)
108
+
109
+ length = len(img_paths)
110
+ for i in range(length):
111
+ img_path = img_paths[i]
112
+ img_name = os.path.basename(img_path)
113
+ self.img_names_.append(img_name)
114
+ self.img_paths_.append(img_path)
115
+ self.img_labels_.append(self.ctg_name2idx_[ctg_name])
116
+
117
+ print('log: category is %d(%s), image num is %d' % (self.ctg_name2idx_[ctg_name], ctg_name, length))
118
+
119
+ def load_data_from_txt(self, img_list_txt, ctg_list_txt):
120
+ """Load image from txt.
121
+
122
+ Args:
123
+ img_list_txt: image txt, format is [file_path, ctg_idx].
124
+ ctg_list_txt: category txt, format is [ctg_name, ctg_idx].
125
+ """
126
+ # check
127
+ assert os.path.exists(img_list_txt), 'log: does not exist: {}'.format(img_list_txt)
128
+ assert os.path.exists(ctg_list_txt), 'log: does not exist: {}'.format(ctg_list_txt)
129
+
130
+ # load category
131
+ # : open category info file
132
+ with open(ctg_list_txt) as f:
133
+ ctg_infos = [line.strip() for line in f.readlines()]
134
+ # :load category name & category index
135
+ for ctg_info in ctg_infos:
136
+ tmp = ctg_info.split(' ')
137
+ ctg_name = tmp[0]
138
+ ctg_idx = int(tmp[1])
139
+ self.ctg_name2idx_[ctg_name] = ctg_idx
140
+ self.ctg_names_.append(ctg_name)
141
+
142
+ # load sample
143
+ # : open image info file
144
+ with open(img_list_txt) as f:
145
+ img_infos = [line.strip() for line in f.readlines()]
146
+ random.shuffle(img_infos)
147
+ # : load image path & category index
148
+ for img_info in img_infos:
149
+ img_path, ctg_name = img_info.rsplit(' ', 1)
150
+ img_name = img_path.split('/')[-1]
151
+ ctg_idx = int(ctg_name)
152
+ self.img_names_.append(img_name)
153
+ self.img_paths_.append(img_path)
154
+ self.img_labels_.append(ctg_idx)
155
+
156
+ for ctg_name in self.ctg_names_:
157
+ print('log: category is %d(%s), image num is %d' % (self.ctg_name2idx_[ctg_name], ctg_name, self.img_labels_.count(self.ctg_name2idx_[ctg_name])))
158
+
159
+ def _add_new_channels_worker(self, image):
160
+ new_channels = []
161
+
162
+ image = einops.rearrange(image, "h w c -> c h w")
163
+ image = (image- torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_MEAN).view(-1, 1, 1)) / torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_STD).view(-1, 1, 1)
164
+ srm = self.srm(image.unsqueeze(0)).squeeze(0)
165
+ new_channels.append(einops.rearrange(srm, "c h w -> h w c").numpy())
166
+
167
+ new_channels = np.concatenate(new_channels, axis=2)
168
+ return torch.from_numpy(new_channels).float()
169
+
170
+ def add_new_channels(self, images):
171
+ images_copied = einops.rearrange(images, "c h w -> h w c")
172
+ new_channels = self._add_new_channels_worker(images_copied)
173
+ images_copied = torch.concatenate([images_copied, new_channels], dim=-1)
174
+ images_copied = einops.rearrange(images_copied, "h w c -> c h w")
175
+
176
+ return images_copied
177
+
178
+ def __getitem__(self, index):
179
+ img_path = self.img_paths_[index]
180
+
181
+ img_data = Image.open(img_path).convert('RGB')
182
+ img_size = img_data.size[::-1] # [h, w]
183
+
184
+ all_data = []
185
+ for transform in self.transformer_:
186
+ current_data = transform(img_data)
187
+ current_data = self.add_new_channels(current_data)
188
+ all_data.append(current_data)
189
+ img_label = self.img_labels_[index]
190
+
191
+ return torch.stack(all_data, dim=0), img_label, img_path, img_size
192
+
193
+ def __len__(self):
194
+ return len(self.img_names_)
core/mengine.py ADDED
@@ -0,0 +1,253 @@
1
+ import os
2
+ import datetime
3
+ import sys
4
+
5
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.nn.parallel import DistributedDataParallel as DDP
9
+ from tqdm import tqdm
10
+ from toolkit.cmetric import MultiClassificationMetric, MultilabelClassificationMetric, simple_accuracy
11
+ from toolkit.chelper import load_model
12
+ from torch import distributed as dist
13
+ from sklearn.metrics import roc_auc_score
14
+ import numpy as np
15
+ import time
16
+
17
+
18
+ def reduce_tensor(tensor, n):
19
+ rt = tensor.clone()
20
+ dist.all_reduce(rt, op=dist.ReduceOp.SUM)
21
+ rt /= n
22
+ return rt
23
+
24
+
25
+ def gather_tensor(tensor, n):
26
+ rt = [torch.zeros_like(tensor) for _ in range(n)]
27
+ dist.all_gather(rt, tensor)
28
+ return torch.cat(rt, dim=0)
29
+
30
+
31
+ class TrainEngine(object):
32
+ def __init__(self, local_rank, world_size=0, DDP=False, SyncBatchNorm=False):
33
+ # init setting
34
+ self.local_rank = local_rank
35
+ self.world_size = world_size
36
+ self.device_ = f'cuda:{local_rank}'
37
+ # create tool
38
+ self.cls_meter_ = MultilabelClassificationMetric()
39
+ self.loss_meter_ = MultiClassificationMetric()
40
+ self.top1_meter_ = MultiClassificationMetric()
41
+ self.DDP = DDP
42
+ self.SyncBN = SyncBatchNorm
43
+
44
+ def create_env(self, cfg):
45
+ # create network
46
+ self.netloc_ = load_model(cfg.network.name, cfg.network.class_num, self.SyncBN)
47
+ print(self.netloc_)
48
+
49
+ self.netloc_.cuda()
50
+ if self.DDP:
51
+ if self.SyncBN:
52
+ self.netloc_ = torch.nn.SyncBatchNorm.convert_sync_batchnorm(self.netloc_)
53
+ self.netloc_ = DDP(self.netloc_,
54
+ device_ids=[self.local_rank],
55
+ broadcast_buffers=True,
56
+ )
57
+
58
+ # create loss function
59
+ self.criterion_ = nn.CrossEntropyLoss().cuda()
60
+
61
+ # create optimizer
62
+ self.optimizer_ = torch.optim.AdamW(self.netloc_.parameters(), lr=cfg.optimizer.lr,
63
+ betas=(cfg.optimizer.beta1, cfg.optimizer.beta2), eps=cfg.optimizer.eps,
64
+ weight_decay=cfg.optimizer.weight_decay)
65
+
66
+ # create scheduler
67
+ self.scheduler_ = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer_, cfg.train.epoch_num,
68
+ eta_min=cfg.scheduler.min_lr)
69
+
70
+ def train_multi_class(self, train_loader, epoch_idx, ema_start):
71
+ starttime = datetime.datetime.now()
72
+ # switch to train mode
73
+ self.netloc_.train()
74
+ self.loss_meter_.reset()
75
+ self.top1_meter_.reset()
76
+ # train
77
+ train_loader = tqdm(train_loader, desc='train', ascii=True)
78
+ for imgs_idx, (imgs_tensor, imgs_label, _, _) in enumerate(train_loader):
79
+ # set cuda
80
+ imgs_tensor = imgs_tensor.cuda() # [256, 3, 224, 224]
81
+ imgs_label = imgs_label.cuda()
82
+ # clear gradients(zero the parameter gradients)
83
+ self.optimizer_.zero_grad()
84
+ # calc forward
85
+ preds = self.netloc_(imgs_tensor)
86
+ # calc acc & loss
87
+ loss = self.criterion_(preds, imgs_label)
88
+
89
+ # backpropagation
90
+ loss.backward()
91
+ # update parameters
92
+ self.optimizer_.step()
93
+
94
+ # EMA update
95
+ if ema_start:
96
+ self.ema_model.update(self.netloc_)
97
+
98
+ # accumulate loss & acc
99
+ acc1 = simple_accuracy(preds, imgs_label)
100
+ if self.DDP:
101
+ loss = reduce_tensor(loss, self.world_size)
102
+ acc1 = reduce_tensor(acc1, self.world_size)
103
+ self.loss_meter_.update(loss.data.item())
104
+ self.top1_meter_.update(acc1.item())
105
+
106
+ # eval
107
+ top1 = self.top1_meter_.mean
108
+ loss = self.loss_meter_.mean
109
+ endtime = datetime.datetime.now()
110
+ self.lr_ = self.optimizer_.param_groups[0]['lr']
111
+ if self.local_rank == 0:
112
+ print('log: epoch-%d, train_top1 is %f, train_loss is %f, lr is %f, time is %d' % (
113
+ epoch_idx, top1, loss, self.lr_, (endtime - starttime).seconds))
114
+ # return
115
+ return top1, loss, self.lr_
116
+
117
+ def val_multi_class(self, val_loader, epoch_idx):
118
+ np.set_printoptions(suppress=True)
119
+ starttime = datetime.datetime.now()
120
+ # switch to eval mode
121
+ self.netloc_.eval()
122
+ self.loss_meter_.reset()
123
+ self.top1_meter_.reset()
124
+ self.all_probs = []
125
+ self.all_labels = []
126
+ # eval
127
+ with torch.no_grad():
128
+ val_loader = tqdm(val_loader, desc='valid', ascii=True)
129
+ for imgs_idx, (imgs_tensor, imgs_label, _, _) in enumerate(val_loader):
130
+ # set cuda
131
+ imgs_tensor = imgs_tensor.cuda()
132
+ imgs_label = imgs_label.cuda()
133
+ # calc forward
134
+ preds = self.netloc_(imgs_tensor)
135
+ # calc acc & loss
136
+ loss = self.criterion_(preds, imgs_label)
137
+ # accumulate loss & acc
138
+ acc1 = simple_accuracy(preds, imgs_label)
139
+
140
+ outputs_scores = nn.functional.softmax(preds, dim=1)
141
+ outputs_scores = torch.cat((outputs_scores, imgs_label.unsqueeze(-1)), dim=-1)
142
+
143
+ if self.DDP:
144
+ loss = reduce_tensor(loss, self.world_size)
145
+ acc1 = reduce_tensor(acc1, self.world_size)
146
+ outputs_scores = gather_tensor(outputs_scores, self.world_size)
147
+
148
+ outputs_scores, label = outputs_scores[:, -2], outputs_scores[:, -1]
149
+ self.all_probs += [float(i) for i in outputs_scores]
150
+ self.all_labels += [ float(i) for i in label]
151
+ self.loss_meter_.update(loss.item())
152
+ self.top1_meter_.update(acc1.item())
153
+ # eval
154
+ top1 = self.top1_meter_.mean
155
+ loss = self.loss_meter_.mean
156
+ auc = roc_auc_score(self.all_labels, self.all_probs)
157
+
158
+ endtime = datetime.datetime.now()
159
+ if self.local_rank == 0:
160
+ print('log: epoch-%d, val_top1 is %f, val_loss is %f, auc is %f, time is %d' % (
161
+ epoch_idx, top1, loss, auc, (endtime - starttime).seconds))
162
+
163
+ # update lr
164
+ self.scheduler_.step()
165
+
166
+ # return
167
+ return top1, loss, auc
168
+
169
+ def val_ema(self, val_loader, epoch_idx):
170
+ np.set_printoptions(suppress=True)
171
+ starttime = datetime.datetime.now()
172
+ # switch to eval mode
173
+ self.ema_model.module.eval()
174
+ self.loss_meter_.reset()
175
+ self.top1_meter_.reset()
176
+ self.all_probs = []
177
+ self.all_labels = []
178
+ # eval
179
+ with torch.no_grad():
180
+ val_loader = tqdm(val_loader, desc='valid', ascii=True)
181
+ for imgs_idx, (imgs_tensor, imgs_label, _, _) in enumerate(val_loader):
182
+ # set cuda
183
+ imgs_tensor = imgs_tensor.cuda()
184
+ imgs_label = imgs_label.cuda()
185
+ # calc forward
186
+ preds = self.ema_model.module(imgs_tensor)
187
+
188
+ # calc acc & loss
189
+ loss = self.criterion_(preds, imgs_label)
190
+ # accumulate loss & acc
191
+ acc1 = simple_accuracy(preds, imgs_label)
192
+
193
+ outputs_scores = nn.functional.softmax(preds, dim=1)
194
+ outputs_scores = torch.cat((outputs_scores, imgs_label.unsqueeze(-1)), dim=-1)
195
+
196
+ if self.DDP:
197
+ loss = reduce_tensor(loss, self.world_size)
198
+ acc1 = reduce_tensor(acc1, self.world_size)
199
+ outputs_scores = gather_tensor(outputs_scores, self.world_size)
200
+
201
+ outputs_scores, label = outputs_scores[:, -2], outputs_scores[:, -1]
202
+ self.all_probs += [float(i) for i in outputs_scores]
203
+ self.all_labels += [ float(i) for i in label]
204
+ self.loss_meter_.update(loss.item())
205
+ self.top1_meter_.update(acc1.item())
206
+ # eval
207
+ top1 = self.top1_meter_.mean
208
+ loss = self.loss_meter_.mean
209
+ auc = roc_auc_score(self.all_labels, self.all_probs)
210
+
211
+ endtime = datetime.datetime.now()
212
+ if self.local_rank == 0:
213
+ print('log: epoch-%d, ema_val_top1 is %f, ema_val_loss is %f, ema_auc is %f, time is %d' % (
214
+ epoch_idx, top1, loss, auc, (endtime - starttime).seconds))
215
+
216
+ # return
217
+ return top1, loss, auc
218
+
219
+ def save_checkpoint(self, file_root, epoch_idx, train_map, val_map, ema_start):
220
+
221
+ file_name = os.path.join(file_root,
222
+ time.strftime('%Y%m%d-%H-%M', time.localtime()) + '-' + str(epoch_idx) + '.pth')
223
+
224
+ if self.DDP:
225
+ stact_dict = self.netloc_.module.state_dict()
226
+ else:
227
+ stact_dict = self.netloc_.state_dict()
228
+
229
+ torch.save(
230
+ {
231
+ 'epoch_idx': epoch_idx,
232
+ 'state_dict': stact_dict,
233
+ 'train_map': train_map,
234
+ 'val_map': val_map,
235
+ 'lr': self.lr_,
236
+ 'optimizer': self.optimizer_.state_dict(),
237
+ 'scheduler': self.scheduler_.state_dict()
238
+ }, file_name)
239
+
240
+ if ema_start:
241
+ ema_file_name = os.path.join(file_root,
242
+ time.strftime('%Y%m%d-%H-%M', time.localtime()) + '-EMA-' + str(epoch_idx) + '.pth')
243
+ ema_stact_dict = self.ema_model.module.module.state_dict()
244
+ torch.save(
245
+ {
246
+ 'epoch_idx': epoch_idx,
247
+ 'state_dict': ema_stact_dict,
248
+ 'train_map': train_map,
249
+ 'val_map': val_map,
250
+ 'lr': self.lr_,
251
+ 'optimizer': self.optimizer_.state_dict(),
252
+ 'scheduler': self.scheduler_.state_dict()
253
+ }, ema_file_name)
dataset/label.txt ADDED
@@ -0,0 +1,2 @@
1
+ real 0
2
+ fake 1
dataset/train.txt ADDED
@@ -0,0 +1,36 @@
1
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/b580b1fc51d19fc25d2969de07669c21.jpg 0
2
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/df36afc7a12cf840a961743e08bdd596.jpg 1
3
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/f9cec5f76c7f653c2f57d66d7b4ecee0.jpg 1
4
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/a81dc092765e18f3e343b78418cf9371.jpg 1
5
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/56461dd348c9434f44dc810fd06a640e.jpg 1
6
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/1124b822bb0f1076a9914aa454bbd65f.jpg 1
7
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/2fa1aae309a57e975c90285001d43982.jpg 1
8
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/921604adb7ff623bd2fe32d454a1469c.jpg 0
9
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/773f7e1488a29cc52c52b154250df907.jpg 1
10
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/371ad468133750a5fdc670063a6b115a.jpg 1
11
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/780821e5db83764213aae04ac5a54671.jpg 1
12
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/39c253b508dea029854a01de7a1389ab.jpg 0
13
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/9726ea54c28b55e38a5c7cf2fbd8d9da.jpg 1
14
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/4112df80cf4849d05196dc23ecf994cd.jpg 1
15
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/f9858bf9cb1316c273d272249b725912.jpg 0
16
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/bcb50b7c399f978aeb5432c9d80d855c.jpg 0
17
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/2ffd8043985f407069d77dfaae68e032.jpg 1
18
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/0bfd7972fae0bc19f6087fc3b5ac6db8.jpg 1
19
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/daf90a7842ff5bd486ec10fbed22e932.jpg 1
20
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/9dbebbfbc11e8b757b090f5a5ad3fa48.jpg 1
21
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/b8d3d8c2c6cac9fb5b485b94e553f432.jpg 1
22
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/0a59fe7481dc0f9a7dc76cb0bdd3ffe6.jpg 1
23
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/5b82f90800df81625ac78e51f51f1b2e.jpg 1
24
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/badd574c91e6180ef829e2b0d67a3efb.jpg 0
25
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/7412c839b06db42aac1e022096b08031.jpg 1
26
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/81e4b3e7ce314bcd28dde338caeda836.jpg 0
27
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/aa87a563062e6b0741936609014329ab.jpg 0
28
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/0a4c5fdcbe7a3dca6c5a9ee45fd32bef.jpg 1
29
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/adfa3e7ea00ca1ce7a603a297c9ae701.jpg 1
30
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/31fcc0e2f049347b7220dd9eb4f66631.jpg 1
31
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/e699df9505f47dcbb1dcef6858f921e7.jpg 1
32
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/71e7824486a7fef83fa60324dd1cbba8.jpg 1
33
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/ed25cbc58d4f41f7c97201b1ba959834.jpg 1
34
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/nature/4b3d2176926766a4c0e259605dbbc67a.jpg 0
35
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/1dfd77d7ea1a1f05b9c2f532b2a91c62.jpg 1
36
+ /big-data/dataset-academic/multi-FFD/phase1_image/trainset/ai/8e6bea47a8dd71c09c0272be5e1ca584.jpg 1
dataset/val.txt ADDED
@@ -0,0 +1,24 @@
1
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/590e6bc87984f2b4e6d1ed6d4e889088.jpg 1
2
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/720f2234a138382af10b3e2bb6c373cd.jpg 1
3
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/01cb2d00e5d2412ce3cd1d1bb58d7d4e.jpg 1
4
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/41d70d6650eba9036cbb145b29ad14f7.jpg 1
5
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/f7f4df6525cdf0ec27f8f40e2e980ad6.jpg 0
6
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/1dddd03ae6911514a6f1d3117e7e3fd3.jpg 1
7
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/d33054b233cb2e0ebddbe63611626924.jpg 0
8
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/27f2e00bd12d11173422119dfad885ef.jpg 0
9
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/1a0cb2060fbc2065f2ba74f5b2833bc5.jpg 0
10
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/7e0668030bb9a6598621cc7f12600660.jpg 1
11
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/4d7548156c06f9ab12d6daa6524956ea.jpg 1
12
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/cb6a567da3e2f0bcfd19f81756242ba1.jpg 1
13
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/fbff80c8dddf176f310fc10748ce5796.jpg 0
14
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/d68dce56f306f7b0965329f2389b2d5a.jpg 1
15
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/610198886f92d595aaf7cd5c83521ccb.jpg 1
16
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/987a546ad4b3fb76552a89af9b8f5761.jpg 1
17
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/db80dfbe1bb84fe1f9c3e1f21f80561b.jpg 0
18
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/133c775e0516b078f2b951fe49d6b04a.jpg 1
19
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/9584c3c8e012f92b003498793a8a6492.jpg 1
20
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/nature/51aa9b8d0da890cd1d0c5029e3d89e3c.jpg 0
21
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/965c7d35e7a714603587a4710c357ede.jpg 1
22
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/7db2752f0d45637ff64e67f14099378e.jpg 1
23
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/cd9838425bb7e68f165b25a148ba8146.jpg 1
24
+ /big-data/dataset-academic/multi-FFD/phase1_image/valset/ai/88f45da6e89e59842a9e6339d239a78f.jpg 1
dataset/val_dataset/51aa9b8d0da890cd1d0c5029e3d89e3c.jpg ADDED
images/competition_title.png ADDED
infer_api.py ADDED
@@ -0,0 +1,35 @@
1
+ import uvicorn
2
+ from fastapi import FastAPI, Body
3
+ from pydantic import BaseModel, Field
4
+ import sys
5
+ import os
6
+ import json
7
+ from main_infer import INFER_API
8
+
9
+
10
+ infer_api = INFER_API()
11
+
12
+ # create FastAPI instance
13
+ app = FastAPI()
14
+
15
+
16
+ class inputModel(BaseModel):
17
+ img_path: str = Field(..., description="image path", examples=[""])
18
+
19
+ # Call model interface, post request
20
+ @app.post("/inter_api")
21
+ def inter_api(input_model: inputModel):
22
+ img_path = input_model.img_path
23
+ infer_api = INFER_API()
24
+ score = infer_api.test(img_path)
25
+ return score
26
+
27
+
28
+ # run
29
+ if __name__ == '__main__':
30
+ uvicorn.run(app='infer_api:app',
31
+ host='0.0.0.0',
32
+ port=10005,
33
+ reload=False,
34
+ workers=1
35
+ )
main.sh ADDED
@@ -0,0 +1 @@
1
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 --use_env main_train.py
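Note that --use_env is required here because main_train.py reads its rank from the LOCAL_RANK environment variable rather than from a --local_rank argument.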
main_infer.py ADDED
@@ -0,0 +1,142 @@
1
+ from PIL import Image
2
+ import numpy as np
3
+ import timm
4
+ import einops
5
+ import torch
6
+ from torch import nn
7
+ from toolkit.dtransform import create_transforms_inference, create_transforms_inference1,\
8
+ create_transforms_inference2,\
9
+ create_transforms_inference3,\
10
+ create_transforms_inference4,\
11
+ create_transforms_inference5
12
+ from toolkit.chelper import load_model
13
+ import torch.nn.functional as F
14
+
15
+
16
+ def extract_model_from_pth(params_path, net_model):
17
+ checkpoint = torch.load(params_path)
18
+ state_dict = checkpoint['state_dict']
19
+
20
+ net_model.load_state_dict(state_dict, strict=True)
21
+
22
+ return net_model
23
+
24
+
25
+ class SRMConv2d_simple(nn.Module):
26
+ def __init__(self, inc=3):
27
+ super(SRMConv2d_simple, self).__init__()
28
+ self.truc = nn.Hardtanh(-3, 3)
29
+ self.kernel = torch.from_numpy(self._build_kernel(inc)).float()
30
+
31
+ def forward(self, x):
32
+ out = F.conv2d(x, self.kernel, stride=1, padding=2)
33
+ out = self.truc(out)
34
+
35
+ return out
36
+
37
+ def _build_kernel(self, inc):
38
+ # filter1: KB
39
+ filter1 = [[0, 0, 0, 0, 0],
40
+ [0, -1, 2, -1, 0],
41
+ [0, 2, -4, 2, 0],
42
+ [0, -1, 2, -1, 0],
43
+ [0, 0, 0, 0, 0]]
44
+ # filter2:KV
45
+ filter2 = [[-1, 2, -2, 2, -1],
46
+ [2, -6, 8, -6, 2],
47
+ [-2, 8, -12, 8, -2],
48
+ [2, -6, 8, -6, 2],
49
+ [-1, 2, -2, 2, -1]]
50
+ # filter3:hor 2rd
51
+ filter3 = [[0, 0, 0, 0, 0],
52
+ [0, 0, 0, 0, 0],
53
+ [0, 1, -2, 1, 0],
54
+ [0, 0, 0, 0, 0],
55
+ [0, 0, 0, 0, 0]]
56
+
57
+ filter1 = np.asarray(filter1, dtype=float) / 4.
58
+ filter2 = np.asarray(filter2, dtype=float) / 12.
59
+ filter3 = np.asarray(filter3, dtype=float) / 2.
60
+ # stack the filters
61
+ filters = [[filter1], # , filter1, filter1],
62
+ [filter2], # , filter2, filter2],
63
+ [filter3]] # , filter3, filter3]]
64
+ filters = np.array(filters)
65
+ filters = np.repeat(filters, inc, axis=1)
66
+ return filters
67
+
68
+
69
+ class INFER_API:
70
+
71
+ _instance = None
72
+
73
+ def __new__(cls):
74
+ if cls._instance is None:
75
+ cls._instance = super(INFER_API, cls).__new__(cls)
76
+ cls._instance.initialize()
77
+ return cls._instance
78
+
79
+ def initialize(self):
80
+ self.transformer_ = [create_transforms_inference(h=512, w=512),
81
+ create_transforms_inference1(h=512, w=512),
82
+ create_transforms_inference2(h=512, w=512),
83
+ create_transforms_inference3(h=512, w=512),
84
+ create_transforms_inference4(h=512, w=512),
85
+ create_transforms_inference5(h=512, w=512)]
86
+ self.srm = SRMConv2d_simple()
87
+
88
+ # model init
89
+ self.model = load_model('all', 2)
90
+ model_path = './final_model_csv/final_model.pth'
91
+ self.model = extract_model_from_pth(model_path, self.model)
92
+
93
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
94
+ self.model = self.model.to(device)
95
+
96
+ self.model.eval()
97
+
98
+ def _add_new_channels_worker(self, image):
99
+ new_channels = []
100
+
101
+ image = einops.rearrange(image, "h w c -> c h w")
102
+ image = (image - torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_MEAN).view(-1, 1, 1)) / torch.as_tensor(
103
+ timm.data.constants.IMAGENET_DEFAULT_STD).view(-1, 1, 1)
104
+ srm = self.srm(image.unsqueeze(0)).squeeze(0)
105
+ new_channels.append(einops.rearrange(srm, "c h w -> h w c").numpy())
106
+
107
+ new_channels = np.concatenate(new_channels, axis=2)
108
+ return torch.from_numpy(new_channels).float()
109
+
110
+ def add_new_channels(self, images):
111
+ images_copied = einops.rearrange(images, "c h w -> h w c")
112
+ new_channels = self._add_new_channels_worker(images_copied)
113
+ images_copied = torch.concatenate([images_copied, new_channels], dim=-1)
114
+ images_copied = einops.rearrange(images_copied, "h w c -> c h w")
115
+
116
+ return images_copied
117
+
118
+ def test(self, img_path):
119
+ # img load
120
+ img_data = Image.open(img_path).convert('RGB')
121
+
122
+ # transform
123
+ all_data = []
124
+ for transform in self.transformer_:
125
+ current_data = transform(img_data)
126
+ current_data = self.add_new_channels(current_data)
127
+ all_data.append(current_data)
128
+ img_tensor = torch.stack(all_data, dim=0).unsqueeze(0).cuda()
129
+
130
+ preds = self.model(img_tensor)
131
+
132
+ return round(float(preds), 20)
133
+
134
+
135
+ def main():
136
+ img = '51aa9b8d0da890cd1d0c5029e3d89e3c.jpg'
137
+ infer_api = INFER_API()
138
+ print(infer_api.test(img))
139
+
140
+
141
+ if __name__ == '__main__':
142
+ main()
main_train.py ADDED
@@ -0,0 +1,158 @@
1
+ import os
2
+ import time
3
+ import datetime
4
+ import torch
5
+ import sys
6
+
7
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ from torch.utils.tensorboard import SummaryWriter
9
+ from core.dsproc_mcls import MultiClassificationProcessor
10
+ from core.mengine import TrainEngine
11
+ from toolkit.dtransform import create_transforms_inference, transforms_imagenet_train
12
+ from toolkit.yacs import CfgNode as CN
13
+ from timm.utils import ModelEmaV3
14
+
15
+ import warnings
16
+ warnings.filterwarnings("ignore")
17
+
18
+ # check
19
+ print(torch.__version__)
20
+ print(torch.cuda.is_available())
21
+
22
+ # init
23
+ cfg = CN(new_allowed=True)
24
+
25
+ # dataset dir
26
+ ctg_list = './dataset/label.txt'
27
+ train_list = './dataset/train.txt'
28
+ val_list = './dataset/val.txt'
29
+
30
+ # : network
31
+ cfg.network = CN(new_allowed=True)
32
+ cfg.network.name = 'replknet'
33
+ cfg.network.class_num = 2
34
+ cfg.network.input_size = 384
35
+
36
+ # : train params
37
+ mean = (0.485, 0.456, 0.406)
38
+ std = (0.229, 0.224, 0.225)
39
+
40
+ cfg.train = CN(new_allowed=True)
41
+ cfg.train.resume = False
42
+ cfg.train.resume_path = ''
43
+ cfg.train.params_path = ''
44
+ cfg.train.batch_size = 16
45
+ cfg.train.epoch_num = 20
46
+ cfg.train.epoch_start = 0
47
+ cfg.train.worker_num = 8
48
+
49
+ # : optimizer params
50
+ cfg.optimizer = CN(new_allowed=True)
51
+ cfg.optimizer.lr = 1e-4 * 1
52
+ cfg.optimizer.weight_decay = 1e-2
53
+ cfg.optimizer.momentum = 0.9
54
+ cfg.optimizer.beta1 = 0.9
55
+ cfg.optimizer.beta2 = 0.999
56
+ cfg.optimizer.eps = 1e-8
57
+
58
+ # : scheduler params
59
+ cfg.scheduler = CN(new_allowed=True)
60
+ cfg.scheduler.min_lr = 1e-6
61
+
62
+ # DDP init
63
+ local_rank = int(os.environ['LOCAL_RANK'])
64
+ device = 'cuda:{}'.format(local_rank)
65
+ torch.cuda.set_device(local_rank)
66
+ torch.distributed.init_process_group(backend='nccl', init_method='env://')
67
+ world_size = torch.distributed.get_world_size()
68
+ rank = torch.distributed.get_rank()
69
+
70
+ # init path
71
+ task = 'competition'
72
+ log_root = 'output/' + datetime.datetime.now().strftime("%Y-%m-%d") + '-' + time.strftime(
73
+ "%H-%M-%S") + '_' + cfg.network.name + '_' + f"to_{task}_BinClass"
74
+ if local_rank == 0:
75
+ if not os.path.exists(log_root):
76
+ os.makedirs(log_root)
77
+ writer = SummaryWriter(log_root)
78
+
79
+ # create engine
80
+ train_engine = TrainEngine(local_rank, world_size, DDP=True, SyncBatchNorm=True)
81
+ train_engine.create_env(cfg)
82
+
83
+ # create transforms
84
+ transforms_dict ={
85
+ 0 : transforms_imagenet_train(img_size=(cfg.network.input_size, cfg.network.input_size)),
86
+ 1 : transforms_imagenet_train(img_size=(cfg.network.input_size, cfg.network.input_size), jpeg_compression=1),
87
+ }
88
+
89
+ transforms_dict_test ={
90
+ 0: create_transforms_inference(h=512, w=512),
91
+ 1: create_transforms_inference(h=512, w=512),
92
+ }
93
+
94
+ transform = transforms_dict
95
+ transform_test = transforms_dict_test
96
+
97
+ # create dataset
98
+ trainset = MultiClassificationProcessor(transform)
99
+ trainset.load_data_from_txt(train_list, ctg_list)
100
+
101
+ valset = MultiClassificationProcessor(transform_test)
102
+ valset.load_data_from_txt(val_list, ctg_list)
103
+
104
+ train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
105
+ val_sampler = torch.utils.data.distributed.DistributedSampler(valset)
106
+
107
+ # create dataloader
108
+ train_loader = torch.utils.data.DataLoader(dataset=trainset,
109
+ batch_size=cfg.train.batch_size,
110
+ sampler=train_sampler,
111
+ num_workers=cfg.train.worker_num,
112
+ pin_memory=True,
113
+ drop_last=True)
114
+
115
+ val_loader = torch.utils.data.DataLoader(dataset=valset,
116
+ batch_size=cfg.train.batch_size,
117
+ sampler=val_sampler,
118
+ num_workers=cfg.train.worker_num,
119
+ pin_memory=True,
120
+ drop_last=False)
121
+
122
+ train_log_txtFile = log_root + "/" + "train_log.txt"
123
+ f_open = open(train_log_txtFile, "w")
124
+
125
+ # train & Val & Test
126
+ best_test_mAP = 0.0
127
+ best_test_idx = 0.0
128
+ ema_start = True
129
+ train_engine.ema_model = ModelEmaV3(train_engine.netloc_).cuda()
130
+ for epoch_idx in range(cfg.train.epoch_start, cfg.train.epoch_num):
131
+ # train
132
+ train_top1, train_loss, train_lr = train_engine.train_multi_class(train_loader=train_loader, epoch_idx=epoch_idx, ema_start=ema_start)
133
+ # val
134
+ val_top1, val_loss, val_auc = train_engine.val_multi_class(val_loader=val_loader, epoch_idx=epoch_idx)
135
+ # ema_val
136
+ if ema_start:
137
+ ema_val_top1, ema_val_loss, ema_val_auc = train_engine.val_ema(val_loader=val_loader, epoch_idx=epoch_idx)
138
+
139
+ # check mAP and save
140
+ if local_rank == 0:
141
+ train_engine.save_checkpoint(log_root, epoch_idx, train_top1, val_top1, ema_start)
142
+
143
+ if ema_start:
144
+ outInfo = f"epoch_idx = {epoch_idx}, train_top1={train_top1}, train_loss={train_loss},val_top1={val_top1},val_loss={val_loss}, val_auc={val_auc}, ema_val_top1={ema_val_top1}, ema_val_loss={ema_val_loss}, ema_val_auc={ema_val_auc} \n"
145
+ else:
146
+ outInfo = f"epoch_idx = {epoch_idx}, train_top1={train_top1}, train_loss={train_loss},val_top1={val_top1},val_loss={val_loss}, val_auc={val_auc} \n"
147
+
148
+ print(outInfo)
149
+
150
+ f_open.write(outInfo)
151
+ f_open.flush()
152
+
153
+ # curve all mAP & mLoss
154
+ writer.add_scalars('top1', {'train': train_top1, 'valid': val_top1}, epoch_idx)
155
+ writer.add_scalars('loss', {'train': train_loss, 'valid': val_loss}, epoch_idx)
156
+
157
+ # curve lr
158
+ writer.add_scalar('train_lr', train_lr, epoch_idx)
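Note (a hedged sketch, not part of this commit): main_train.py reads LOCAL_RANK from the environment and initializes NCCL with init_method='env://', so it is meant to be started by a multi-process launcher such as torchrun, which sets LOCAL_RANK, RANK and WORLD_SIZE for every worker, e.g. torchrun --nproc_per_node=<num_gpus> main_train.py. When a different shuffle per epoch is desired with DistributedSampler, the sampler also needs set_epoch() at the top of each epoch; the epoch loop would then look like:

def run_epochs(train_loader, train_sampler, epoch_start, epoch_num):
    for epoch_idx in range(epoch_start, epoch_num):
        train_sampler.set_epoch(epoch_idx)  # re-seed the distributed shuffle for this epoch
        for batch in train_loader:
            pass  # the real work is done by train_engine.train_multi_class(...) in this script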
main_train_single_gpu.py ADDED
@@ -0,0 +1,149 @@
1
+ import os
2
+ import time
3
+ import datetime
4
+ import torch
5
+ import sys
6
+
7
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+ from torch.utils.tensorboard import SummaryWriter
9
+ from core.dsproc_mcls import MultiClassificationProcessor
10
+ from core.mengine import TrainEngine
11
+ from toolkit.dtransform import create_transforms_inference, transforms_imagenet_train
12
+ from toolkit.yacs import CfgNode as CN
13
+ from timm.utils import ModelEmaV3
14
+
15
+ import warnings
16
+
17
+ warnings.filterwarnings("ignore")
18
+
19
+ # check
20
+ print(torch.__version__)
21
+ print(torch.cuda.is_available())
22
+
23
+ # init
24
+ cfg = CN(new_allowed=True)
25
+
26
+ # dataset dir
27
+ ctg_list = './dataset/label.txt'
28
+ train_list = './dataset/train.txt'
29
+ val_list = './dataset/val.txt'
30
+
31
+ # : network
32
+ cfg.network = CN(new_allowed=True)
33
+ cfg.network.name = 'replknet'
34
+ cfg.network.class_num = 2
35
+ cfg.network.input_size = 384
36
+
37
+ # : train params
38
+ mean = (0.485, 0.456, 0.406)
39
+ std = (0.229, 0.224, 0.225)
40
+
41
+ cfg.train = CN(new_allowed=True)
42
+ cfg.train.resume = False
43
+ cfg.train.resume_path = ''
44
+ cfg.train.params_path = ''
45
+ cfg.train.batch_size = 16
46
+ cfg.train.epoch_num = 20
47
+ cfg.train.epoch_start = 0
48
+ cfg.train.worker_num = 8
49
+
50
+ # : optimizer params
51
+ cfg.optimizer = CN(new_allowed=True)
52
+ cfg.optimizer.lr = 1e-4 * 1
53
+ cfg.optimizer.weight_decay = 1e-2
54
+ cfg.optimizer.momentum = 0.9
55
+ cfg.optimizer.beta1 = 0.9
56
+ cfg.optimizer.beta2 = 0.999
57
+ cfg.optimizer.eps = 1e-8
58
+
59
+ # : scheduler params
60
+ cfg.scheduler = CN(new_allowed=True)
61
+ cfg.scheduler.min_lr = 1e-6
62
+
63
+ # init path
64
+ task = 'competition'
65
+ log_root = 'output/' + datetime.datetime.now().strftime("%Y-%m-%d") + '-' + time.strftime(
66
+ "%H-%M-%S") + '_' + cfg.network.name + '_' + f"to_{task}_BinClass"
67
+
68
+ if not os.path.exists(log_root):
69
+ os.makedirs(log_root)
70
+ writer = SummaryWriter(log_root)
71
+
72
+ # create engine
73
+ train_engine = TrainEngine(0, 0, DDP=False, SyncBatchNorm=False)
74
+ train_engine.create_env(cfg)
75
+
76
+ # create transforms
77
+ transforms_dict = {
78
+ 0: transforms_imagenet_train(img_size=(cfg.network.input_size, cfg.network.input_size)),
79
+ 1: transforms_imagenet_train(img_size=(cfg.network.input_size, cfg.network.input_size), jpeg_compression=1),
80
+ }
81
+
82
+ transforms_dict_test = {
83
+ 0: create_transforms_inference(h=512, w=512),
84
+ 1: create_transforms_inference(h=512, w=512),
85
+ }
86
+
87
+ transform = transforms_dict
88
+ transform_test = transforms_dict_test
89
+
90
+ # create dataset
91
+ trainset = MultiClassificationProcessor(transform)
92
+ trainset.load_data_from_txt(train_list, ctg_list)
93
+
94
+ valset = MultiClassificationProcessor(transform_test)
95
+ valset.load_data_from_txt(val_list, ctg_list)
96
+
97
+ # create dataloader
98
+ train_loader = torch.utils.data.DataLoader(dataset=trainset,
99
+ batch_size=cfg.train.batch_size,
100
+ num_workers=cfg.train.worker_num,
101
+ shuffle=True,
102
+ pin_memory=True,
103
+ drop_last=True)
104
+
105
+ val_loader = torch.utils.data.DataLoader(dataset=valset,
106
+ batch_size=cfg.train.batch_size,
107
+ num_workers=cfg.train.worker_num,
108
+ shuffle=False,
109
+ pin_memory=True,
110
+ drop_last=False)
111
+
112
+ train_log_txtFile = log_root + "/" + "train_log.txt"
113
+ f_open = open(train_log_txtFile, "w")
114
+
115
+ # train & Val & Test
116
+ best_test_mAP = 0.0
117
+ best_test_idx = 0.0
118
+ ema_start = True
119
+ train_engine.ema_model = ModelEmaV3(train_engine.netloc_).cuda()
120
+ for epoch_idx in range(cfg.train.epoch_start, cfg.train.epoch_num):
121
+ # train
122
+ train_top1, train_loss, train_lr = train_engine.train_multi_class(train_loader=train_loader, epoch_idx=epoch_idx,
123
+ ema_start=ema_start)
124
+ # val
125
+ val_top1, val_loss, val_auc = train_engine.val_multi_class(val_loader=val_loader, epoch_idx=epoch_idx)
126
+ # ema_val
127
+ if ema_start:
128
+ ema_val_top1, ema_val_loss, ema_val_auc = train_engine.val_ema(val_loader=val_loader, epoch_idx=epoch_idx)
129
+
130
+ train_engine.save_checkpoint(log_root, epoch_idx, train_top1, val_top1, ema_start)
131
+
132
+ if ema_start:
133
+ outInfo = f"epoch_idx = {epoch_idx}, train_top1={train_top1}, train_loss={train_loss},val_top1={val_top1},val_loss={val_loss}, val_auc={val_auc}, ema_val_top1={ema_val_top1}, ema_val_loss={ema_val_loss}, ema_val_auc={ema_val_auc} \n"
134
+ else:
135
+ outInfo = f"epoch_idx = {epoch_idx}, train_top1={train_top1}, train_loss={train_loss},val_top1={val_top1},val_loss={val_loss}, val_auc={val_auc} \n"
136
+
137
+ print(outInfo)
138
+
139
+ f_open.write(outInfo)
140
+ # flush the log file so each epoch's results are written to disk immediately
141
+ f_open.flush()
142
+
143
+ # curve all mAP & mLoss
144
+ writer.add_scalars('top1', {'train': train_top1, 'valid': val_top1}, epoch_idx)
145
+ writer.add_scalars('loss', {'train': train_loss, 'valid': val_loss}, epoch_idx)
146
+
147
+ # curve lr
148
+ writer.add_scalar('train_lr', train_lr, epoch_idx)
149
+
merge.py ADDED
@@ -0,0 +1,17 @@
1
+ from toolkit.chelper import final_model
2
+ import torch
3
+ import os
4
+
5
+
6
+ # Trained ConvNeXt and RepLKNet paths (for reference)
7
+ convnext_path = './final_model_csv/convnext_end.pth'
8
+ replknet_path = './final_model_csv/replk_end.pth'
9
+
10
+ model = final_model()
11
+ model.convnext.load_state_dict(torch.load(convnext_path, map_location='cpu')['state_dict'], strict=True)
12
+ model.replknet.load_state_dict(torch.load(replknet_path, map_location='cpu')['state_dict'], strict=True)
13
+
14
+ if not os.path.exists('./final_model_csv'):
15
+ os.makedirs('./final_model_csv')
16
+
17
+ torch.save({'state_dict': model.state_dict()}, './final_model_csv/final_model.pth')
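A minimal usage sketch (assumptions: merge.py above has already been run so final_model.pth exists, and the snippet is executed from the repo root so toolkit/ and model/ are importable):

import torch
from toolkit.chelper import final_model

# rebuild the fused two-backbone model and load the merged weights
model = final_model()
ckpt = torch.load('./final_model_csv/final_model.pth', map_location='cpu')
model.load_state_dict(ckpt['state_dict'], strict=True)
model.eval()
print(sum(p.numel() for p in model.parameters()) / 1e6, 'M parameters')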
model/convnext.py ADDED
@@ -0,0 +1,202 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+
3
+ # All rights reserved.
4
+
5
+ # This source code is licensed under the license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ from timm.models.layers import trunc_normal_, DropPath
13
+ from timm.models.registry import register_model
14
+
15
+ class Block(nn.Module):
16
+ r""" ConvNeXt Block. There are two equivalent implementations:
17
+ (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
18
+ (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
19
+ We use (2) as we find it slightly faster in PyTorch
20
+
21
+ Args:
22
+ dim (int): Number of input channels.
23
+ drop_path (float): Stochastic depth rate. Default: 0.0
24
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
25
+ """
26
+ def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
27
+ super().__init__()
28
+ self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
29
+ self.norm = LayerNorm(dim, eps=1e-6)
30
+ self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
31
+ self.act = nn.GELU()
32
+ self.pwconv2 = nn.Linear(4 * dim, dim)
33
+ self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
34
+ requires_grad=True) if layer_scale_init_value > 0 else None
35
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
36
+
37
+ def forward(self, x):
38
+ input = x
39
+ x = self.dwconv(x)
40
+ x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
41
+ x = self.norm(x)
42
+ x = self.pwconv1(x)
43
+ x = self.act(x)
44
+ x = self.pwconv2(x)
45
+ if self.gamma is not None:
46
+ x = self.gamma * x
47
+ x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
48
+
49
+ x = input + self.drop_path(x)
50
+ return x
51
+
52
+ class ConvNeXt(nn.Module):
53
+ r""" ConvNeXt
54
+ A PyTorch impl of : `A ConvNet for the 2020s` -
55
+ https://arxiv.org/pdf/2201.03545.pdf
56
+
57
+ Args:
58
+ in_chans (int): Number of input image channels. Default: 3
59
+ num_classes (int): Number of classes for classification head. Default: 1000
60
+ depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
61
+ dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
62
+ drop_path_rate (float): Stochastic depth rate. Default: 0.
63
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
64
+ head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
65
+ """
66
+ def __init__(self, in_chans=3, num_classes=1000,
67
+ depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
68
+ layer_scale_init_value=1e-6, head_init_scale=1.,
69
+ ):
70
+ super().__init__()
71
+
72
+ self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
73
+ stem = nn.Sequential(
74
+ nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
75
+ LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
76
+ )
77
+ self.downsample_layers.append(stem)
78
+ for i in range(3):
79
+ downsample_layer = nn.Sequential(
80
+ LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
81
+ nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
82
+ )
83
+ self.downsample_layers.append(downsample_layer)
84
+
85
+ self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
86
+ dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
87
+ cur = 0
88
+ for i in range(4):
89
+ stage = nn.Sequential(
90
+ *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
91
+ layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
92
+ )
93
+ self.stages.append(stage)
94
+ cur += depths[i]
95
+
96
+ self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
97
+ self.head = nn.Linear(dims[-1], num_classes)
98
+
99
+ self.apply(self._init_weights)
100
+ self.head.weight.data.mul_(head_init_scale)
101
+ self.head.bias.data.mul_(head_init_scale)
102
+
103
+ def _init_weights(self, m):
104
+ if isinstance(m, (nn.Conv2d, nn.Linear)):
105
+ trunc_normal_(m.weight, std=.02)
106
+ nn.init.constant_(m.bias, 0)
107
+
108
+ def forward_features(self, x):
109
+ for i in range(4):
110
+ x = self.downsample_layers[i](x)
111
+ x = self.stages[i](x)
112
+ return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
113
+
114
+ def forward(self, x):
115
+ x = self.forward_features(x)
116
+ x = self.head(x)
117
+ return x
118
+
119
+ class LayerNorm(nn.Module):
120
+ r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
121
+ The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
122
+ shape (batch_size, height, width, channels) while channels_first corresponds to inputs
123
+ with shape (batch_size, channels, height, width).
124
+ """
125
+ def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
126
+ super().__init__()
127
+ self.weight = nn.Parameter(torch.ones(normalized_shape))
128
+ self.bias = nn.Parameter(torch.zeros(normalized_shape))
129
+ self.eps = eps
130
+ self.data_format = data_format
131
+ if self.data_format not in ["channels_last", "channels_first"]:
132
+ raise NotImplementedError
133
+ self.normalized_shape = (normalized_shape, )
134
+
135
+ def forward(self, x):
136
+ if self.data_format == "channels_last":
137
+ return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
138
+ elif self.data_format == "channels_first":
139
+ u = x.mean(1, keepdim=True)
140
+ s = (x - u).pow(2).mean(1, keepdim=True)
141
+ x = (x - u) / torch.sqrt(s + self.eps)
142
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
143
+ return x
144
+
145
+
146
+ model_urls = {
147
+ "convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
148
+ "convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
149
+ "convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
150
+ "convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
151
+ "convnext_tiny_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_22k_224.pth",
152
+ "convnext_small_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_22k_224.pth",
153
+ "convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth",
154
+ "convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth",
155
+ "convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth",
156
+ }
157
+
158
+ @register_model
159
+ def convnext_tiny(pretrained=False,in_22k=False, **kwargs):
160
+ model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
161
+ if pretrained:
162
+ url = model_urls['convnext_tiny_22k'] if in_22k else model_urls['convnext_tiny_1k']
163
+ checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
164
+ model.load_state_dict(checkpoint["model"])
165
+ return model
166
+
167
+ @register_model
168
+ def convnext_small(pretrained=False,in_22k=False, **kwargs):
169
+ model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
170
+ if pretrained:
171
+ url = model_urls['convnext_small_22k'] if in_22k else model_urls['convnext_small_1k']
172
+ checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
173
+ model.load_state_dict(checkpoint["model"])
174
+ return model
175
+
176
+ @register_model
177
+ def convnext_base(pretrained=False, in_22k=False, **kwargs):
178
+ model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
179
+ if pretrained:
180
+ url = model_urls['convnext_base_22k'] if in_22k else model_urls['convnext_base_1k']
181
+ checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
182
+ model.load_state_dict(checkpoint["model"])
183
+ return model
184
+
185
+ @register_model
186
+ def convnext_large(pretrained=False, in_22k=False, **kwargs):
187
+ model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
188
+ if pretrained:
189
+ url = model_urls['convnext_large_22k'] if in_22k else model_urls['convnext_large_1k']
190
+ checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
191
+ model.load_state_dict(checkpoint["model"])
192
+ return model
193
+
194
+ @register_model
195
+ def convnext_xlarge(pretrained=False, in_22k=False, **kwargs):
196
+ model = ConvNeXt(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
197
+ if pretrained:
198
+ assert in_22k, "only ImageNet-22K pre-trained ConvNeXt-XL is available; please set in_22k=True"
199
+ url = model_urls['convnext_xlarge_22k']
200
+ checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu")
201
+ model.load_state_dict(checkpoint["model"])
202
+ return model
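A minimal shape-check sketch of this backbone as it is configured elsewhere in this repo (toolkit/chelper.py builds convnext_base with a 2-way head; pretrained=False, so no weight download is needed):

import torch
from model.convnext import convnext_base

net = convnext_base(pretrained=False, num_classes=2)
net.eval()
with torch.no_grad():
    logits = net(torch.randn(1, 3, 384, 384))  # 384 matches cfg.network.input_size in main_train.py
print(logits.shape)  # torch.Size([1, 2])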
model/replknet.py ADDED
@@ -0,0 +1,353 @@
1
+ # Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs (https://arxiv.org/abs/2203.06717)
2
+ # Github source: https://github.com/DingXiaoH/RepLKNet-pytorch
3
+ # Licensed under The MIT License [see LICENSE for details]
4
+ # Based on ConvNeXt, timm, DINO and DeiT code bases
5
+ # https://github.com/facebookresearch/ConvNeXt
6
+ # https://github.com/rwightman/pytorch-image-models/tree/master/timm
7
+ # https://github.com/facebookresearch/deit/
8
+ # https://github.com/facebookresearch/dino
9
+ # --------------------------------------------------------'
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.utils.checkpoint as checkpoint
13
+ from timm.models.layers import DropPath
14
+ import sys
15
+ import os
16
+
17
+ def get_conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias):
18
+ if type(kernel_size) is int:
19
+ use_large_impl = kernel_size > 5
20
+ else:
21
+ assert len(kernel_size) == 2 and kernel_size[0] == kernel_size[1]
22
+ use_large_impl = kernel_size[0] > 5
23
+ has_large_impl = 'LARGE_KERNEL_CONV_IMPL' in os.environ
24
+ if has_large_impl and in_channels == out_channels and out_channels == groups and use_large_impl and stride == 1 and padding == kernel_size // 2 and dilation == 1:
25
+ sys.path.append(os.environ['LARGE_KERNEL_CONV_IMPL'])
26
+ # Please follow the instructions https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/README.md
27
+ # export LARGE_KERNEL_CONV_IMPL=absolute_path_to_where_you_cloned_the_example (i.e., depthwise_conv2d_implicit_gemm.py)
28
+ # TODO more efficient PyTorch implementations of large-kernel convolutions. Pull requests are welcomed.
29
+ # Or you may try MegEngine. We have integrated an efficient implementation into MegEngine and it will automatically use it.
30
+ from depthwise_conv2d_implicit_gemm import DepthWiseConv2dImplicitGEMM
31
+ return DepthWiseConv2dImplicitGEMM(in_channels, kernel_size, bias=bias)
32
+ else:
33
+ return nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
34
+ padding=padding, dilation=dilation, groups=groups, bias=bias)
35
+
36
+ use_sync_bn = False
37
+
38
+ def enable_sync_bn():
39
+ global use_sync_bn
40
+ use_sync_bn = True
41
+
42
+ def get_bn(channels):
43
+ if use_sync_bn:
44
+ return nn.SyncBatchNorm(channels)
45
+ else:
46
+ return nn.BatchNorm2d(channels)
47
+
48
+ def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups, dilation=1):
49
+ if padding is None:
50
+ padding = kernel_size // 2
51
+ result = nn.Sequential()
52
+ result.add_module('conv', get_conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
53
+ stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False))
54
+ result.add_module('bn', get_bn(out_channels))
55
+ return result
56
+
57
+ def conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding, groups, dilation=1):
58
+ if padding is None:
59
+ padding = kernel_size // 2
60
+ result = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
61
+ stride=stride, padding=padding, groups=groups, dilation=dilation)
62
+ result.add_module('nonlinear', nn.ReLU())
63
+ return result
64
+
65
+ def fuse_bn(conv, bn):
66
+ kernel = conv.weight
67
+ running_mean = bn.running_mean
68
+ running_var = bn.running_var
69
+ gamma = bn.weight
70
+ beta = bn.bias
71
+ eps = bn.eps
72
+ std = (running_var + eps).sqrt()
73
+ t = (gamma / std).reshape(-1, 1, 1, 1)
74
+ return kernel * t, beta - running_mean * gamma / std
75
+
76
+ class ReparamLargeKernelConv(nn.Module):
77
+
78
+ def __init__(self, in_channels, out_channels, kernel_size,
79
+ stride, groups,
80
+ small_kernel,
81
+ small_kernel_merged=False):
82
+ super(ReparamLargeKernelConv, self).__init__()
83
+ self.kernel_size = kernel_size
84
+ self.small_kernel = small_kernel
85
+ # We assume the conv does not change the feature map size, so padding = k//2. Otherwise, you may configure padding as you wish, and change the padding of small_conv accordingly.
86
+ padding = kernel_size // 2
87
+ if small_kernel_merged:
88
+ self.lkb_reparam = get_conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
89
+ stride=stride, padding=padding, dilation=1, groups=groups, bias=True)
90
+ else:
91
+ self.lkb_origin = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
92
+ stride=stride, padding=padding, dilation=1, groups=groups)
93
+ if small_kernel is not None:
94
+ assert small_kernel <= kernel_size, 'The kernel size for re-param cannot be larger than the large kernel!'
95
+ self.small_conv = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=small_kernel,
96
+ stride=stride, padding=small_kernel//2, groups=groups, dilation=1)
97
+
98
+ def forward(self, inputs):
99
+ if hasattr(self, 'lkb_reparam'):
100
+ out = self.lkb_reparam(inputs)
101
+ else:
102
+ out = self.lkb_origin(inputs)
103
+ if hasattr(self, 'small_conv'):
104
+ out += self.small_conv(inputs)
105
+ return out
106
+
107
+ def get_equivalent_kernel_bias(self):
108
+ eq_k, eq_b = fuse_bn(self.lkb_origin.conv, self.lkb_origin.bn)
109
+ if hasattr(self, 'small_conv'):
110
+ small_k, small_b = fuse_bn(self.small_conv.conv, self.small_conv.bn)
111
+ eq_b += small_b
112
+ # add to the central part
113
+ eq_k += nn.functional.pad(small_k, [(self.kernel_size - self.small_kernel) // 2] * 4)
114
+ return eq_k, eq_b
115
+
116
+ def merge_kernel(self):
117
+ eq_k, eq_b = self.get_equivalent_kernel_bias()
118
+ self.lkb_reparam = get_conv2d(in_channels=self.lkb_origin.conv.in_channels,
119
+ out_channels=self.lkb_origin.conv.out_channels,
120
+ kernel_size=self.lkb_origin.conv.kernel_size, stride=self.lkb_origin.conv.stride,
121
+ padding=self.lkb_origin.conv.padding, dilation=self.lkb_origin.conv.dilation,
122
+ groups=self.lkb_origin.conv.groups, bias=True)
123
+ self.lkb_reparam.weight.data = eq_k
124
+ self.lkb_reparam.bias.data = eq_b
125
+ self.__delattr__('lkb_origin')
126
+ if hasattr(self, 'small_conv'):
127
+ self.__delattr__('small_conv')
128
+
129
+
130
+ class ConvFFN(nn.Module):
131
+
132
+ def __init__(self, in_channels, internal_channels, out_channels, drop_path):
133
+ super().__init__()
134
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
135
+ self.preffn_bn = get_bn(in_channels)
136
+ self.pw1 = conv_bn(in_channels=in_channels, out_channels=internal_channels, kernel_size=1, stride=1, padding=0, groups=1)
137
+ self.pw2 = conv_bn(in_channels=internal_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, groups=1)
138
+ self.nonlinear = nn.GELU()
139
+
140
+ def forward(self, x):
141
+ out = self.preffn_bn(x)
142
+ out = self.pw1(out)
143
+ out = self.nonlinear(out)
144
+ out = self.pw2(out)
145
+ return x + self.drop_path(out)
146
+
147
+
148
+ class RepLKBlock(nn.Module):
149
+
150
+ def __init__(self, in_channels, dw_channels, block_lk_size, small_kernel, drop_path, small_kernel_merged=False):
151
+ super().__init__()
152
+ self.pw1 = conv_bn_relu(in_channels, dw_channels, 1, 1, 0, groups=1)
153
+ self.pw2 = conv_bn(dw_channels, in_channels, 1, 1, 0, groups=1)
154
+ self.large_kernel = ReparamLargeKernelConv(in_channels=dw_channels, out_channels=dw_channels, kernel_size=block_lk_size,
155
+ stride=1, groups=dw_channels, small_kernel=small_kernel, small_kernel_merged=small_kernel_merged)
156
+ self.lk_nonlinear = nn.ReLU()
157
+ self.prelkb_bn = get_bn(in_channels)
158
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
159
+ print('drop path:', self.drop_path)
160
+
161
+ def forward(self, x):
162
+ out = self.prelkb_bn(x)
163
+ out = self.pw1(out)
164
+ out = self.large_kernel(out)
165
+ out = self.lk_nonlinear(out)
166
+ out = self.pw2(out)
167
+ return x + self.drop_path(out)
168
+
169
+
170
+ class RepLKNetStage(nn.Module):
171
+
172
+ def __init__(self, channels, num_blocks, stage_lk_size, drop_path,
173
+ small_kernel, dw_ratio=1, ffn_ratio=4,
174
+ use_checkpoint=False, # train with torch.utils.checkpoint to save memory
175
+ small_kernel_merged=False,
176
+ norm_intermediate_features=False):
177
+ super().__init__()
178
+ self.use_checkpoint = use_checkpoint
179
+ blks = []
180
+ for i in range(num_blocks):
181
+ block_drop_path = drop_path[i] if isinstance(drop_path, list) else drop_path
182
+ # Assume all RepLK Blocks within a stage share the same lk_size. You may tune it on your own model.
183
+ replk_block = RepLKBlock(in_channels=channels, dw_channels=int(channels * dw_ratio), block_lk_size=stage_lk_size,
184
+ small_kernel=small_kernel, drop_path=block_drop_path, small_kernel_merged=small_kernel_merged)
185
+ convffn_block = ConvFFN(in_channels=channels, internal_channels=int(channels * ffn_ratio), out_channels=channels,
186
+ drop_path=block_drop_path)
187
+ blks.append(replk_block)
188
+ blks.append(convffn_block)
189
+ self.blocks = nn.ModuleList(blks)
190
+ if norm_intermediate_features:
191
+ self.norm = get_bn(channels) # Only use this with RepLKNet-XL on downstream tasks
192
+ else:
193
+ self.norm = nn.Identity()
194
+
195
+ def forward(self, x):
196
+ for blk in self.blocks:
197
+ if self.use_checkpoint:
198
+ x = checkpoint.checkpoint(blk, x) # Save training memory
199
+ else:
200
+ x = blk(x)
201
+ return x
202
+
203
+ class RepLKNet(nn.Module):
204
+
205
+ def __init__(self, large_kernel_sizes, layers, channels, drop_path_rate, small_kernel,
206
+ dw_ratio=1, ffn_ratio=4, in_channels=3, num_classes=1000, out_indices=None,
207
+ use_checkpoint=False,
208
+ small_kernel_merged=False,
209
+ use_sync_bn=True,
210
+ norm_intermediate_features=False # for RepLKNet-XL on COCO and ADE20K, use an extra BN to normalize the intermediate feature maps then feed them into the heads
211
+ ):
212
+ super().__init__()
213
+
214
+ if num_classes is None and out_indices is None:
215
+ raise ValueError('must specify one of num_classes (for pretraining) and out_indices (for downstream tasks)')
216
+ elif num_classes is not None and out_indices is not None:
217
+ raise ValueError('cannot specify both num_classes (for pretraining) and out_indices (for downstream tasks)')
218
+ elif num_classes is not None and norm_intermediate_features:
219
+ raise ValueError('for pretraining, no need to normalize the intermediate feature maps')
220
+ self.out_indices = out_indices
221
+ if use_sync_bn:
222
+ enable_sync_bn()
223
+
224
+ base_width = channels[0]
225
+ self.use_checkpoint = use_checkpoint
226
+ self.norm_intermediate_features = norm_intermediate_features
227
+ self.num_stages = len(layers)
228
+ self.stem = nn.ModuleList([
229
+ conv_bn_relu(in_channels=in_channels, out_channels=base_width, kernel_size=3, stride=2, padding=1, groups=1),
230
+ conv_bn_relu(in_channels=base_width, out_channels=base_width, kernel_size=3, stride=1, padding=1, groups=base_width),
231
+ conv_bn_relu(in_channels=base_width, out_channels=base_width, kernel_size=1, stride=1, padding=0, groups=1),
232
+ conv_bn_relu(in_channels=base_width, out_channels=base_width, kernel_size=3, stride=2, padding=1, groups=base_width)])
233
+ # stochastic depth. We set block-wise drop-path rate. The higher level blocks are more likely to be dropped. This implementation follows Swin.
234
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))]
235
+ self.stages = nn.ModuleList()
236
+ self.transitions = nn.ModuleList()
237
+ for stage_idx in range(self.num_stages):
238
+ layer = RepLKNetStage(channels=channels[stage_idx], num_blocks=layers[stage_idx],
239
+ stage_lk_size=large_kernel_sizes[stage_idx],
240
+ drop_path=dpr[sum(layers[:stage_idx]):sum(layers[:stage_idx + 1])],
241
+ small_kernel=small_kernel, dw_ratio=dw_ratio, ffn_ratio=ffn_ratio,
242
+ use_checkpoint=use_checkpoint, small_kernel_merged=small_kernel_merged,
243
+ norm_intermediate_features=norm_intermediate_features)
244
+ self.stages.append(layer)
245
+ if stage_idx < len(layers) - 1:
246
+ transition = nn.Sequential(
247
+ conv_bn_relu(channels[stage_idx], channels[stage_idx + 1], 1, 1, 0, groups=1),
248
+ conv_bn_relu(channels[stage_idx + 1], channels[stage_idx + 1], 3, stride=2, padding=1, groups=channels[stage_idx + 1]))
249
+ self.transitions.append(transition)
250
+
251
+ if num_classes is not None:
252
+ self.norm = get_bn(channels[-1])
253
+ self.avgpool = nn.AdaptiveAvgPool2d(1)
254
+ self.head = nn.Linear(channels[-1], num_classes)
255
+
256
+
257
+
258
+ def forward_features(self, x):
259
+ x = self.stem[0](x)
260
+ for stem_layer in self.stem[1:]:
261
+ if self.use_checkpoint:
262
+ x = checkpoint.checkpoint(stem_layer, x) # save memory
263
+ else:
264
+ x = stem_layer(x)
265
+
266
+ if self.out_indices is None:
267
+ # Just need the final output
268
+ for stage_idx in range(self.num_stages):
269
+ x = self.stages[stage_idx](x)
270
+ if stage_idx < self.num_stages - 1:
271
+ x = self.transitions[stage_idx](x)
272
+ return x
273
+ else:
274
+ # Need the intermediate feature maps
275
+ outs = []
276
+ for stage_idx in range(self.num_stages):
277
+ x = self.stages[stage_idx](x)
278
+ if stage_idx in self.out_indices:
279
+ outs.append(self.stages[stage_idx].norm(x)) # For RepLKNet-XL normalize the features before feeding them into the heads
280
+ if stage_idx < self.num_stages - 1:
281
+ x = self.transitions[stage_idx](x)
282
+ return outs
283
+
284
+ def forward(self, x):
285
+ x = self.forward_features(x)
286
+ if self.out_indices:
287
+ return x
288
+ else:
289
+ x = self.norm(x)
290
+ x = self.avgpool(x)
291
+ x = torch.flatten(x, 1)
292
+ x = self.head(x)
293
+ return x
294
+
295
+ def structural_reparam(self):
296
+ for m in self.modules():
297
+ if hasattr(m, 'merge_kernel'):
298
+ m.merge_kernel()
299
+
300
+ # If your framework cannot automatically fuse BN for inference, you may do it manually.
301
+ # The BNs after and before conv layers can be removed.
302
+ # No need to call this if your framework supports automatic BN fusion.
303
+ def deep_fuse_BN(self):
304
+ for m in self.modules():
305
+ if not isinstance(m, nn.Sequential):
306
+ continue
307
+ if not len(m) in [2, 3]: # Only handle conv-BN or conv-BN-relu
308
+ continue
309
+ # If you use a custom Conv2d impl, assume it also has 'kernel_size' and 'weight'
310
+ if hasattr(m[0], 'kernel_size') and hasattr(m[0], 'weight') and isinstance(m[1], nn.BatchNorm2d):
311
+ conv = m[0]
312
+ bn = m[1]
313
+ fused_kernel, fused_bias = fuse_bn(conv, bn)
314
+ fused_conv = get_conv2d(conv.in_channels, conv.out_channels, kernel_size=conv.kernel_size,
315
+ stride=conv.stride,
316
+ padding=conv.padding, dilation=conv.dilation, groups=conv.groups, bias=True)
317
+ fused_conv.weight.data = fused_kernel
318
+ fused_conv.bias.data = fused_bias
319
+ m[0] = fused_conv
320
+ m[1] = nn.Identity()
321
+
322
+
323
+ def create_RepLKNet31B(drop_path_rate=0.5, num_classes=1000, use_checkpoint=False, small_kernel_merged=False, use_sync_bn=True):
324
+ return RepLKNet(large_kernel_sizes=[31,29,27,13], layers=[2,2,18,2], channels=[128,256,512,1024],
325
+ drop_path_rate=drop_path_rate, small_kernel=5, num_classes=num_classes, use_checkpoint=use_checkpoint,
326
+ small_kernel_merged=small_kernel_merged, use_sync_bn=use_sync_bn)
327
+
328
+ def create_RepLKNet31L(drop_path_rate=0.3, num_classes=1000, use_checkpoint=True, small_kernel_merged=False):
329
+ return RepLKNet(large_kernel_sizes=[31,29,27,13], layers=[2,2,18,2], channels=[192,384,768,1536],
330
+ drop_path_rate=drop_path_rate, small_kernel=5, num_classes=num_classes, use_checkpoint=use_checkpoint,
331
+ small_kernel_merged=small_kernel_merged)
332
+
333
+ def create_RepLKNetXL(drop_path_rate=0.3, num_classes=1000, use_checkpoint=True, small_kernel_merged=False):
334
+ return RepLKNet(large_kernel_sizes=[27,27,27,13], layers=[2,2,18,2], channels=[256,512,1024,2048],
335
+ drop_path_rate=drop_path_rate, small_kernel=None, dw_ratio=1.5,
336
+ num_classes=num_classes, use_checkpoint=use_checkpoint,
337
+ small_kernel_merged=small_kernel_merged)
338
+
339
+ if __name__ == '__main__':
340
+ model = create_RepLKNet31B(small_kernel_merged=False)
341
+ model.eval()
342
+ print('------------------- training-time model -------------')
343
+ print(model)
344
+ x = torch.randn(2, 3, 224, 224)
345
+ origin_y = model(x)
346
+ model.structural_reparam()
347
+ print('------------------- after re-param -------------')
348
+ print(model)
349
+ reparam_y = model(x)
350
+ print('------------------- the difference is ------------------------')
351
+ print((origin_y - reparam_y).abs().sum())
352
+
353
+
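The __main__ block above verifies the structural re-parameterization at 224x224. As a minimal additional sketch, the 2-class configuration used by toolkit/chelper.py can be shape-checked on CPU (SyncBatchNorm is only useful under DDP, so it is left disabled here):

import torch
from model.replknet import create_RepLKNet31B

net = create_RepLKNet31B(num_classes=2, use_sync_bn=False)
net.eval()
with torch.no_grad():
    print(net(torch.randn(1, 3, 384, 384)).shape)  # torch.Size([1, 2])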
requirements.txt ADDED
@@ -0,0 +1,19 @@
1
+ asttokens==2.4.1
2
+ einops==0.8.0
3
+ numpy==1.22.0
4
+ opencv-python==4.8.0.74
5
+ pillow==9.5.0
6
+ PyYAML==6.0.1
7
+ scikit-image==0.21.0
8
+ scikit-learn==1.3.2
9
+ tensorboard==2.14.0
10
+ tensorboard-data-server==0.7.2
11
+ thop==0.1.1.post2209072238
12
+ timm==0.6.13
13
+ tqdm==4.66.4
14
+ fastapi==0.103.1
15
+ uvicorn==0.22.0
16
+ pydantic==1.10.9
17
+ torch==1.13.1
18
+ torchvision==0.14.1
19
+
toolkit/chelper.py ADDED
@@ -0,0 +1,84 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from model.convnext import convnext_base
4
+ import timm
5
+ from model.replknet import create_RepLKNet31B
6
+
7
+
8
+ class augment_inputs_network(nn.Module):
9
+ def __init__(self, model):
10
+ super(augment_inputs_network, self).__init__()
11
+ self.model = model
12
+ self.adapter = nn.Conv2d(in_channels=6, out_channels=3, kernel_size=3, stride=1, padding=1)
13
+
14
+ def forward(self, x):
15
+ x = self.adapter(x)
16
+ x = (x - torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_MEAN, device=x.get_device()).view(1, -1, 1, 1)) / torch.as_tensor(timm.data.constants.IMAGENET_DEFAULT_STD, device=x.get_device()).view(1, -1, 1, 1)
17
+
18
+ return self.model(x)
19
+
20
+
21
+ class final_model(nn.Module): # Total parameters: 158.64741325378418 MB
22
+ def __init__(self):
23
+ super(final_model, self).__init__()
24
+
25
+ self.convnext = convnext_base(num_classes=2)
26
+ self.convnext = augment_inputs_network(self.convnext)
27
+
28
+ self.replknet = create_RepLKNet31B(num_classes=2)
29
+ self.replknet = augment_inputs_network(self.replknet)
30
+
31
+ def forward(self, x):
32
+ B, N, C, H, W = x.shape
33
+ x = x.view(-1, C, H, W)
34
+
35
+ pred1 = self.convnext(x)
36
+ pred2 = self.replknet(x)
37
+
38
+ outputs_score1 = nn.functional.softmax(pred1, dim=1)
39
+ outputs_score2 = nn.functional.softmax(pred2, dim=1)
40
+
41
+ predict_score1 = outputs_score1[:, 1]
42
+ predict_score2 = outputs_score2[:, 1]
43
+
44
+ predict_score1 = predict_score1.view(B, N).mean(dim=-1)
45
+ predict_score2 = predict_score2.view(B, N).mean(dim=-1)
46
+
47
+ return torch.stack((predict_score1, predict_score2), dim=-1).mean(dim=-1)
48
+
49
+
50
+ def load_model(model_name, ctg_num, use_sync_bn):
51
+ """Load standard model, like vgg16, resnet18,
52
+
53
+ Args:
54
+ model_name: one of 'convnext', 'replknet', or 'all' (the fused two-backbone ensemble)
55
+ ctg_num: number of output classes, e.g., 2
56
+ use_sync_bn: True/False
57
+ """
58
+ if model_name == 'convnext':
59
+ model = convnext_base(num_classes=ctg_num)
60
+ model_path = 'pre_model/convnext_base_1k_384.pth'
61
+ check_point = torch.load(model_path, map_location='cpu')['model']
62
+ check_point.pop('head.weight')
63
+ check_point.pop('head.bias')
64
+ model.load_state_dict(check_point, strict=False)
65
+
66
+ model = augment_inputs_network(model)
67
+
68
+ elif model_name == 'replknet':
69
+ model = create_RepLKNet31B(num_classes=ctg_num, use_sync_bn=use_sync_bn)
70
+ model_path = 'pre_model/RepLKNet-31B_ImageNet-1K_384.pth'
71
+ check_point = torch.load(model_path)
72
+ check_point.pop('head.weight')
73
+ check_point.pop('head.bias')
74
+ model.load_state_dict(check_point, strict=False)
75
+
76
+ model = augment_inputs_network(model)
77
+
78
+ elif model_name == 'all':
79
+ model = final_model()
80
+
81
+ print("model_name", model_name)
82
+
83
+ return model
84
+
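A minimal sketch of the input contract of augment_inputs_network / final_model (assumptions: the 6 channels per view are two stacked 3-channel representations prepared by the inference code, and a CUDA device is available, because the normalization constants are created on x.get_device()):

import torch
from toolkit.chelper import final_model

model = final_model().cuda().eval()
views = torch.rand(1, 2, 6, 512, 512).cuda()  # (B, N, C, H, W): N test-time views, 6 channels each
with torch.no_grad():
    score = model(views)  # softmax probability of class index 1, averaged over views and both backbones
print(score.shape)  # torch.Size([1])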
toolkit/cmetric.py ADDED
@@ -0,0 +1,137 @@
1
+ import math
2
+ import torch
3
+ import numpy as np
4
+ import sklearn
5
+ import sklearn.metrics
6
+
7
+
8
+ class MultilabelClassificationMetric(object):
9
+ def __init__(self):
10
+ super(MultilabelClassificationMetric, self).__init__()
11
+ self.pred_scores_ = torch.FloatTensor() # .FloatStorage()
12
+ self.grth_labels_ = torch.LongTensor() # .LongStorage()
13
+
14
+ # Func:
15
+ # Reset calculation.
16
+ def reset(self):
17
+ self.pred_scores_ = torch.FloatTensor(torch.FloatStorage())
18
+ self.grth_labels_ = torch.LongTensor(torch.LongStorage())
19
+
20
+ # Func:
21
+ # Add prediction and groundtruth that will be used to calculate average precision.
22
+ # Input:
23
+ # pred_scores : predicted scores, size: [batch_size, label_dim], format: [s0, s1, ..., s19]
24
+ # grth_labels : groundtruth labels, size: [batch_size, label_dim], format: [c0, c1, ..., c19]
25
+ def add(self, pred_scores, grth_labels):
26
+ if not torch.is_tensor(pred_scores):
27
+ pred_scores = torch.from_numpy(pred_scores)
28
+ if not torch.is_tensor(grth_labels):
29
+ grth_labels = torch.from_numpy(grth_labels)
30
+
31
+ # check
32
+ assert pred_scores.dim() == 2, 'wrong pred_scores size (should be 2D with format: [batch_size, label_dim(one column per class)])'
33
+ assert grth_labels.dim() == 2, 'wrong grth_labels size (should be 2D with format: [batch_size, label_dim(one column per class)])'
34
+
35
+ # check storage is sufficient
36
+ if self.pred_scores_.storage().size() < self.pred_scores_.numel() + pred_scores.numel():
37
+ new_size = math.ceil(self.pred_scores_.storage().size() * 1.5)
38
+ self.pred_scores_.storage().resize_(int(new_size + pred_scores.numel()))
39
+ self.grth_labels_.storage().resize_(int(new_size + pred_scores.numel()))
40
+
41
+ # store outputs and targets
42
+ offset = self.pred_scores_.size(0) if self.pred_scores_.dim() > 0 else 0
43
+ self.pred_scores_.resize_(offset + pred_scores.size(0), pred_scores.size(1))
44
+ self.grth_labels_.resize_(offset + grth_labels.size(0), grth_labels.size(1))
45
+ self.pred_scores_.narrow(0, offset, pred_scores.size(0)).copy_(pred_scores)
46
+ self.grth_labels_.narrow(0, offset, grth_labels.size(0)).copy_(grth_labels)
47
+
48
+ # Func:
49
+ # Compute average precision.
50
+ def calc_avg_precision(self):
51
+ # check
52
+ if self.pred_scores_.numel() == 0: return 0
53
+ # calc by class
54
+ aps = torch.zeros(self.pred_scores_.size(1))
55
+ for cls_idx in range(self.pred_scores_.size(1)):
56
+ # get pred scores & grth labels at class cls_idx
57
+ cls_pred_scores = self.pred_scores_[:, cls_idx] # predictions for all images at class cls_idx, format: [img_num]
58
+ cls_grth_labels = self.grth_labels_[:, cls_idx] # ground-truth labels for all images at class cls_idx, format: [img_num]
59
+ # sort by score
60
+ _, img_indices = torch.sort(cls_pred_scores, dim=0, descending=True)
61
+ # calc ap
62
+ TP, TPFP = 0., 0.
63
+ for img_idx in img_indices:
64
+ label = cls_grth_labels[img_idx]
65
+ # accumulate
66
+ TPFP += 1
67
+ if label == 1:
68
+ TP += 1
69
+ aps[cls_idx] += TP / TPFP
70
+ aps[cls_idx] /= (TP + 1e-5)
71
+ # return
72
+ return aps
73
+
74
+ # Func:
75
+ # Compute average precision.
76
+ def calc_avg_precision2(self):
77
+ self.pred_scores_ = self.pred_scores_.cpu().numpy().astype('float32')
78
+ self.grth_labels_ = self.grth_labels_.cpu().numpy().astype('float32')
79
+ # check
80
+ if self.pred_scores_.size == 0: return 0
81
+ # calc by class
82
+ aps = np.zeros(self.pred_scores_.shape[1])
83
+ for cls_idx in range(self.pred_scores_.shape[1]):
84
+ # get pred scores & grth labels at class cls_idx
85
+ cls_pred_scores = self.pred_scores_[:, cls_idx]
86
+ cls_grth_labels = self.grth_labels_[:, cls_idx]
87
+ # compute ap for a object category
88
+ aps[cls_idx] = sklearn.metrics.average_precision_score(cls_grth_labels, cls_pred_scores)
89
+ aps[np.isnan(aps)] = 0
90
+ aps = np.around(aps, decimals=4)
91
+ return aps
92
+
93
+
94
+ class MultiClassificationMetric(object):
95
+ """Computes and stores the average and current value"""
96
+ def __init__(self):
97
+ super(MultiClassificationMetric, self).__init__()
98
+ self.reset()
99
+ self.val = 0
100
+
101
+ def update(self, value, n=1):
102
+ self.val = value
103
+ self.sum += value
104
+ self.var += value * value
105
+ self.n += n
106
+
107
+ if self.n == 0:
108
+ self.mean, self.std = np.nan, np.nan
109
+ elif self.n == 1:
110
+ self.mean, self.std = self.sum, np.inf
111
+ self.mean_old = self.mean
112
+ self.m_s = 0.0
113
+ else:
114
+ self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n)
115
+ self.m_s += (value - self.mean_old) * (value - self.mean)
116
+ self.mean_old = self.mean
117
+ self.std = math.sqrt(self.m_s / (self.n - 1.0))
118
+
119
+ def reset(self):
120
+ self.n = 0
121
+ self.sum = 0.0
122
+ self.var = 0.0
123
+ self.val = 0.0
124
+ self.mean = np.nan
125
+ self.mean_old = 0.0
126
+ self.m_s = 0.0
127
+ self.std = np.nan
128
+
129
+
130
+ def simple_accuracy(output, target):
131
+ """计算预测正确的准确率"""
132
+ with torch.no_grad():
133
+ _, preds = torch.max(output, 1)
134
+
135
+ correct = preds.eq(target).float()
136
+ accuracy = correct.sum() / len(target)
137
+ return accuracy
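A minimal toy-batch sketch of the two helpers above (plain CPU tensors):

import torch
from toolkit.cmetric import MultiClassificationMetric, simple_accuracy

logits = torch.tensor([[2.0, 0.5], [0.1, 1.2], [0.3, 0.9]])
labels = torch.tensor([0, 1, 0])
acc = simple_accuracy(logits, labels)  # 2 of 3 argmax predictions match -> ~0.667
meter = MultiClassificationMetric()
meter.update(float(acc))               # keeps a running mean/std across batches
print(float(acc), meter.mean)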
toolkit/dhelper.py ADDED
@@ -0,0 +1,36 @@
1
+ import os
2
+
3
+
4
+ def get_file_name_ext(filepath):
5
+ # analyze
6
+ file_name, file_ext = os.path.splitext(filepath)
7
+ # return
8
+ return file_name, file_ext
9
+
10
+
11
+ def get_file_ext(filepath):
12
+ return get_file_name_ext(filepath)[1]
13
+
14
+
15
+ def traverse_recursively(fileroot, filepathes=[], extension='.*'):
16
+ """Traverse all file path in specialed directory recursively.
17
+
18
+ Args:
19
+ fileroot: root directory to traverse; filepathes: output list that collects the matched paths.
20
+ extension: e.g. '.jpg .png .bmp .webp .tif .eps'
21
+ """
22
+ items = os.listdir(fileroot)
23
+ for item in items:
24
+ if os.path.isfile(os.path.join(fileroot, item)):
25
+ filepath = os.path.join(fileroot, item)
26
+ fileext = get_file_ext(filepath).lower()
27
+ if extension == '.*':
28
+ filepathes.append(filepath)
29
+ elif fileext in extension:
30
+ filepathes.append(filepath)
31
+ else:
32
+ pass
33
+ elif os.path.isdir(os.path.join(fileroot, item)):
34
+ traverse_recursively(os.path.join(fileroot, item), filepathes, extension)
35
+ else:
36
+ pass
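A minimal sketch of traverse_recursively (assumption: a ./dataset directory exists; pass in a fresh list rather than relying on the mutable default argument):

from toolkit.dhelper import traverse_recursively

image_paths = []
traverse_recursively('./dataset', image_paths, extension='.jpg .png')
print(len(image_paths), 'matching files')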
toolkit/dtransform.py ADDED
@@ -0,0 +1,133 @@
1
+ from timm.data.auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform
2
+ from timm.data.transforms import RandomResizedCropAndInterpolation
3
+ from PIL import Image
4
+ import torchvision.transforms as transforms
5
+ import cv2
6
+ import numpy as np
7
+ import torchvision.transforms.functional as F
8
+
9
+
10
+ # Random JPEG compression augmentation
11
+ class JPEGCompression:
12
+ def __init__(self, quality=10, p=0.3):
13
+ self.quality = quality
14
+ self.p = p
15
+
16
+ def __call__(self, img):
17
+ if np.random.rand() < self.p:
18
+ img_np = np.array(img)
19
+ _, buffer = cv2.imencode('.jpg', img_np[:, :, ::-1], [int(cv2.IMWRITE_JPEG_QUALITY), self.quality])
20
+ jpeg_img = cv2.imdecode(buffer, 1)
21
+ return Image.fromarray(jpeg_img[:, :, ::-1])
22
+ return img
23
+
24
+
25
+ # Original (default) data augmentation for training
26
+ def transforms_imagenet_train(
27
+ img_size=(224, 224),
28
+ scale=(0.08, 1.0),
29
+ ratio=(3./4., 4./3.),
30
+ hflip=0.5,
31
+ vflip=0.5,
32
+ auto_augment='rand-m9-mstd0.5-inc1',
33
+ interpolation='random',
34
+ mean=(0.485, 0.456, 0.406),
35
+ jpeg_compression = 0,
36
+ ):
37
+ scale = tuple(scale or (0.08, 1.0)) # default imagenet scale range
38
+ ratio = tuple(ratio or (3./4., 4./3.)) # default imagenet ratio range
39
+
40
+ primary_tfl = [
41
+ RandomResizedCropAndInterpolation(img_size, scale=scale, ratio=ratio, interpolation=interpolation)]
42
+ if hflip > 0.:
43
+ primary_tfl += [transforms.RandomHorizontalFlip(p=hflip)]
44
+ if vflip > 0.:
45
+ primary_tfl += [transforms.RandomVerticalFlip(p=vflip)]
46
+
47
+ secondary_tfl = []
48
+ if auto_augment:
49
+ assert isinstance(auto_augment, str)
50
+
51
+ if isinstance(img_size, (tuple, list)):
52
+ img_size_min = min(img_size)
53
+ else:
54
+ img_size_min = img_size
55
+
56
+ aa_params = dict(
57
+ translate_const=int(img_size_min * 0.45),
58
+ img_mean=tuple([min(255, round(255 * x)) for x in mean]),
59
+ )
60
+ if auto_augment.startswith('rand'):
61
+ secondary_tfl += [rand_augment_transform(auto_augment, aa_params)]
62
+ elif auto_augment.startswith('augmix'):
63
+ aa_params['translate_pct'] = 0.3
64
+ secondary_tfl += [augment_and_mix_transform(auto_augment, aa_params)]
65
+ else:
66
+ secondary_tfl += [auto_augment_transform(auto_augment, aa_params)]
67
+
68
+ if jpeg_compression == 1:
69
+ secondary_tfl += [JPEGCompression(quality=10, p=0.3)]
70
+
71
+ final_tfl = [transforms.ToTensor()]
72
+
73
+ return transforms.Compose(primary_tfl + secondary_tfl + final_tfl)
74
+
75
+
76
+ # Used for inference (testing)
77
+ def create_transforms_inference(h=256, w=256):
78
+ transformer = transforms.Compose([
79
+ transforms.Resize(size=(h, w)),
80
+ transforms.ToTensor(),
81
+ ])
82
+
83
+ return transformer
84
+
85
+
86
+ def create_transforms_inference1(h=256, w=256):
87
+ transformer = transforms.Compose([
88
+ transforms.Lambda(lambda img: F.rotate(img, angle=90)),
89
+ transforms.Resize(size=(h, w)),
90
+ transforms.ToTensor(),
91
+ ])
92
+
93
+ return transformer
94
+
95
+
96
+ def create_transforms_inference2(h=256, w=256):
97
+ transformer = transforms.Compose([
98
+ transforms.Lambda(lambda img: F.rotate(img, angle=180)),
99
+ transforms.Resize(size=(h, w)),
100
+ transforms.ToTensor(),
101
+ ])
102
+
103
+ return transformer
104
+
105
+
106
+ def create_transforms_inference3(h=256, w=256):
107
+ transformer = transforms.Compose([
108
+ transforms.Lambda(lambda img: F.rotate(img, angle=270)),
109
+ transforms.Resize(size=(h, w)),
110
+ transforms.ToTensor(),
111
+ ])
112
+
113
+ return transformer
114
+
115
+
116
+ def create_transforms_inference4(h=256, w=256):
117
+ transformer = transforms.Compose([
118
+ transforms.Lambda(lambda img: F.hflip(img)),
119
+ transforms.Resize(size=(h, w)),
120
+ transforms.ToTensor(),
121
+ ])
122
+
123
+ return transformer
124
+
125
+
126
+ def create_transforms_inference5(h=256, w=256):
127
+ transformer = transforms.Compose([
128
+ transforms.Lambda(lambda img: F.vflip(img)),
129
+ transforms.Resize(size=(h, w)),
130
+ transforms.ToTensor(),
131
+ ])
132
+
133
+ return transformer
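A minimal sketch stacking the inference transforms above into a small test-time-augmentation batch (assumptions: a PIL RGB input; 512x512 matches transforms_dict_test in main_train.py):

import torch
from PIL import Image
from toolkit.dtransform import (create_transforms_inference,
                                create_transforms_inference1,
                                create_transforms_inference4)

img = Image.new('RGB', (640, 480))               # placeholder image
ttas = [create_transforms_inference(512, 512),
        create_transforms_inference1(512, 512),  # +90 degree rotation
        create_transforms_inference4(512, 512)]  # horizontal flip
views = torch.stack([t(img) for t in ttas])      # (3, 3, 512, 512), ToTensor scales to [0, 1]
print(views.shape)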
toolkit/yacs.py ADDED
@@ -0,0 +1,555 @@
1
+ # Copyright (c) 2018-present, Facebook, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ ##############################################################################
15
+
16
+ """YACS -- Yet Another Configuration System is designed to be a simple
17
+ configuration management system for academic and industrial research
18
+ projects.
19
+
20
+ See README.md for usage and examples.
21
+ """
22
+
23
+ import copy
24
+ import io
25
+ import logging
26
+ import os
27
+ import sys
28
+ from ast import literal_eval
29
+
30
+ import yaml
31
+
32
+ # Flag for py2 and py3 compatibility to use when separate code paths are necessary
33
+ # When _PY2 is False, we assume Python 3 is in use
34
+ _PY2 = sys.version_info.major == 2
35
+
36
+ # Filename extensions for loading configs from files
37
+ _YAML_EXTS = {"", ".yaml", ".yml"}
38
+ _PY_EXTS = {".py"}
39
+
40
+ # py2 and py3 compatibility for checking file object type
41
+ # We simply use this to infer py2 vs py3
42
+ if _PY2:
43
+ _FILE_TYPES = (file, io.IOBase)
44
+ else:
45
+ _FILE_TYPES = (io.IOBase,)
46
+
47
+ # CfgNodes can only contain a limited set of valid types
48
+ _VALID_TYPES = {tuple, list, str, int, float, bool, type(None)}
49
+ # py2 allow for str and unicode
50
+ if _PY2:
51
+ _VALID_TYPES = _VALID_TYPES.union({unicode}) # noqa: F821
52
+
53
+ # Utilities for importing modules from file paths
54
+ if _PY2:
55
+ # imp is available in both py2 and py3 for now, but is deprecated in py3
56
+ import imp
57
+ else:
58
+ import importlib.util
59
+
60
+ logger = logging.getLogger(__name__)
61
+
62
+
63
+ class CfgNode(dict):
64
+ """
65
+ CfgNode represents an internal node in the configuration tree. It's a simple
66
+ dict-like container that allows for attribute-based access to keys.
67
+ """
68
+
69
+ IMMUTABLE = "__immutable__"
70
+ DEPRECATED_KEYS = "__deprecated_keys__"
71
+ RENAMED_KEYS = "__renamed_keys__"
72
+ NEW_ALLOWED = "__new_allowed__"
73
+
74
+ def __init__(self, init_dict=None, key_list=None, new_allowed=False):
75
+ """
76
+ Args:
77
+ init_dict (dict): the possibly-nested dictionary to initialize the CfgNode.
78
+ key_list (list[str]): a list of names which index this CfgNode from the root.
79
+ Currently only used for logging purposes.
80
+ new_allowed (bool): whether adding new key is allowed when merging with
81
+ other configs.
82
+ """
83
+ # Recursively convert nested dictionaries in init_dict into CfgNodes
84
+ init_dict = {} if init_dict is None else init_dict
85
+ key_list = [] if key_list is None else key_list
86
+ init_dict = self._create_config_tree_from_dict(init_dict, key_list)
87
+ super(CfgNode, self).__init__(init_dict)
88
+ # Manage if the CfgNode is frozen or not
89
+ self.__dict__[CfgNode.IMMUTABLE] = False
90
+ # Deprecated options
91
+ # If an option is removed from the code and you don't want to break existing
92
+ # yaml configs, you can add the full config key as a string to the set below.
93
+ self.__dict__[CfgNode.DEPRECATED_KEYS] = set()
94
+ # Renamed options
95
+ # If you rename a config option, record the mapping from the old name to the new
96
+ # name in the dictionary below. Optionally, if the type also changed, you can
97
+ # make the value a tuple that specifies first the renamed key and then
98
+ # instructions for how to edit the config file.
99
+ self.__dict__[CfgNode.RENAMED_KEYS] = {
100
+ # 'EXAMPLE.OLD.KEY': 'EXAMPLE.NEW.KEY', # Dummy example to follow
101
+ # 'EXAMPLE.OLD.KEY': ( # A more complex example to follow
102
+ # 'EXAMPLE.NEW.KEY',
103
+ # "Also convert to a tuple, e.g., 'foo' -> ('foo',) or "
104
+ # + "'foo:bar' -> ('foo', 'bar')"
105
+ # ),
106
+ }
107
+
108
+ # Allow new attributes after initialisation
109
+ self.__dict__[CfgNode.NEW_ALLOWED] = new_allowed
110
+
111
+     @classmethod
+     def _create_config_tree_from_dict(cls, dic, key_list):
+         """
+         Create a configuration tree using the given dict.
+         Any dict-like objects inside dict will be treated as a new CfgNode.
+
+         Args:
+             dic (dict):
+             key_list (list[str]): a list of names which index this CfgNode from the root.
+                 Currently only used for logging purposes.
+         """
+         dic = copy.deepcopy(dic)
+         for k, v in dic.items():
+             if isinstance(v, dict):
+                 # Convert dict to CfgNode
+                 dic[k] = cls(v, key_list=key_list + [k])
+             else:
+                 # Check for valid leaf type or nested CfgNode
+                 _assert_with_logging(
+                     _valid_type(v, allow_cfg_node=False),
+                     "Key {} with value {} is not a valid type; valid types: {}".format(
+                         ".".join(key_list + [str(k)]), type(v), _VALID_TYPES
+                     ),
+                 )
+         return dic
+
+     def __getattr__(self, name):
+         if name in self:
+             return self[name]
+         else:
+             raise AttributeError(name)
+
+     def __setattr__(self, name, value):
+         if self.is_frozen():
+             raise AttributeError(
+                 "Attempted to set {} to {}, but CfgNode is immutable".format(
+                     name, value
+                 )
+             )
+
+         _assert_with_logging(
+             name not in self.__dict__,
+             "Invalid attempt to modify internal CfgNode state: {}".format(name),
+         )
+         _assert_with_logging(
+             _valid_type(value, allow_cfg_node=True),
+             "Invalid type {} for key {}; valid types = {}".format(
+                 type(value), name, _VALID_TYPES
+             ),
+         )
+
+         self[name] = value
+
+     def __str__(self):
+         def _indent(s_, num_spaces):
+             s = s_.split("\n")
+             if len(s) == 1:
+                 return s_
+             first = s.pop(0)
+             s = [(num_spaces * " ") + line for line in s]
+             s = "\n".join(s)
+             s = first + "\n" + s
+             return s
+
+         r = ""
+         s = []
+         for k, v in sorted(self.items()):
+             separator = "\n" if isinstance(v, CfgNode) else " "
+             attr_str = "{}:{}{}".format(str(k), separator, str(v))
+             attr_str = _indent(attr_str, 2)
+             s.append(attr_str)
+         r += "\n".join(s)
+         return r
+
+     def __repr__(self):
+         return "{}({})".format(self.__class__.__name__, super(CfgNode, self).__repr__())
+
+     def dump(self, **kwargs):
+         """Dump to a string."""
+
+         def convert_to_dict(cfg_node, key_list):
+             if not isinstance(cfg_node, CfgNode):
+                 _assert_with_logging(
+                     _valid_type(cfg_node),
+                     "Key {} with value {} is not a valid type; valid types: {}".format(
+                         ".".join(key_list), type(cfg_node), _VALID_TYPES
+                     ),
+                 )
+                 return cfg_node
+             else:
+                 cfg_dict = dict(cfg_node)
+                 for k, v in cfg_dict.items():
+                     cfg_dict[k] = convert_to_dict(v, key_list + [k])
+                 return cfg_dict
+
+         self_as_dict = convert_to_dict(self, [])
+         return yaml.safe_dump(self_as_dict, **kwargs)
+
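Since `dump()` serializes the node through `yaml.safe_dump`, the output is plain YAML and any keyword arguments are passed straight through. A small sketch (the import path is again hypothetical):

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"MODEL": {"NAME": "resnet"}, "LR": 0.01})
print(cfg.dump(default_flow_style=False))  # block-style YAML, one key per line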
+     def merge_from_file(self, cfg_filename):
+         """Load a yaml config file and merge it into this CfgNode."""
+         with open(cfg_filename, "r") as f:
+             cfg = self.load_cfg(f)
+         self.merge_from_other_cfg(cfg)
+
+     def merge_from_other_cfg(self, cfg_other):
+         """Merge `cfg_other` into this CfgNode."""
+         _merge_a_into_b(cfg_other, self, self, [])
+
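A sketch of file-based overrides, assuming a hypothetical `override.yaml` whose keys already exist in the config (merging an unknown key raises KeyError unless new keys are explicitly allowed):

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"MODEL": {"NAME": "resnet"}, "LR": 0.01})
with open("override.yaml", "w") as f:  # hypothetical override file
    f.write("MODEL:\n  NAME: resnet50\nLR: 0.02\n")
cfg.merge_from_file("override.yaml")
assert cfg.MODEL.NAME == "resnet50" and cfg.LR == 0.02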
+     def merge_from_list(self, cfg_list):
+         """Merge config (keys, values) in a list (e.g., from command line) into
+         this CfgNode. For example, `cfg_list = ['FOO.BAR', 0.5]`.
+         """
+         _assert_with_logging(
+             len(cfg_list) % 2 == 0,
+             "Override list has odd length: {}; it must be a list of pairs".format(
+                 cfg_list
+             ),
+         )
+         root = self
+         for full_key, v in zip(cfg_list[0::2], cfg_list[1::2]):
+             if root.key_is_deprecated(full_key):
+                 continue
+             if root.key_is_renamed(full_key):
+                 root.raise_key_rename_error(full_key)
+             key_list = full_key.split(".")
+             d = self
+             for subkey in key_list[:-1]:
+                 _assert_with_logging(
+                     subkey in d, "Non-existent key: {}".format(full_key)
+                 )
+                 d = d[subkey]
+             subkey = key_list[-1]
+             _assert_with_logging(subkey in d, "Non-existent key: {}".format(full_key))
+             value = self._decode_cfg_value(v)
+             value = _check_and_coerce_cfg_value_type(value, d[subkey], subkey, full_key)
+             d[subkey] = value
+
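A sketch of command-line-style overrides: string values pass through `_decode_cfg_value` (i.e. `literal_eval`), so numbers and lists are parsed while bare strings fall through unchanged; the keys and import path below are hypothetical.

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"MODEL": {"NAME": "resnet"}, "LR": 0.01})
# Typical pattern: forward the unparsed argparse remainder, e.g. opts = ["LR", "0.02", "MODEL.NAME", "resnet50"]
cfg.merge_from_list(["LR", "0.02", "MODEL.NAME", "resnet50"])
assert cfg.LR == 0.02                 # "0.02" was decoded to a float
assert cfg.MODEL.NAME == "resnet50"   # a bare string stays a string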
+     def freeze(self):
+         """Make this CfgNode and all of its children immutable."""
+         self._immutable(True)
+
+     def defrost(self):
+         """Make this CfgNode and all of its children mutable."""
+         self._immutable(False)
+
+     def is_frozen(self):
+         """Return mutability."""
+         return self.__dict__[CfgNode.IMMUTABLE]
+
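A sketch of the freeze/defrost cycle: while frozen, attribute assignment is rejected by `__setattr__`; `defrost()` makes the whole tree writable again (import path hypothetical).

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"LR": 0.01})
cfg.freeze()
try:
    cfg.LR = 0.1                  # refused while frozen
except AttributeError as e:
    print("blocked:", e)
cfg.defrost()
cfg.LR = 0.1                      # allowed again after defrost()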
+     def _immutable(self, is_immutable):
+         """Set immutability to is_immutable and recursively apply the setting
+         to all nested CfgNodes.
+         """
+         self.__dict__[CfgNode.IMMUTABLE] = is_immutable
+         # Recursively set immutable state
+         for v in self.__dict__.values():
+             if isinstance(v, CfgNode):
+                 v._immutable(is_immutable)
+         for v in self.values():
+             if isinstance(v, CfgNode):
+                 v._immutable(is_immutable)
+
+     def clone(self):
+         """Recursively copy this CfgNode."""
+         return copy.deepcopy(self)
+
+     def register_deprecated_key(self, key):
+         """Register key (e.g. `FOO.BAR`) as a deprecated option. When merging deprecated
+         keys, a warning is generated and the key is ignored.
+         """
+         _assert_with_logging(
+             key not in self.__dict__[CfgNode.DEPRECATED_KEYS],
+             "key {} is already registered as a deprecated key".format(key),
+         )
+         self.__dict__[CfgNode.DEPRECATED_KEYS].add(key)
+
+     def register_renamed_key(self, old_name, new_name, message=None):
+         """Register a key as having been renamed from `old_name` to `new_name`.
+         When merging a renamed key, an exception is thrown alerting the user to
+         the fact that the key has been renamed.
+         """
+         _assert_with_logging(
+             old_name not in self.__dict__[CfgNode.RENAMED_KEYS],
+             "key {} is already registered as a renamed cfg key".format(old_name),
+         )
+         value = new_name
+         if message:
+             value = (new_name, message)
+         self.__dict__[CfgNode.RENAMED_KEYS][old_name] = value
+
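A sketch of how registered keys change merge behavior: a deprecated key is logged and skipped, while a renamed key raises a KeyError pointing at its new name. The key names and import path are hypothetical.

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"LR": 0.01})
cfg.register_deprecated_key("SOLVER.MOMENTUM")                       # hypothetical retired key
cfg.register_renamed_key("SOLVER.LR", "LR", message="rename it in your yaml")

cfg.merge_from_list(["SOLVER.MOMENTUM", 0.9])                        # warning logged, value ignored
try:
    cfg.merge_from_list(["SOLVER.LR", 0.02])                         # raises KeyError mentioning "LR"
except KeyError as e:
    print(e)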
+     def key_is_deprecated(self, full_key):
+         """Test if a key is deprecated."""
+         if full_key in self.__dict__[CfgNode.DEPRECATED_KEYS]:
+             logger.warning("Deprecated config key (ignoring): {}".format(full_key))
+             return True
+         return False
+
+     def key_is_renamed(self, full_key):
+         """Test if a key is renamed."""
+         return full_key in self.__dict__[CfgNode.RENAMED_KEYS]
+
+     def raise_key_rename_error(self, full_key):
+         new_key = self.__dict__[CfgNode.RENAMED_KEYS][full_key]
+         if isinstance(new_key, tuple):
+             msg = " Note: " + new_key[1]
+             new_key = new_key[0]
+         else:
+             msg = ""
+         raise KeyError(
+             "Key {} was renamed to {}; please update your config.{}".format(
+                 full_key, new_key, msg
+             )
+         )
+
+     def is_new_allowed(self):
+         return self.__dict__[CfgNode.NEW_ALLOWED]
+
+     def set_new_allowed(self, is_new_allowed):
+         """
+         Set this config (and recursively its subconfigs) to allow merging
+         new keys from other configs.
+         """
+         self.__dict__[CfgNode.NEW_ALLOWED] = is_new_allowed
+         # Recursively set new_allowed state
+         for v in self.__dict__.values():
+             if isinstance(v, CfgNode):
+                 v.set_new_allowed(is_new_allowed)
+         for v in self.values():
+             if isinstance(v, CfgNode):
+                 v.set_new_allowed(is_new_allowed)
+
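By default, merging a key that does not already exist raises KeyError; `set_new_allowed(True)` relaxes that so merges may add keys. A sketch under a hypothetical import path:

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"LR": 0.01})
extra = CfgNode({"WARMUP": 500})       # key not present in cfg

try:
    cfg.merge_from_other_cfg(extra)    # raises KeyError: non-existent config key
except KeyError as e:
    print(e)

cfg.set_new_allowed(True)
cfg.merge_from_other_cfg(extra)        # now the new key is simply added
assert cfg.WARMUP == 500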
+     @classmethod
+     def load_cfg(cls, cfg_file_obj_or_str):
+         """
+         Load a cfg.
+         Args:
+             cfg_file_obj_or_str (str or file):
+                 Supports loading from:
+                 - A file object backed by a YAML file
+                 - A file object backed by a Python source file that exports an attribute
+                   "cfg" that is either a dict or a CfgNode
+                 - A string that can be parsed as valid YAML
+         """
+         _assert_with_logging(
+             isinstance(cfg_file_obj_or_str, _FILE_TYPES + (str,)),
+             "Expected first argument to be of type {} or {}, but it was {}".format(
+                 _FILE_TYPES, str, type(cfg_file_obj_or_str)
+             ),
+         )
+         if isinstance(cfg_file_obj_or_str, str):
+             return cls._load_cfg_from_yaml_str(cfg_file_obj_or_str)
+         elif isinstance(cfg_file_obj_or_str, _FILE_TYPES):
+             return cls._load_cfg_from_file(cfg_file_obj_or_str)
+         else:
+             raise NotImplementedError("Impossible to reach here (unless there's a bug)")
+
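A sketch of the string-input path of `load_cfg`, which simply parses the argument as YAML (import path hypothetical):

from config import CfgNode  # hypothetical import path

cfg = CfgNode.load_cfg("MODEL:\n  NAME: resnet\nLR: 0.01\n")
assert isinstance(cfg, CfgNode) and cfg.MODEL.NAME == "resnet"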
+     @classmethod
+     def _load_cfg_from_file(cls, file_obj):
+         """Load a config from a YAML file or a Python source file."""
+         _, file_extension = os.path.splitext(file_obj.name)
+         if file_extension in _YAML_EXTS:
+             return cls._load_cfg_from_yaml_str(file_obj.read())
+         elif file_extension in _PY_EXTS:
+             return cls._load_cfg_py_source(file_obj.name)
+         else:
+             raise Exception(
+                 "Attempt to load from an unsupported file type {}; "
+                 "only {} are supported".format(file_obj, _YAML_EXTS.union(_PY_EXTS))
+             )
+
+     @classmethod
+     def _load_cfg_from_yaml_str(cls, str_obj):
+         """Load a config from a YAML string encoding."""
+         cfg_as_dict = yaml.safe_load(str_obj)
+         return cls(cfg_as_dict)
+
+     @classmethod
+     def _load_cfg_py_source(cls, filename):
+         """Load a config from a Python source file."""
+         module = _load_module_from_file("yacs.config.override", filename)
+         _assert_with_logging(
+             hasattr(module, "cfg"),
+             "Python module from file {} must have 'cfg' attr".format(filename),
+         )
+         VALID_ATTR_TYPES = {dict, CfgNode}
+         _assert_with_logging(
+             type(module.cfg) in VALID_ATTR_TYPES,
+             "Imported module 'cfg' attr must be in {} but is {} instead".format(
+                 VALID_ATTR_TYPES, type(module.cfg)
+             ),
+         )
+         return cls(module.cfg)
+
+     @classmethod
+     def _decode_cfg_value(cls, value):
+         """
+         Decodes a raw config value (e.g., from a yaml config file or command
+         line argument) into a Python object.
+
+         If the value is a dict, it will be interpreted as a new CfgNode.
+         If the value is a str, it will be evaluated as a Python literal.
+         Otherwise it is returned as-is.
+         """
+         # Configs parsed from raw yaml will contain dictionary keys that need to be
+         # converted to CfgNode objects
+         if isinstance(value, dict):
+             return cls(value)
+         # All remaining processing is only applied to strings
+         if not isinstance(value, str):
+             return value
+         # Try to interpret `value` as a:
+         #   string, number, tuple, list, dict, boolean, or None
+         try:
+             value = literal_eval(value)
+         # The following two excepts allow v to pass through when it represents a
+         # string.
+         #
+         # Longer explanation:
+         # The type of v is always a string (before calling literal_eval), but
+         # sometimes it *represents* a string and other times a data structure, like
+         # a list. In the case that v represents a string, what we got back from the
+         # yaml parser is 'foo' *without quotes* (so, not '"foo"'). literal_eval is
+         # ok with '"foo"', but will raise a ValueError if given 'foo'. In other
+         # cases, like paths (v = 'foo/bar' and not v = '"foo/bar"'), literal_eval
+         # will raise a SyntaxError.
+         except ValueError:
+             pass
+         except SyntaxError:
+             pass
+         return value
+
+
+ load_cfg = (
+     CfgNode.load_cfg
+ )  # keep this function in global scope for backward compatibility
+
+
+ def _valid_type(value, allow_cfg_node=False):
+     return (type(value) in _VALID_TYPES) or (
+         allow_cfg_node and isinstance(value, CfgNode)
+     )
+
+
+ def _merge_a_into_b(a, b, root, key_list):
+     """Merge config dictionary a into config dictionary b, clobbering the
+     options in b whenever they are also specified in a.
+     """
+     _assert_with_logging(
+         isinstance(a, CfgNode),
+         "`a` (cur type {}) must be an instance of {}".format(type(a), CfgNode),
+     )
+     _assert_with_logging(
+         isinstance(b, CfgNode),
+         "`b` (cur type {}) must be an instance of {}".format(type(b), CfgNode),
+     )
+
+     for k, v_ in a.items():
+         full_key = ".".join(key_list + [k])
+
+         v = copy.deepcopy(v_)
+         v = b._decode_cfg_value(v)
+
+         if k in b:
+             v = _check_and_coerce_cfg_value_type(v, b[k], k, full_key)
+             # Recursively merge dicts
+             if isinstance(v, CfgNode):
+                 try:
+                     _merge_a_into_b(v, b[k], root, key_list + [k])
+                 except BaseException:
+                     raise
+             else:
+                 b[k] = v
+         elif b.is_new_allowed():
+             b[k] = v
+         else:
+             if root.key_is_deprecated(full_key):
+                 continue
+             elif root.key_is_renamed(full_key):
+                 root.raise_key_rename_error(full_key)
+             else:
+                 raise KeyError("Non-existent config key: {}".format(full_key))
+
+
+ def _check_and_coerce_cfg_value_type(replacement, original, key, full_key):
+     """Checks that `replacement`, which is intended to replace `original`, is of
+     the right type. The type is correct if it matches exactly or is one of a few
+     cases in which the type can be easily coerced.
+     """
+     original_type = type(original)
+     replacement_type = type(replacement)
+
+     # The types must match (with some exceptions)
+     if replacement_type == original_type:
+         return replacement
+
+     # If either of them is None, allow type conversion to one of the valid types
+     if (replacement_type == type(None) and original_type in _VALID_TYPES) or (
+         original_type == type(None) and replacement_type in _VALID_TYPES
+     ):
+         return replacement
+
+     # Cast replacement from from_type to to_type if the replacement and original
+     # types match from_type and to_type
+     def conditional_cast(from_type, to_type):
+         if replacement_type == from_type and original_type == to_type:
+             return True, to_type(replacement)
+         else:
+             return False, None
+
+     # Conditionally casts
+     # list <-> tuple
+     casts = [(tuple, list), (list, tuple)]
+     # For py2: allow converting from str (bytes) to a unicode string
+     try:
+         casts.append((str, unicode))  # noqa: F821
+     except Exception:
+         pass
+
+     for (from_type, to_type) in casts:
+         converted, converted_value = conditional_cast(from_type, to_type)
+         if converted:
+             return converted_value
+
+     raise ValueError(
+         "Type mismatch ({} vs. {}) with values ({} vs. {}) for config "
+         "key: {}".format(
+             original_type, replacement_type, original, replacement, full_key
+         )
+     )
+
+
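The only automatic coercion here is list <-> tuple (plus str/unicode on Python 2); any other type mismatch raises ValueError. A sketch exercising both paths through `merge_from_list` (keys and import path hypothetical):

from config import CfgNode  # hypothetical import path

cfg = CfgNode({"SIZES": (300, 400), "NAME": "resnet"})
cfg.merge_from_list(["SIZES", "[512, 640]"])   # list override is coerced back to a tuple
assert cfg.SIZES == (512, 640)

try:
    cfg.merge_from_list(["NAME", "3"])          # "3" decodes to int; str -> int has no coercion rule
except ValueError as e:
    print(e)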
+ def _assert_with_logging(cond, msg):
+     if not cond:
+         logger.debug(msg)
+     assert cond, msg
+
+
+ def _load_module_from_file(name, filename):
+     if _PY2:
+         module = imp.load_source(name, filename)
+     else:
+         spec = importlib.util.spec_from_file_location(name, filename)
+         module = importlib.util.module_from_spec(spec)
+         spec.loader.exec_module(module)
+     return module