Upload model

Browse files

Files changed (5) hide show

README.md +199 -0
config.json +18 -0
configuration_revar.py +13 -0
model.safetensors +3 -0
modeling_revar.py +267 -0

README.md ADDED Viewed

	@@ -0,0 +1,199 @@

+---
+library_name: transformers
+tags: []
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+This is the model card of a 🤗 transformers model that has been pushed to the Hub. This model card was generated automatically.
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]

config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "architectures": [
+    "ReVarModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_revar.ReVarConfig",
+    "AutoModel": "modeling_revar.ReVarModel"
+  },
+  "inner_dim": 480,
+  "kernel_size": 5,
+  "model_type": "revar",
+  "num_output_channels": 5,
+  "num_stacks": 20,
+  "outer_dim": 960,
+  "stack_size": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.43.3"
+}

configuration_revar.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from transformers import PretrainedConfig
class ReVarConfig(PretrainedConfig):
    """Hyperparameter container for the ReVar architecture.

    Each named argument is stored on the instance under the same name; all
    remaining keyword arguments are handed to ``PretrainedConfig`` unchanged.

    Args:
        outer_dim: Stored as ``outer_dim`` (default 960).
        inner_dim: Stored as ``inner_dim`` (default 480).
        kernel_size: Stored as ``kernel_size`` (default 5).
        stack_size: Stored as ``stack_size`` (default 2).
        num_stacks: Stored as ``num_stacks`` (default 20).
        num_output_channels: Stored as ``num_output_channels`` (default 5).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "revar"

    def __init__(
        self,
        outer_dim: int = 960,
        inner_dim: int = 480,
        kernel_size: int = 5,
        stack_size: int = 2,
        num_stacks: int = 20,
        num_output_channels: int = 5,
        **kwargs,
    ):
        # Channel widths.
        self.outer_dim = outer_dim
        self.inner_dim = inner_dim

        # Convolution / stacking layout.
        self.kernel_size = kernel_size
        self.stack_size = stack_size
        self.num_stacks = num_stacks

        # Output head width.
        self.num_output_channels = num_output_channels

        # The base class is initialised last, after the model-specific
        # attributes are already in place.
        super().__init__(**kwargs)

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76ec9fb675327b7d1069eb69efb02800d53c51c6cdee67a72cc48e64ff2a39ce
+size 332860784

modeling_revar.py ADDED Viewed

	@@ -0,0 +1,267 @@

+from typing import List, Optional
+from itertools import product
+from collections import defaultdict
+import torch
+from torch import nn
+import torch.nn.utils.parametrize as parametrize
def check_if_involution(indices: List[int]) -> bool:
    """Return whether ``indices`` maps every position back to itself when applied twice.

    ``indices`` is read as a mapping ``position -> indices[position]``. It is an
    involution when ``indices[indices[position]] == position`` for every
    position.

    Entries outside ``0 .. len(indices) - 1`` can never be part of a valid
    mapping, so they make the function return ``False`` instead of raising
    ``IndexError`` (too large) or being resolved through Python's negative
    indexing (negative values).

    Args:
        indices: Candidate mapping, one target position per position.

    Returns:
        ``True`` if the mapping is an involution (an empty list qualifies),
        ``False`` otherwise.
    """
    size = len(indices)
    return all(
        # The range check must come first so the lookup below cannot raise.
        0 <= target < size and indices[target] == position
        for position, target in enumerate(indices)
    )
def get_conv1d_output_length(
    input_length: int, kernel_size: int, stride_size: int = 1, pad_size: int = 0, dilation_rate: int = 1
) -> int:
    """Compute the output length of a 1-D convolution.

    Args:
        input_length: Length of the input along the convolved axis.
        kernel_size: Number of taps in the kernel.
        stride_size: Step between successive kernel positions.
        pad_size: Padding added to each end of the input.
        dilation_rate: Spacing between kernel taps.

    Returns:
        ``(input_length + 2 * pad_size - dilation_rate * (kernel_size - 1) - 1)
        // stride_size + 1``.
    """
    # Span covered by the dilated kernel, end to end.
    kernel_span = dilation_rate * (kernel_size - 1) + 1
    padded_length = input_length + 2 * pad_size
    return (padded_length - kernel_span) // stride_size + 1
def get_involution_indices(size: int) -> List[int]:
    """Return the order-reversing index list ``[size - 1, ..., 1, 0]``.

    Args:
        size: Number of positions.

    Returns:
        A list of ``size`` integers counting down from ``size - 1`` to ``0``
        (empty when ``size`` is not positive).
    """
    return list(range(size - 1, -1, -1))
class RCEWeight(nn.Module):
    """Parametrization that symmetrizes a 3-D weight tensor.

    ``forward`` averages the tensor with a transformed copy of itself in which
    dimension 0 is reordered by the output involution, dimension 1 by the input
    involution, and dimension 2 is reversed. In this file it is registered on
    the ``weight`` of ``nn.Conv1d`` layers.

    Args:
        input_involution_indices: Involution applied along dimension 1.
        output_involution_indices: Involution applied along dimension 0.

    Raises:
        ValueError: If either index list is not an involution.
    """

    def __init__(
        self, input_involution_indices: List[int], output_involution_indices: List[int]
    ):
        both_are_involutions = (
            check_if_involution(input_involution_indices)
            and check_if_involution(output_involution_indices)
        )
        if not both_are_involutions:
            raise ValueError(
                "`input_involution_indices` and `output_involution_indices` must be involutions"
            )
        super().__init__()
        self._input_involution_indices = input_involution_indices
        self._output_involution_indices = output_involution_indices
        # Index tensors are built on first use and rebuilt when the device of
        # the incoming tensor changes.
        self._input_involution_index_tensor = None
        self._output_involution_index_tensor = None
        self._device = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the average of ``x`` and its permuted, kernel-flipped copy."""
        device = x.device
        if self._device != device:
            self._input_involution_index_tensor = torch.tensor(self._input_involution_indices, device=device)
            self._output_involution_index_tensor = torch.tensor(self._output_involution_indices, device=device)
            self._device = device
        rows_permuted = x[self._output_involution_index_tensor]
        permuted = rows_permuted[:, self._input_involution_index_tensor]
        return (x + permuted.flip(2)) / 2
class IEBias(nn.Module):
    """Parametrization that symmetrizes a tensor along its first dimension.

    ``forward`` averages the tensor with a copy reordered along dimension 0 by
    the given involution. In this file it is registered on the ``bias`` of
    ``nn.Conv1d`` layers.

    Args:
        involution_indices: Involution applied along dimension 0.

    Raises:
        ValueError: If ``involution_indices`` is not an involution.
    """

    def __init__(self, involution_indices: List[int]):
        is_valid = check_if_involution(involution_indices)
        if not is_valid:
            raise ValueError("`involution_indices` must be an involution")
        super().__init__()
        self._involution_indices = involution_indices
        # Built on first use and rebuilt when the input device changes.
        self._involution_index_tensor = None
        self._device = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the average of ``x`` and its reordered copy."""
        device = x.device
        if self._device != device:
            self._involution_index_tensor = torch.tensor(self._involution_indices, device=device)
            self._device = device
        reordered = x[self._involution_index_tensor]
        return (x + reordered) / 2
class IEWeight(nn.Module):
    """Parametrization that symmetrizes a 2-D weight tensor.

    ``forward`` averages the tensor with a copy in which dimension 0 is
    reordered by the input involution and dimension 1 by the output involution.
    In this file it is registered on the ``weight`` of an ``nn.Embedding``.

    Args:
        input_involution_indices: Involution applied along dimension 0.
        output_involution_indices: Involution applied along dimension 1.

    Raises:
        ValueError: If either index list is not an involution.
    """

    def __init__(
        self, input_involution_indices: List[int], output_involution_indices: List[int]
    ):
        both_are_involutions = (
            check_if_involution(input_involution_indices)
            and check_if_involution(output_involution_indices)
        )
        if not both_are_involutions:
            raise ValueError(
                "`input_involution_indices` and `output_involution_indices` must be involutions"
            )
        super().__init__()
        self._input_involution_indices = input_involution_indices
        self._output_involution_indices = output_involution_indices
        # Index tensors are built on first use and rebuilt when the device of
        # the incoming tensor changes.
        self._input_involution_index_tensor = None
        self._output_involution_index_tensor = None
        self._device = None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the average of ``x`` and its row/column-permuted copy."""
        device = x.device
        if self._device != device:
            self._input_involution_index_tensor = torch.tensor(self._input_involution_indices, device=device)
            self._output_involution_index_tensor = torch.tensor(self._output_involution_indices, device=device)
            self._device = device
        rows_permuted = x[self._input_involution_index_tensor]
        permuted = rows_permuted[:, self._output_involution_index_tensor]
        return (x + permuted) / 2
class RCEByteNetBlock(nn.Module):
    """Residual bottleneck block built from three symmetrized ``nn.Conv1d`` layers.

    The block applies GroupNorm -> GELU -> Conv1d three times: a kernel-size-1
    projection from the outer width to ``inner_dim``, a dilated convolution of
    size ``kernel_size`` at ``inner_dim``, and a kernel-size-1 projection back
    to the outer width. Every convolution has its weight parametrized with
    ``RCEWeight`` and its bias with ``IEBias``. The dilated convolution uses no
    padding, so the residual input is cropped symmetrically to the shorter
    output length before being added.

    Args:
        outer_involution_indices: Involution over the outer channels; its
            length defines the outer width.
        inner_dim: Width of the bottleneck.
        kernel_size: Size of the dilated convolution kernel.
        dilation_rate: Dilation of the middle convolution.

    Raises:
        ValueError: If the outer width or ``inner_dim`` is odd, or if
            ``kernel_size`` is even.
    """

    def __init__(self, outer_involution_indices: List[int], inner_dim: int, kernel_size: int, dilation_rate: int = 1):
        outer_dim = len(outer_involution_indices)
        if outer_dim % 2 != 0:
            raise ValueError("`outer_involution_indices` must have an even length")
        if inner_dim % 2 != 0:
            raise ValueError("`inner_dim` must be even")
        if kernel_size % 2 == 0:
            raise ValueError("`kernel_size` must be odd")
        super().__init__()
        inner_involution_indices = get_involution_indices(inner_dim)

        down_projection = nn.Conv1d(outer_dim, inner_dim, kernel_size=1)
        dilated_conv = nn.Conv1d(inner_dim, inner_dim, kernel_size, dilation=dilation_rate)
        up_projection = nn.Conv1d(inner_dim, outer_dim, kernel_size=1)

        # (convolution, involution on its input channels, involution on its output channels)
        symmetrized_convs = (
            (down_projection, outer_involution_indices, inner_involution_indices),
            (dilated_conv, inner_involution_indices, inner_involution_indices),
            (up_projection, inner_involution_indices, outer_involution_indices),
        )
        for conv, in_involution, out_involution in symmetrized_convs:
            parametrize.register_parametrization(
                conv, "weight", RCEWeight(in_involution, out_involution)
            )
            parametrize.register_parametrization(
                conv, "bias", IEBias(out_involution)
            )

        self.layers = nn.Sequential(
            nn.GroupNorm(1, outer_dim),
            nn.GELU(),
            down_projection,
            nn.GroupNorm(1, inner_dim),
            nn.GELU(),
            dilated_conv,
            nn.GroupNorm(1, inner_dim),
            nn.GELU(),
            up_projection,
        )
        self._kernel_size = kernel_size
        self._dilation_rate = dilation_rate

    @property
    def kernel_size(self):
        """Kernel size of the dilated convolution."""
        return self._kernel_size

    @property
    def dilation_rate(self):
        """Dilation rate of the dilated convolution."""
        return self._dilation_rate

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the block and add the (cropped) input as a residual.

        The length is read from ``x.shape[2]``.
        """
        length_in = x.shape[2]
        length_out = get_conv1d_output_length(length_in, self.kernel_size, dilation_rate=self.dilation_rate)
        # Number of positions lost at each end by the unpadded convolution.
        trim = (length_in - length_out) // 2
        # `x[:, :, 0:-0]` would be empty, so the no-crop case is handled apart.
        residual = x if trim == 0 else x[:, :, trim:-trim]
        return self.layers(x) + residual
class RCEByteNet(nn.Module):
    """Token embedding, a stack of ``RCEByteNetBlock``s, and a symmetrized output head.

    Args:
        input_involution_indices: Involution over the token vocabulary; its
            length is the vocabulary size.
        output_involution_indices: Involution over the per-channel outputs; its
            length is the per-channel output width.
        dilation_rates: One dilation rate per residual block, in order.
        outer_dim: Channel width between blocks (and embedding width).
        inner_dim: Bottleneck width inside each block.
        kernel_size: Kernel size of each block's dilated convolution.
        num_output_channels: Number of output groups; the head emits
            ``len(output_involution_indices) * num_output_channels`` channels.
        pad_token_idx: Optional padding token; it must be a fixed point of
            ``input_involution_indices``.

    Raises:
        ValueError: If ``pad_token_idx`` is given and is not mapped to itself
            by ``input_involution_indices``.
    """

    def __init__(
        self,
        input_involution_indices: List[int],
        output_involution_indices: List[int],
        dilation_rates: List[int],
        outer_dim: int,
        inner_dim: int,
        kernel_size: int,
        num_output_channels: int = 1,
        pad_token_idx: Optional[int] = None
    ):
        if pad_token_idx is not None and input_involution_indices[pad_token_idx] != pad_token_idx:
            raise ValueError("`input_involution_indices[pad_token_idx]` must be equal to `pad_token_idx`")
        super().__init__()
        vocab_size = len(input_involution_indices)
        outer_involution_indices = get_involution_indices(outer_dim)

        self.embedding = nn.Embedding(vocab_size, outer_dim, padding_idx=pad_token_idx)
        parametrize.register_parametrization(
            self.embedding, "weight",
            IEWeight(input_involution_indices, outer_involution_indices)
        )
        # Once parametrized, `self.embedding.weight` is recomputed from the
        # stored tensor on every access. In-place edits to it are discarded,
        # so initialisation, padding and freezing must target the stored
        # tensor itself.
        stored_weight = self.embedding.parametrizations.weight.original
        nn.init.normal_(stored_weight, std=2**0.5)
        if self.embedding.padding_idx is not None:
            # The padding row is a fixed point of the input involution, so a
            # zero stored row yields a zero effective row.
            with torch.no_grad():
                stored_weight[self.embedding.padding_idx].zero_()
        # Assigning `requires_grad` on the module only creates a plain
        # attribute; the tensor-level call is what freezes the embedding.
        stored_weight.requires_grad_(False)

        self.blocks = nn.Sequential(*[
            RCEByteNetBlock(outer_involution_indices, inner_dim, kernel_size, dilation_rate=rate)
            for rate in dilation_rates
        ])

        self._num_output_channels = num_output_channels
        output_dim = len(output_involution_indices)
        # Repeat the per-channel involution once per output group, offset into
        # the flattened `num_output_channels * output_dim` channel axis.
        flat_output_involution_indices = [
            group * output_dim + target
            for group, target in product(range(num_output_channels), output_involution_indices)
        ]
        self.output_layers = nn.Sequential(
            nn.GroupNorm(1, outer_dim), nn.GELU(),
            nn.Conv1d(outer_dim, output_dim * num_output_channels, kernel_size=1)
        )
        parametrize.register_parametrization(
            self.output_layers[-1], "weight", RCEWeight(outer_involution_indices, flat_output_involution_indices)
        )
        parametrize.register_parametrization(
            self.output_layers[-1], "bias", IEBias(flat_output_involution_indices)
        )

    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Run the network on token indices.

        Args:
            input_tensor: Integer token indices fed to the embedding
                (presumably shaped ``(batch, length)`` — confirm with callers).

        Returns:
            The head output with its last axis split into
            ``(num_output_channels, output_dim)``.
        """
        # Embedding output is channel-last; the conv stack expects channel-first.
        x = self.blocks(self.embedding(input_tensor).swapaxes(1, 2))
        output_tensor = self.output_layers(x).swapaxes(1, 2)
        output_dim = output_tensor.shape[2] // self._num_output_channels
        shape = list(output_tensor.shape[:-1]) + [self._num_output_channels, output_dim]
        return output_tensor.reshape(shape)
+from transformers import PreTrainedModel
+from .configuration_revar import ReVarConfig
class ReVarModel(PreTrainedModel):
    """Hugging Face wrapper around ``RCEByteNet`` configured from a ``ReVarConfig``.

    The wrapped network uses a six-token vocabulary with involution
    ``[3, 2, 1, 0, 4, 5]`` (token 5 is the padding token) and a four-wide
    output involution ``[3, 2, 1, 0]``.
    """

    config_class = ReVarConfig

    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        # The dilation schedule `kernel_size**0 .. kernel_size**(stack_size-1)`
        # is repeated `num_stacks` times.
        dilation_rates = config.num_stacks * [config.kernel_size**i for i in range(0, config.stack_size)]
        self._model = RCEByteNet(
            input_involution_indices=[3, 2, 1, 0, 4, 5],
            output_involution_indices=[3, 2, 1, 0],
            dilation_rates=dilation_rates,
            outer_dim=config.outer_dim,
            inner_dim=config.inner_dim,
            kernel_size=config.kernel_size,
            num_output_channels=config.num_output_channels,
            pad_token_idx=5
        )

    def get_embeddings(self, input_ids: torch.Tensor):
        """Return the features produced by the residual stack, before the output head.

        The wrapped network is used directly when it provides its own
        ``get_embeddings``. Otherwise the features are computed here from its
        ``embedding`` and ``blocks`` submodules, because delegating
        unconditionally raises ``AttributeError`` when that method is absent.

        NOTE(review): returning the channel-last output of the residual stack
        is an assumption about what "embeddings" means here — confirm against
        the intended API.
        """
        delegate = getattr(self._model, "get_embeddings", None)
        if delegate is not None:
            return delegate(input_ids)
        backbone = self._model
        features = backbone.blocks(backbone.embedding(input_ids).swapaxes(1, 2))
        return features.swapaxes(1, 2)

    def forward(self, input_ids: torch.Tensor):
        """Run the network and split its output by cell type and allele.

        Returns:
            A ``defaultdict`` mapping each cell-type name to a dict that maps
            each of ``"A"``, ``"C"``, ``"G"``, ``"T"`` to the slice
            ``output[:, :, i, j]`` for that cell type ``i`` and allele ``j``.
        """
        output_tensor = self._model(input_ids)
        results = defaultdict(dict)
        # NOTE(review): five cell types are hard-coded, so this indexing needs
        # `config.num_output_channels` to be at least 5 — confirm.
        for i, cell_type in enumerate(["A549", "HepG2", "K562", "SK-N-SH", "HCT116"]):
            for j, allele in enumerate("ACGT"):
                results[cell_type][allele] = output_tensor[:, :, i, j]
        return results