File size: 953 Bytes
6a62ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch

from . import BaseWrapperDataset


class PrependDataset(BaseWrapperDataset):
    """Wrapper dataset that overwrites the first element of every item.

    Despite the name, nothing is inserted: position 0 of the item (or of the
    item's first member, when the wrapped dataset yields tuples) is replaced
    with the integer returned by ``prepend_getter(dataset, idx)``, so the
    length of the item is unchanged.

    Args:
        dataset: the dataset to wrap. Each item is either an indexable,
            item-assignable sequence, or a tuple whose first member is one.
        prepend_getter: callable invoked as ``prepend_getter(dataset, idx)``;
            it must return an ``int``.
        ensure_first_token_is: optional sanity check. When not ``None``, the
            element currently at position 0 must compare equal to this value
            before it is overwritten.
    """

    def __init__(self, dataset, prepend_getter, ensure_first_token_is=None):
        super().__init__(dataset)
        self.prepend_getter = prepend_getter
        self.ensure_first_token = ensure_first_token_is

    def __getitem__(self, idx):
        """Return item ``idx`` with its first element replaced.

        Raises:
            AssertionError: if ``ensure_first_token_is`` was given and the
                current first element does not equal it, or if
                ``prepend_getter`` returns something other than an ``int``.
        """
        item = self.dataset[idx]
        is_tuple = isinstance(item, tuple)
        src = item[0] if is_tuple else item

        # Explicit checks instead of ``assert`` statements so the validation
        # is not stripped under ``python -O``; AssertionError is kept so that
        # the exception type seen by callers is unchanged.
        if self.ensure_first_token is not None and not (
            src[0] == self.ensure_first_token
        ):
            raise AssertionError(
                f"expected first token {self.ensure_first_token!r} "
                f"at index {idx}, found {src[0]!r}"
            )

        prepend_idx = self.prepend_getter(self.dataset, idx)
        if not isinstance(prepend_idx, int):
            raise AssertionError(
                "prepend_getter must return an int, "
                f"got {type(prepend_idx).__name__}"
            )

        # In-place write into the object handed back by the wrapped dataset.
        # NOTE(review): if that dataset caches or shares its items, this
        # overwrite would be visible on later reads -- confirm with callers.
        src[0] = prepend_idx

        # Tuple items keep their remaining members untouched.
        return ((src,) + item[1:]) if is_tuple else src