File size: 1,289 Bytes
10b912d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from typing import List, Any
import tiktoken


class AbstractCompressor:
    """
    Base interface shared by prompt compressors.

    ``compress`` and ``fit`` raise ``NotImplementedError`` here, and
    ``set_model`` does nothing, so concrete behaviour has to come from
    subclasses that override them.
    """

    # Placeholders; nothing in this base class assigns them.
    # NOTE(review): presumably filled in by subclasses -- confirm.
    tokenizer = None
    base_model = None

    # Resolved once, when the class body executes (i.e. at import time), and
    # shared by every subclass and instance.
    gpt_tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo-16k")

    def compress(self, original_prompt: str, ratio: float) -> dict:
        """
        Compress a prompt/sentence according to a compression ratio.

        Implementations are expected to return a dict containing:

        * ``'compressed_prompt'``: compressed prompt
        * ``'ratio'``: compression ratio
        * ``'original_tokens'``: token count of original prompt
        * ``'compressed_tokens'``: token count of compressed prompt

        :param original_prompt: the prompt/sentence to compress
        :param ratio: the compression ratio
        :return: dict object with the keys listed above
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def fit(self, datas: List[dict], valid_size: int) -> None:
        """
        Train the parameters of a trainable compression method.

        :param datas: training records (the original note mentions LongBench;
            the record schema is not visible here -- TODO confirm)
        :param valid_size: size of the validation set
        :return: None
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def set_model(self, model: Any, **kwargs):
        """
        Specify a trained or a pre-trained model.

        The base implementation is a no-op: it stores nothing and ignores
        every argument.

        :param model: the model object to use
        :param kwargs: extra options; unused here
        :return: None
        """
        return None