from transformers import AutoTokenizer, AutoModelForCausalLM


DEVICE = 'cpu'
TOKENIZER = None
MODEL = None


def setup(model: str, setup_torch: bool = False):
    """Load the tokenizer and model, optionally configuring torch for GPU use."""
    global TOKENIZER, MODEL, DEVICE
    if setup_torch:
        try:
            import torch
            # Only select CUDA (and switch the default tensor type to a GPU
            # tensor) when a GPU is actually available; fall back to CPU otherwise.
            if torch.cuda.is_available():
                torch.set_default_tensor_type(torch.cuda.FloatTensor)
                DEVICE = 'cuda:0'
            else:
                DEVICE = 'cpu'
        except Exception as e:
            print(f"ERROR: Failed to configure torch for GPU use: {e}")

    TOKENIZER = AutoTokenizer.from_pretrained(model)
    # Move the model to the selected device so it matches the inputs in generate().
    MODEL = AutoModelForCausalLM.from_pretrained(model).to(DEVICE)


def generate(token: str) -> str:
    """
    Generate some code with the loaded model from the given input.

    :param token: The prompt text that is tokenized and fed to the model.
    :return: The generated string output.
    """
    if TOKENIZER is None or MODEL is None:
        raise RuntimeError("Model and tokenizer have not been set up; call setup() first.")

    inputs = TOKENIZER(token, return_tensors='pt').to(DEVICE)
    sample = MODEL.generate(**inputs, max_length=128)
    # truncate_before_pattern (a CodeGen tokenizer feature) cuts the decoded
    # text at the first match of any of these patterns, trimming trailing noise.
    return TOKENIZER.decode(sample[0], truncate_before_pattern=[r"\n\n^#", "^'''", "\n\n\n"])
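

# --- Example usage (a minimal sketch) ---
# The checkpoint name below is an assumption for illustration; any causal-LM
# checkpoint works with setup(), but truncate_before_pattern in generate()
# requires a CodeGen-style tokenizer such as Salesforce/codegen-350M-mono's.
if __name__ == '__main__':
    setup('Salesforce/codegen-350M-mono', setup_torch=True)
    print(generate('def hello_world():'))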