Melissa Roemmele committed
Commit 11f10ff · 1 Parent(s): f948afb

Updated handler.py

Files changed (1)
  1. handler.py +52 -30
handler.py CHANGED
@@ -1,38 +1,60 @@
 import torch
+import transformers
 from typing import Any, Dict
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
-class EndpointHandler:
+class EndpointHandler():
     def __init__(self, path=""):
-        # load model and tokenizer from path
-        self.tokenizer = AutoTokenizer.from_pretrained(path)
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = AutoModelForCausalLM.from_pretrained(path,
-                                                          # device_map="auto",
-                                                          torch_dtype=torch.float16,
-                                                          trust_remote_code=True).to(self.device)
-
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
-        # process input
+        model = AutoModelForCausalLM.from_pretrained(path,
+                                                     torch_dtype=torch.float16,
+                                                     trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(path)
+        # device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        self.pipeline = transformers.pipeline('text-generation',
+                                              model=model,
+                                              tokenizer=tokenizer,
+                                              device_map="auto")
+
+    def __call__(self, data: Dict[str, Any]):
         inputs = data.pop("inputs", data)
         parameters = data.pop("parameters", {})
-        return_full_text = parameters.pop("return_full_text", True)
-
-        # preprocess
-        inputs = self.tokenizer(inputs,
-                                return_tensors="pt",
-                                return_token_type_ids=False)
-        inputs = inputs.to(self.device)
-        input_len = len(inputs[0])
-
-        outputs = self.model.generate(**inputs, **parameters)[0]
-
-        if not return_full_text:
-            outputs = outputs[input_len:]
-
-        # postprocess the prediction
-        prediction = self.tokenizer.decode(outputs,
-                                           skip_special_tokens=True)
-
-        return [{"generated_text": prediction}]
+        with torch.autocast(self.pipeline.device.type, dtype=torch.float16):
+            outputs = self.pipeline(inputs,
+                                    **parameters)
+        return outputs
+
+
+# class EndpointHandler:
+#     def __init__(self, path=""):
+#         # load model and tokenizer from path
+#         self.tokenizer = AutoTokenizer.from_pretrained(path)
+#         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+#         self.model = AutoModelForCausalLM.from_pretrained(path,
+#                                                           device_map="auto",
+#                                                           torch_dtype=torch.float16,
+#                                                           trust_remote_code=True)
+
+#     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+#         # process input
+#         inputs = data.pop("inputs", data)
+#         parameters = data.pop("parameters", {})
+#         return_full_text = parameters.pop("return_full_text", True)
+
+#         # preprocess
+#         inputs = self.tokenizer(inputs,
+#                                 return_tensors="pt",
+#                                 return_token_type_ids=False)
+#         inputs = inputs.to(self.device)
+#         input_len = len(inputs[0])
+
+#         outputs = self.model.generate(**inputs, **parameters)[0]
+
+#         if not return_full_text:
+#             outputs = outputs[input_len:]
+
+#         # postprocess the prediction
+#         prediction = self.tokenizer.decode(outputs,
+#                                            skip_special_tokens=True)
+
+#         return [{"generated_text": prediction}]