cutechicken committed on
Commit
66a10f8
·
verified ·
1 Parent(s): d18a77d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -46
app.py CHANGED
@@ -25,10 +25,11 @@ class ModelManager:
25
  print("ν† ν¬λ‚˜μ΄μ € λ‘œλ”© μ‹œμž‘...")
26
  self.tokenizer = AutoTokenizer.from_pretrained(
27
  MODEL_ID,
 
28
  token=HF_TOKEN,
29
  trust_remote_code=True
30
  )
31
- if self.tokenizer.pad_token is None:
32
  self.tokenizer.pad_token = self.tokenizer.eos_token
33
  print("ν† ν¬λ‚˜μ΄μ € λ‘œλ”© μ™„λ£Œ")
34
 
@@ -38,9 +39,16 @@ class ModelManager:
38
  token=HF_TOKEN,
39
  torch_dtype=torch.bfloat16,
40
  device_map="auto",
41
- trust_remote_code=True
 
42
  )
 
43
  print("λͺ¨λΈ λ‘œλ”© μ™„λ£Œ")
 
 
 
 
 
44
  except Exception as e:
45
  print(f"λͺ¨λΈ λ‘œλ”© 쀑 였λ₯˜ λ°œμƒ: {e}")
46
  raise Exception(f"λͺ¨λΈ λ‘œλ”© μ‹€νŒ¨: {e}")
@@ -48,59 +56,54 @@ class ModelManager:
48
  @spaces.GPU
49
  def generate_response(self, messages, max_tokens=4000, temperature=0.7, top_p=0.9):
50
  try:
51
- # λ©”μ‹œμ§€ ν¬λ§·νŒ…
52
- formatted_messages = []
 
 
 
53
  for msg in messages:
54
- if msg["role"] == "system":
55
- formatted_messages.append(f"System: {msg['content']}\n")
56
- elif msg["role"] == "user":
57
- formatted_messages.append(f"User: {msg['content']}\n")
58
- elif msg["role"] == "assistant":
59
- formatted_messages.append(f"Assistant: {msg['content']}\n")
60
-
61
- # μž…λ ₯ ν…μŠ€νŠΈ 생성
62
- prompt = "".join(formatted_messages)
63
-
64
- # ν† ν¬λ‚˜μ΄μ§•
65
- inputs = self.tokenizer(
66
  prompt,
67
  return_tensors="pt",
68
- padding=True,
69
- truncation=True,
70
- max_length=4096
71
  ).to(self.model.device)
72
-
73
- # 슀트리머 μ„€μ •
74
- streamer = TextIteratorStreamer(
75
- self.tokenizer,
76
- timeout=10.,
77
- skip_prompt=True,
 
 
 
 
 
 
 
 
 
 
 
78
  skip_special_tokens=True
79
  )
80
 
81
- # 생성 μ„€μ •
82
- generate_kwargs = dict(
83
- **inputs,
84
- streamer=streamer,
85
- max_new_tokens=max_tokens,
86
- do_sample=True,
87
- temperature=temperature,
88
- top_p=top_p,
89
- pad_token_id=self.tokenizer.pad_token_id,
90
- eos_token_id=self.tokenizer.eos_token_id
91
- )
92
-
93
- # 비동기 생성
94
- thread = Thread(target=self.model.generate, kwargs=generate_kwargs)
95
- thread.start()
96
-
97
- # 응닡 슀트리밍
98
- buffer = ""
99
- for new_text in streamer:
100
- buffer += new_text
101
  yield type('Response', (), {
102
  'choices': [type('Choice', (), {
103
- 'delta': {'content': new_text}
104
  })()]
105
  })()
106
 
 
25
  print("ν† ν¬λ‚˜μ΄μ € λ‘œλ”© μ‹œμž‘...")
26
  self.tokenizer = AutoTokenizer.from_pretrained(
27
  MODEL_ID,
28
+ use_fast=True,
29
  token=HF_TOKEN,
30
  trust_remote_code=True
31
  )
32
+ if not self.tokenizer.pad_token:
33
  self.tokenizer.pad_token = self.tokenizer.eos_token
34
  print("ν† ν¬λ‚˜μ΄μ € λ‘œλ”© μ™„λ£Œ")
35
 
 
39
  token=HF_TOKEN,
40
  torch_dtype=torch.bfloat16,
41
  device_map="auto",
42
+ trust_remote_code=True,
43
+ low_cpu_mem_usage=True
44
  )
45
+ self.model.eval() # 평가 λͺ¨λ“œλ‘œ μ„€μ •
46
  print("λͺ¨λΈ λ‘œλ”© μ™„λ£Œ")
47
+
48
+ # λͺ¨λΈκ³Ό ν† ν¬λ‚˜μ΄μ €κ°€ μ œλŒ€λ‘œ λ‘œλ“œλ˜μ—ˆλŠ”μ§€ 확인
49
+ if self.model is None or self.tokenizer is None:
50
+ raise Exception("λͺ¨λΈ λ˜λŠ” ν† ν¬λ‚˜μ΄μ €κ°€ μ œλŒ€λ‘œ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
51
+
52
  except Exception as e:
53
  print(f"λͺ¨λΈ λ‘œλ”© 쀑 였λ₯˜ λ°œμƒ: {e}")
54
  raise Exception(f"λͺ¨λΈ λ‘œλ”© μ‹€νŒ¨: {e}")
 
56
  @spaces.GPU
57
  def generate_response(self, messages, max_tokens=4000, temperature=0.7, top_p=0.9):
58
  try:
59
+ if self.model is None or self.tokenizer is None:
60
+ raise Exception("λͺ¨λΈμ΄ μ΄ˆκΈ°ν™”λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
61
+
62
+ # μž…λ ₯ ν…μŠ€νŠΈ μ€€λΉ„
63
+ prompt = ""
64
  for msg in messages:
65
+ role = msg["role"]
66
+ content = msg["content"]
67
+ if role == "system":
68
+ prompt += f"System: {content}\n"
69
+ elif role == "user":
70
+ prompt += f"Human: {content}\n"
71
+ elif role == "assistant":
72
+ prompt += f"Assistant: {content}\n"
73
+ prompt += "Assistant: " # 응닡 μ‹œμž‘ ν”„λ‘¬ν”„νŠΈ
74
+
75
+ # μž…λ ₯ 인코딩
76
+ input_ids = self.tokenizer.encode(
77
  prompt,
78
  return_tensors="pt",
79
+ add_special_tokens=True
 
 
80
  ).to(self.model.device)
81
+
82
+ # 응닡 생성
83
+ with torch.no_grad():
84
+ output_ids = self.model.generate(
85
+ input_ids,
86
+ max_new_tokens=max_tokens,
87
+ do_sample=True,
88
+ temperature=temperature,
89
+ top_p=top_p,
90
+ pad_token_id=self.tokenizer.pad_token_id,
91
+ eos_token_id=self.tokenizer.eos_token_id,
92
+ num_return_sequences=1
93
+ )
94
+
95
+ # 응닡 λ””μ½”λ”© 및 슀트리밍
96
+ generated_text = self.tokenizer.decode(
97
+ output_ids[0][input_ids.shape[1]:],
98
  skip_special_tokens=True
99
  )
100
 
101
+ # 단어 λ‹¨μœ„λ‘œ 슀트리밍
102
+ words = generated_text.split()
103
+ for word in words:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  yield type('Response', (), {
105
  'choices': [type('Choice', (), {
106
+ 'delta': {'content': word + " "}
107
  })()]
108
  })()
109