kfkas committed on
Commit
a252122
•
1 Parent(s): 8feb836

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -5
README.md CHANGED
@@ -19,15 +19,25 @@ OUTPUT = ๊ฐ label์— ๋งž๋Š” ๋‰ด์Šค ๊ธฐ์‚ฌ ์ œ๋ชฉ์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
19
  ```python
20
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
21
 
 
22
  model_dir = "t5-large-korean-news-title-klue-ynat"
23
  tokenizer = AutoTokenizer.from_pretrained(model_dir)
24
  model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
 
25
 
26
- text = "회새긴간 작까 김동시 걍심꼬백 뜽 새 소설집 뚜권 출간"
27
- inputs = tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
28
- output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=100)
29
- decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
30
- predicted_title = nltk.sent_tokenize(decoded_output.strip())[0]
 
 
 
 
 
 
 
 
31
  print(predicted_title)
32
  ```
33
 
 
19
  ```python
20
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
21
 
22
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
  model_dir = "t5-large-korean-news-title-klue-ynat"
24
  tokenizer = AutoTokenizer.from_pretrained(model_dir)
25
  model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
26
+ model.to(device)
27
 
28
+ label_list = ['IT๊ณผํ•™','๊ฒฝ์ œ','์‚ฌํšŒ','์ƒํ™œ๋ฌธํ™”','์„ธ๊ณ„','์Šคํฌ์ธ ','์ •์น˜']
29
+ text = "IT๊ณผํ•™"
30
+
31
+ inputs = tokenizer.encode(text, max_length=256, truncation=True, return_tensors="pt")
32
+ with torch.no_grad():
33
+ output = model.generate(
34
+ input_ids,
35
+ do_sample=True, #샘플링 전략 사용
36
+ max_length=128, # 최대 디코딩 길이는 50
37
+ top_k=50, # 확률 순위가 50위 밖인 토큰은 샘플링에서 제외
38
+ top_p=0.95, # 누적 확률이 95%인 후보집합에서만 생성
39
+ )
40
+ decoded_output = tokenizer.decode(output, skip_special_tokens=True)[0]
41
  print(predicted_title)
42
  ```
43