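Example: answering a legal question with law-glm-10b, a GLM-style blank-infilling model loaded through transformers: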
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

hf_model = "law-llm/law-glm-10b"
model_cache_dir = "./model_cache"  # assumed local cache path; adjust as needed
max_question_length = 64
max_generation_length = 490

# GLM ships its own tokenizer/model code with the checkpoint, so
# trust_remote_code=True is required.
tokenizer = AutoTokenizer.from_pretrained(
    hf_model,
    cache_dir=model_cache_dir,
    use_fast=True,
    trust_remote_code=True
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    hf_model,
    cache_dir=model_cache_dir,
    trust_remote_code=True
)
model = model.to('cuda')
model.eval()

# GLM answers by blank infilling: the [gMASK] token marks where the
# generated answer goes.
# Prompt: "Question: What is the sentence for the crime of theft? Answer: [gMASK]"
prompt = "提问: 犯了盗窃罪怎么判刑? 回答: [gMASK]"
model_inputs = tokenizer(prompt,
                         max_length=max_question_length,
                         padding=True,
                         truncation=True,
                         return_tensors="pt")
# GLM-specific helper that appends the generation placeholders after
# [gMASK] so that generate() can fill in the blank.
model_inputs = tokenizer.build_inputs_for_generation(model_inputs,
                                                     targets=None,
                                                     max_gen_length=max_generation_length,
                                                     padding=True)
inputs = model_inputs.to('cuda')
outputs = model.generate(**inputs,
                         max_length=max_generation_length,
                         eos_token_id=tokenizer.eop_token_id)
prediction = tokenizer.decode(outputs[0].tolist())
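The decoded string contains the prompt plus GLM's special tokens, not just the answer. A minimal post-processing sketch, assuming the standard GLM <|startofpiece|>/<|endofpiece|> delimiters (the exact token strings are an assumption and may differ for this checkpoint):

# Keep only the infilled answer; fall back to the raw decode if the
# assumed delimiter tokens are absent.
answer = prediction
if "<|startofpiece|>" in prediction:
    answer = prediction.split("<|startofpiece|>")[-1]
    answer = answer.split("<|endofpiece|>")[0].strip()
print(answer)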