monsoon-nlp committed on
Commit
4e7da13
1 Parent(s): 4a226bb

tokenizer fix

Browse files
Files changed (1) hide show
  1. README.md +4 -2
README.md CHANGED
@@ -43,7 +43,7 @@ Information about location in the kaniwa chromosome: >lcl|Cp5
43
 
44
  ## Usage
45
 
46
- ### Basic inference
47
 
48
  ```python
49
  from peft import AutoPeftModelForCausalLM
@@ -64,16 +64,18 @@ sample = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]
64
  ### LoRA finetuning on a new task
65
 
66
  ```python
 
67
  from trl import SFTTrainer
68
  from unsloth import FastLanguageModel
69
 
70
- model, tokenizer = FastLanguageModel.from_pretrained(
71
  model_name = "monsoon-nlp/llama3-biotokenpretrain-kaniwa",
72
  max_seq_length = 7_000, # max 6,000 bp for AgroNT tasks
73
  dtype = None,
74
  load_in_4bit = True,
75
  resize_model_vocab=128260, # includes biotokens
76
  )
 
77
  tokenizer.pad_token = tokenizer.eos_token # pad fix
78
 
79
  trainer = SFTTrainer(
 
43
 
44
  ## Usage
45
 
46
+ ### Inference with DNA sequence
47
 
48
  ```python
49
  from peft import AutoPeftModelForCausalLM
 
64
  ### LoRA finetuning on a new task
65
 
66
  ```python
67
+ from transformers import AutoTokenizer
68
  from trl import SFTTrainer
69
  from unsloth import FastLanguageModel
70
 
71
+ model, _ = FastLanguageModel.from_pretrained(
72
  model_name = "monsoon-nlp/llama3-biotokenpretrain-kaniwa",
73
  max_seq_length = 7_000, # max 6,000 bp for AgroNT tasks
74
  dtype = None,
75
  load_in_4bit = True,
76
  resize_model_vocab=128260, # includes biotokens
77
  )
78
+ tokenizer = AutoTokenizer.from_pretrained("monsoon-nlp/llama3-biotokenpretrain-kaniwa")
79
  tokenizer.pad_token = tokenizer.eos_token # pad fix
80
 
81
  trainer = SFTTrainer(