Update README.md
Browse files
README.md
CHANGED
@@ -51,17 +51,15 @@ This is the model card of a 🤗 transformers model that has been pushed on the
 ## Uses
 
 <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+使用方法は以下です。
 
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
 import torch
-
 from peft import PeftModel, PeftConfig
-
+
 model_name = "mssfj/gemma-2-9b-bnb-4bit-chat-template"
 lora_weight = "mssfj/gemma-2-9b-4bit-magpie"
 
-# 量子化設定
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=False,
     bnb_4bit_compute_dtype=torch.bfloat16,
@@ -69,17 +67,14 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_use_double_quant=False
 )
 
-# ベースモデルのロード
 base_model = AutoModelForCausalLM.from_pretrained(
     model_name,
     quantization_config=quantization_config,
     device_map="auto"
 )
 
-# QLoRA済みモデルの適用
 model = PeftModel.from_pretrained(base_model, lora_weight)
 
-# トークナイザのロード
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 input="""日本で一番高い山は?
@@ -94,7 +89,6 @@ messages = [
     {"role": "user", "content": input},
 ]
 
-# チャットテンプレートを適用
 input_ids = tokenizer.apply_chat_template(
     messages,
     tokenize=True,