Qwen
/

Qwen1.5-0.5B-Chat-GPTQ-Int8

Text Generation

text-generation-inference

Inference Endpoints

8-bit precision

Model card Files Files and versions Community

hzhwcmhf committed on Feb 5

Commit

df19309

•

1 Parent(s): 320ba9b

Update README.md

Files changed (1) hide show

README.md +3 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 license: other
 license_name: tongyi-qianwen-research
 license_link: >-
-  https://huggingface.co/Qwen/Qwen1.5-0_5B-Chat-GPTQ-Int8/blob/main/LICENSE
 language:
 - en
 pipeline_tag: text-generation
@@ -47,10 +47,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 model = AutoModelForCausalLM.from_pretrained(
-    "Qwen/Qwen1.5-0_5B-Chat-GPTQ-Int8",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0_5B-Chat-GPTQ-Int8")
 prompt = "Give me a short introduction to large language model."
 messages = [

 license: other
 license_name: tongyi-qianwen-research
 license_link: >-
+  https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8/blob/main/LICENSE
 language:
 - en
 pipeline_tag: text-generation
 device = "cuda" # the device to load the model onto
 model = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8",
     device_map="auto"
 )
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8")
 prompt = "Give me a short introduction to large language model."
 messages = [