Bo1015
/

proteinglm-100b-int4

@@ -54,13 +54,19 @@ import torch
 tokenizer  = AutoTokenizer.from_pretrained("biomap-research/xtrimopglm-100b-int4", trust_remote_code=True, use_fast=True)
 config = AutoConfig.from_pretrained("biomap-research/xtrimopglm-100b-int4",  trust_remote_code=True, torch_dtype=torch.half)
 config.is_causal=False
-model = AutoModelForMaskedLM.from_config(config, trust_remote_code=True, torch_dtype=torch.half)
 # # if you don't have the single gpu with 80G memory, try the dispatch load.
 # model = load_checkpoint_and_dispatch(
-#     model, "biomap-research/xtrimopglm-100b-int4", device_map="auto", no_split_module_classes=["xTrimoPGLMBlock"], strict=True, dtype=dtype
 # )
-if torch.cuda.is_available():
-    model = model.cuda()
 model.eval()
 seq = 'MILMCQHFSGQFSKYFLAVSSDFCHFVFPIILVSHVNFKQMKRKGFALWNDRAVPFTQGIFTTVMILLQYLHGTG'
@@ -86,13 +92,18 @@ import torch
 tokenizer  = AutoTokenizer.from_pretrained("biomap-research/xtrimopglm-100b-int4", trust_remote_code=True, use_fast=True)
 config = AutoConfig.from_pretrained("biomap-research/xtrimopglm-100b-int4",  trust_remote_code=True, torch_dtype=torch.half)
 config.is_causal=True
-model = AutoModelForCausalLM.from_config(config, trust_remote_code=True, torch_dtype=torch.half)
 # # if you don't have the single gpu with 80G memory, try the dispatch load.
 # model = load_checkpoint_and_dispatch(
-#     model, "biomap-research/xtrimopglm-100b-int4", device_map="auto", no_split_module_classes=["xTrimoPGLMBlock"], strict=True, dtype=dtype
 # )
-if torch.cuda.is_available():
-    model = model.cuda()
 model.eval()
 gen_kwargs = {'max_length': 256, 'top_p': 0.8, 'temperature':0.9, "num_beams": 1}

 tokenizer  = AutoTokenizer.from_pretrained("biomap-research/xtrimopglm-100b-int4", trust_remote_code=True, use_fast=True)
 config = AutoConfig.from_pretrained("biomap-research/xtrimopglm-100b-int4",  trust_remote_code=True, torch_dtype=torch.half)
 config.is_causal=False
+model = AutoModelForMaskedLM.from_pretrained("biomap-research/xtrimopglm-100b-int4", config = config, torch_dtype=torch.half,trust_remote_code=True)
+if torch.cuda.is_available():
+    model = model.cuda()
 # # If you don't have a single GPU with 80 GB of memory, try the dispatch load below.
+# from accelerate import load_checkpoint_and_dispatch, init_empty_weights
+# with init_empty_weights():
+#     model = AutoModelForMaskedLM.from_config(config, trust_remote_code=True)
+#
 # model = load_checkpoint_and_dispatch(
+#     model, "<your model cached dir>", device_map="auto", no_split_module_classes=["xTrimoPGLMBlock"], strict=True, dtype=torch.half
 # )
 model.eval()
 seq = 'MILMCQHFSGQFSKYFLAVSSDFCHFVFPIILVSHVNFKQMKRKGFALWNDRAVPFTQGIFTTVMILLQYLHGTG'
 tokenizer  = AutoTokenizer.from_pretrained("biomap-research/xtrimopglm-100b-int4", trust_remote_code=True, use_fast=True)
 config = AutoConfig.from_pretrained("biomap-research/xtrimopglm-100b-int4",  trust_remote_code=True, torch_dtype=torch.half)
 config.is_causal=True
+model = AutoModelForCausalLM.from_pretrained("biomap-research/xtrimopglm-100b-int4", config = config, torch_dtype=torch.half,trust_remote_code=True)
+if torch.cuda.is_available():
+    model = model.cuda()
 # # If you don't have a single GPU with 80 GB of memory, try the dispatch load below.
+# from accelerate import load_checkpoint_and_dispatch, init_empty_weights
+# with init_empty_weights():
+#     model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
+#
 # model = load_checkpoint_and_dispatch(
+#     model, "<your model cached dir>", device_map="auto", no_split_module_classes=["xTrimoPGLMBlock"], strict=True, dtype=torch.half
 # )
 model.eval()
 gen_kwargs = {'max_length': 256, 'top_p': 0.8, 'temperature':0.9, "num_beams": 1}