File size: 5,420 Bytes
d08512d 245e2c8 d08512d 5fb2d6c d08512d 5fb2d6c a871cf9 245e2c8 500864e 245e2c8 a871cf9 245e2c8 a871cf9 245e2c8 500864e 245e2c8 a871cf9 d08512d 1affbce 5dd3e30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
---
library_name: transformers
tags: []
---
# Model Card for X-LoRA-Gemma-7b
X-LoRA-Gemma combines protein, chemical, bio-inspired and mechanics of materials capabilities. We use a set of four LoRA adapters, defined as follows:
1. Bioinspired materials
2. Mechanics and materials
3. Protein mechanics tasks (featuring generative sequence-to-property and inverse capabilities)
4. Quantum-mechanics based molecular properties QM9 (featuring generative SMILES-to-property and inverse capabilities)
```python
import torch
from xlora.xlora_utils import load_model
# Model identifier handed to xlora's load_model.
XLoRa_model_name = 'lamm-mit/x-lora-gemma-7b'

# Device the model is loaded onto. It is kept in a module-level name because
# the generation helper further down moves its tokenized inputs to `device`.
device = 'cuda:0'

model, tokenizer = load_model(
    model_name=XLoRa_model_name,
    device=device,
    use_flash_attention_2=True,
    dtype=torch.bfloat16,
)

# First token id produced for the '<end_of_turn>' marker; it is passed as the
# stop token in the usage example below.
eos_token_id = tokenizer('<end_of_turn>', add_special_tokens=False)['input_ids'][0]
```
```python
def generate_XLoRA_Gemma(system_prompt='You a helpful assistant. You are familiar with materials science. ',
                         prompt='What is spider silk in the context of bioinspired materials?',
                         repetition_penalty=1., num_beams=1, num_return_sequences=1,
                         top_p=0.9, top_k=256, temperature=.5, max_new_tokens=512, verbatim=False, eos_token=None,
                         add_special_tokens=True, prepend_response='',
                         ):
    """Sample completions for a chat prompt from the module-level ``model``.

    The function relies on the globals ``model`` and ``tokenizer`` created by
    the loading snippet.

    Args:
        system_prompt: Text placed directly in front of ``prompt`` inside a
            single user message. Pass ``None`` to send ``prompt`` alone.
        prompt: The user question.
        repetition_penalty: Forwarded to ``model.generate``.
        num_beams: Forwarded to ``model.generate``.
        num_return_sequences: Number of completions to generate.
        top_p: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        max_new_tokens: Upper bound on the number of generated tokens.
        verbatim: Accepted for compatibility; currently not used.
        eos_token: Token id used both as end-of-sequence and as padding id.
            Defaults to ``tokenizer.eos_token_id`` when ``None``.
        add_special_tokens: Forwarded to the tokenizer call on the templated
            prompt text.
        prepend_response: Text appended after the generation prompt, i.e. a
            prefix the model's answer continues from.

    Returns:
        The result of ``tokenizer.batch_decode`` on the newly generated
        tokens only (the prompt tokens are sliced off), with special tokens
        skipped.
    """
    if eos_token is None:
        eos_token = tokenizer.eos_token_id

    # The system prompt is not sent as a separate role; it is concatenated
    # with the user prompt into one user turn.
    content = prompt if system_prompt is None else system_prompt + prompt
    messages = [{"role": "user", "content": content}]

    txt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    txt = txt + prepend_response

    # Take the device from the model itself instead of relying on a global
    # `device` name, which the surrounding snippets never define.
    model_device = next(model.parameters()).device
    inputs = tokenizer(txt, add_special_tokens=add_special_tokens, return_tensors='pt').to(model_device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=temperature,  # modulates the next-token probabilities
            num_beams=num_beams,
            top_k=top_k,
            top_p=top_p,
            num_return_sequences=num_return_sequences,
            eos_token_id=eos_token,
            pad_token_id=eos_token,
            do_sample=True,
            repetition_penalty=repetition_penalty,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    new_tokens = outputs[:, prompt_length:].detach().cpu().numpy()
    return tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
```
Then, use as follows:
```python
from IPython.display import display, Markdown
q = '''What is graphene?'''
res = generate_XLoRA_Gemma(system_prompt='You design materials.', prompt=q, max_new_tokens=1024, temperature=0.3, eos_token=eos_token_id)
# generate_XLoRA_Gemma returns a list of decoded strings (one per returned
# sequence); Markdown expects a single string, so show the first one.
display(Markdown(res[0]))
```
### Example: Molecular design
```python
def design_from_target(
    model,
    tokenizer,
    target,
    temperature=0.1,
    num_beams=1,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.0,
    messages=None,
):
    """Generate a response for a ``GenerateMolecularProperties<...>`` query.

    Args:
        model: Model object passed through to ``generate_response``.
        tokenizer: Tokenizer used to apply the chat template; also passed
            through to ``generate_response``.
        target: Target values, rendered into the query via ``return_str``.
        temperature: Forwarded to ``generate_response``.
        num_beams: Accepted but not forwarded to ``generate_response``.
        top_k: Forwarded to ``generate_response``.
        top_p: Forwarded to ``generate_response``.
        repetition_penalty: Accepted but not forwarded to
            ``generate_response``.
        messages: Optional chat history. The new user turn is appended to
            this list in place. When omitted, a fresh empty history is used
            for each call.

    Returns:
        The first element of the sequence returned by ``generate_response``.

    Note:
        ``return_str`` and ``generate_response`` are expected to be defined
        elsewhere; they are not part of this snippet.
    """
    # A list literal as the default would be shared across calls and keep
    # growing with every call, so create the default history here instead.
    if messages is None:
        messages = []

    # Format the target line for molecular property generation
    line = f'GenerateMolecularProperties<{return_str(target)}>'

    # Add the line to the message history
    messages.append({"role": "user", "content": line})

    # Render the history to prompt text (no tokenization at this stage)
    line = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Generate response with specified parameters
    result = generate_response(
        model,
        tokenizer,
        text_input=line,
        num_return_sequences=1,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        max_new_tokens=256
    )[0]

    return result
```
Use case:
```python
import numpy as np
# Twelve random target values, used here only as a placeholder input.
target = np.random.rand(12)
SMILES = design_from_target(model, tokenizer, target, messages=[])
print(SMILES)
```
Calculate molecular properties:
```python
def properties_from_SMILES(
    model,
    tokenizer,
    target,
    temperature=0.1,
    top_k=128,
    top_p=0.9,
    num_beams=1,
    repetition_penalty=1.0
):
    """Ask the model for the properties of ``target`` and parse them to floats.

    Builds a ``CalculateMolecularProperties<target>`` user message, renders it
    with the tokenizer's chat template, and generates one response with at
    most 256 new tokens. The text selected by ``extract_start_and_end`` with
    ``'['`` and ``']'`` as delimiters is split on commas and each piece is
    converted with ``float``.

    ``num_beams`` and ``repetition_penalty`` are accepted but not forwarded
    to ``generate_response``. ``generate_response`` and
    ``extract_start_and_end`` are expected to be defined elsewhere.

    Returns:
        A list of floats parsed from the generated response.
    """
    query = f'CalculateMolecularProperties<{target}>'
    chat = [{"role": "user", "content": query}]

    # Render the single-turn chat to prompt text (no tokenization here).
    prompt_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    generations = generate_response(
        model,
        tokenizer,
        text_input=prompt_text,
        num_return_sequences=1,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        max_new_tokens=256
    )
    first_generation = generations[0]

    # Keep only the list portion of the response, then parse each entry.
    bracketed = extract_start_and_end(first_generation, start_token='[', end_token=']')
    return list(map(float, bracketed.split(',')))
```
|