# nanoBERT Example

Here we present nanoBERT, a nanobody-specific transformer that predicts the amino
 acid at a given (masked) position in a query sequence.

In [1]:
# Install stadard library
! pip install --upgrade transformers



In [2]:
from transformers import pipeline, RobertaTokenizer, AutoModel

In [3]:
# Load the tokenizer published alongside the nanoBERT checkpoint.
# NOTE(review): return_tensors is normally a call-time tokenizer argument;
# confirm whether passing it to from_pretrained has any effect here.
tokenizer = RobertaTokenizer.from_pretrained(
    "tadsatlawa/nanoBERT",
    return_tensors="pt",
)

In [4]:
# Build a fill-mask pipeline around the nanoBERT checkpoint, reusing the
# tokenizer loaded above; top_k=20 asks for up to 20 candidate residues
# per masked position.
unmasker = pipeline(
    "fill-mask",
    model="tadsatlawa/nanoBERT",
    tokenizer=tokenizer,
    top_k=20,
)

In [5]:
# Predict the residue probability at one or more masked positions.
# Mark each position to predict with '<mask>' (the RoBERTa-style mask token
# the fill-mask pipeline searches for); without it the pipeline has nothing
# to predict and raises an error.
seq = "QLVSGPEVKKP<mask>ASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS"

# Each result is a dict with 'score', 'token', 'token_str' and the filled-in
# 'sequence' for one candidate residue.
residueProbability = unmasker(seq)

# Print residue probabilities
for probability in residueProbability:
    print(probability)

{'score': 0.7448901534080505, 'token': 10, 'token_str': 'G', 'sequence': 'QLVSGPEVKKPGASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS'}
{'score': 0.04520424082875252, 'token': 19, 'token_str': 'R', 'sequence': 'QLVSGPEVKKPRASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS'}
{'score': 0.029332099482417107, 'token': 5, 'token_str': 'A', 'sequence': 'QLVSGPEVKKPAASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS'}
{'score': 0.023554226383566856, 'token': 20, 'token_str': 'S', 'sequence': 'QLVSGPEVKKPSASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS'}
{'score': 0.022556299343705177, 'token': 17, 'token_str': 'P', 'sequence': 'QLVSGPEVKKPPASVKVSCKASGYIFNNYGISWVRQAPGQGLEWMGWISTDNGNTNYAQKVQGRVTMTTDTSTSTAYMELRSLRYDDTAVYYCANNWGSYFEHWGQGTLVTVSS'}
{'score': 0.02046232856810093, 'token':