File size: 1,442 Bytes
dbae32f
 
 
 
f0540d3
 
 
 
dbae32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19e58f9
dbae32f
 
19e58f9
dbae32f
 
 
 
 
19e58f9
 
 
 
 
 
 
dbae32f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
---
license: apache-2.0
language:
- en
base_model:
- answerdotai/ModernBERT-base
- answerdotai/ModernBERT-large
base_model_relation: quantized
tags:
- fill-mask
- masked-lm
- long-context
- modernbert
---

# ModernBERT-CoreML

This repository contains [ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) and [ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large) converted to Core ML.

### Example Usage

```swift
import CoreML
import Tokenizers

// Fill-mask demo: the prompt contains one `[MASK]` placeholder, and the
// highest-scoring token at that position is substituted back into the text.
let maskToken = "[MASK]"
let prompt = "The capital of Ireland is [MASK]."

print("Loading…")
let bert = try await ModernBERT_base.load()
let bertTokenizer = try await AutoTokenizer.from(pretrained: "answerdotai/ModernBERT-base")

print("Tokenizing…")
let tokenIDs = bertTokenizer(prompt)
// The model input is built as a single-row array: shape [1, number of tokens].
let inputScalars = tokenIDs.map { Int32($0) }
let inputIDs = MLShapedArray(scalars: inputScalars, shape: [1, tokenIDs.count])
let features = ModernBERT_baseInput(input_ids: inputIDs)

print("Predicting…")
let prediction = try await bert.prediction(input: features)
let logits = prediction.logitsShapedArray

print("Decoding…")
// Find where the mask token sits in the tokenized prompt. Both unwraps trap
// if the tokenizer does not know the token or the prompt does not contain it.
let maskID = bertTokenizer.convertTokenToId(maskToken)!
let maskIndex = tokenIDs.firstIndex(of: maskID)!
// NOTE(review): assumes `logits` is indexed as (batch, position, vocabulary),
// so this slice holds the vocabulary scores for the masked position — confirm.
let maskLogits = MLTensor(logits[0, maskIndex])
let bestTokenID = await maskLogits.argmax().shapedArray(of: Int32.self).scalar!
let bestTokenText = bertTokenizer.decode(tokens: [Int(bestTokenID)])

print("Result:")
// The decoded token may carry surrounding whitespace; strip it before splicing.
let filledIn = prompt.replacingOccurrences(
    of: maskToken,
    with: bestTokenText.trimmingCharacters(in: .whitespaces)
)
print(filledIn)
// Expected: The capital of Ireland is Dublin.
```