Passkey evaluation on FlashInfer backend

#16, opened by joejose2728

I'm trying to evaluate this model on the passkey dataset at 32k and 125k token context lengths.
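For reference, each run is launched via lm-eval's Python API, roughly like this (a minimal sketch; the model path and the "passkey_32k" task name are placeholders, not the exact ones used here):

import lm_eval

# Hypothetical invocation: model path and task name are placeholders.
# max_model_len must cover the longest prompt (the 125k-token case).
results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        "pretrained=/path/to/model,"
        "max_model_len=131072,"
        "gpu_memory_utilization=0.9"
    ),
    tasks=["passkey_32k"],
)
print(results["results"])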

Setup:

  • FlashInfer backend
  • Updated vLLM's llama.py to support interleaved sliding-window attention (SWA); a rough sketch follows this list
  • SWA window pattern: [full, 32768, 32768, 32768]
  • lm-eval 0.4.4
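Conceptually, the llama.py change just picks a per-layer window from a repeating pattern (a rough sketch of the idea, not the actual patch; SWA_PATTERN and get_layer_window are made-up names):

# Pattern repeats every 4 decoder layers: one full-attention layer,
# then three layers with a 32768-token sliding window.
# None is vLLM's convention for "no sliding window", i.e. full attention.
SWA_PATTERN = [None, 32768, 32768, 32768]

def get_layer_window(layer_idx: int) -> int | None:
    """Hypothetical helper: sliding-window size for a given layer."""
    return SWA_PATTERN[layer_idx % len(SWA_PATTERN)]

# Each layer's attention would then be built with
# sliding_window=get_layer_window(layer_idx).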

Results:

Output from the model for 125k context:

"resps": [["<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>"]],"filtered_resps": ["<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>"],

Output from the model for 32k context:

SWA: [full, 32768, 32768, 32768]

"resps": [
[
"50489"
]
],
"filtered_resps": [
"50489"
],
"exact_match": 1.0

SWA: [full, 16384, 16384, 16384]

"resps": [
[
"50489 is the pass key"
]
],
"filtered_resps": [
"50489 is the pass key"
],
"exact_match": 0.0

Questions

  • Any idea why the model outputs garbage at 125k context?
  • Do we need to update the RoPE scaling for longer contexts? (A hedged sketch of overriding it in vLLM follows below.)
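In case it helps the discussion, here is a hedged sketch of forcing a RoPE scaling override at load time in vLLM (the exact keys depend on the vLLM version, and the 4x YaRN factor is a placeholder, not a verified setting for this model):

from vllm import LLM

# Assumption: this vLLM build accepts a rope_scaling engine argument.
# "rope_type" may be spelled "type" in older versions.
llm = LLM(
    model="/path/to/model",  # placeholder path
    max_model_len=131072,
    rope_scaling={"rope_type": "yarn", "factor": 4.0},
)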
