|
import pytest |
|
from utils import * |
|
|
|
# Shared server handle used by every test in this module. It is bound here at
# import time and rebound by the autouse `create_server` fixture.
server = ServerPreset.tinyllama2()
|
|
|
|
|
@pytest.fixture(scope="module", autouse=True)
def create_server():
    """Rebind the module-level ``server`` to ``ServerPreset.tinyllama2()``.

    Declared with ``scope="module"`` and ``autouse=True``, so pytest runs it
    once for this module without any test requesting it explicitly. The
    fixture only constructs the preset; each test starts the server itself.
    """
    global server  # required: this function assigns the module-level name
    server = ServerPreset.tinyllama2()
|
|
|
|
|
def test_tokenize_detokenize():
    """Round-trip a prompt through ``/tokenize`` and then ``/detokenize``.

    Checks that both requests return HTTP 200, that tokenizing yields more
    than five tokens, and that the detokenized text (after stripping
    surrounding whitespace) equals the original prompt.
    """
    server.start()

    prompt = "What is the capital of France ?"

    tokenize_res = server.make_request("POST", "/tokenize", data={"content": prompt})
    assert tokenize_res.status_code == 200
    token_ids = tokenize_res.body["tokens"]
    assert len(token_ids) > 5

    # Feed the tokens straight back in and expect the prompt to come out.
    detokenize_res = server.make_request("POST", "/detokenize", data={"tokens": token_ids})
    assert detokenize_res.status_code == 200
    round_tripped = detokenize_res.body["content"].strip()
    assert round_tripped == prompt
|
|
|
|
|
def test_tokenize_with_bos():
    """Check that ``/tokenize`` with ``add_special`` puts the BOS token first.

    Sends the prompt with ``"add_special": True`` and asserts that the request
    succeeds and that the first returned token equals the expected BOS id.
    """
    server.start()

    # Hard-coded BOS id; presumably the value for the tinyllama2 preset's
    # vocabulary -- confirm if the preset model changes.
    expected_bos_id = 1

    payload = {
        "content": "What is the capital of France ?",
        "add_special": True,
    }
    response = server.make_request("POST", "/tokenize", data=payload)

    assert response.status_code == 200
    first_token = response.body["tokens"][0]
    assert first_token == expected_bos_id
|
|
|
|
|
def test_tokenize_with_pieces():
    """Check the per-token structure returned by ``/tokenize`` with ``with_pieces``.

    Sends a prompt containing a CJK character and an emoji with
    ``"with_pieces": True`` and asserts that the request succeeds, that at
    least one token is returned, and that every returned token is a mapping
    with a positive ``id`` and a non-empty ``piece``.
    """
    global server
    server.start()

    content = "This is a test string with unicode 媽 and emoji 🤗"
    res_tok = server.make_request("POST", "/tokenize", data={
        "content": content,
        "with_pieces": True,
    })
    assert res_tok.status_code == 200

    tokens = res_tok.body["tokens"]
    # Without this check an empty token list would skip the loop entirely and
    # the test would pass without validating anything.
    assert len(tokens) > 0

    for token in tokens:
        assert "id" in token
        assert token["id"] > 0
        assert "piece" in token
        assert len(token["piece"]) > 0
|
|