| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import numpy as np |
| import pytest |
|
|
| from verl.utils.tokenizer import normalize_token_ids |
|
|
|
|
class DummyBatchEncoding:
    """Minimal stand-in for a tokenizer's BatchEncoding-like result.

    Exposes only the ``input_ids`` attribute, which is all the code under
    test is expected to read from such objects.
    """

    def __init__(self, input_ids):
        # Stored verbatim; the tests feed lists, nested lists, and
        # tolist()-bearing objects through this attribute.
        self.input_ids = input_ids
|
|
|
|
class DummyToList:
    """Duck-typed stand-in for array-like objects (numpy/torch) that
    expose a ``tolist()`` method returning plain Python data."""

    def __init__(self, data):
        # Kept private; only reachable through tolist().
        self._wrapped = data

    def tolist(self):
        """Return the wrapped payload unchanged, mimicking ndarray.tolist()."""
        return self._wrapped
|
|
|
|
@pytest.mark.parametrize(
    "tokenized_output, expected",
    [
        # plain Python sequences
        ([1, 2, 3], [1, 2, 3]),
        ((1, 2, 3), [1, 2, 3]),
        # array-likes exposing tolist() and real numpy arrays
        (DummyToList([1, 2, 3]), [1, 2, 3]),
        (np.array([1, 2, 3], dtype=np.int64), [1, 2, 3]),
        # dict / BatchEncoding-style containers carrying "input_ids",
        # including a single-sequence batch that should be unwrapped
        ({"input_ids": [1, 2, 3]}, [1, 2, 3]),
        ({"input_ids": DummyToList([1, 2, 3])}, [1, 2, 3]),
        ({"input_ids": [[1, 2, 3]]}, [1, 2, 3]),
        (DummyBatchEncoding([1, 2, 3]), [1, 2, 3]),
        (DummyBatchEncoding(DummyToList([[1, 2, 3]])), [1, 2, 3]),
        # numpy scalar elements must be coerced to plain ints
        ([np.int64(1), np.int32(2), np.int16(3)], [1, 2, 3]),
    ],
)
def test_normalize_token_ids_valid_outputs(tokenized_output, expected):
    """Every supported container shape normalizes to a flat list of ints."""
    result = normalize_token_ids(tokenized_output)
    assert result == expected
|
|
|
|
@pytest.mark.parametrize(
    "tokenized_output",
    [
        "not-token-ids",                 # bare string is not a token sequence
        {"attention_mask": [1, 1, 1]},   # mapping without an "input_ids" key
        [[1, 2], [3, 4]],                # batch of more than one sequence
        [1, object(), 3],                # element that is not integer-like
    ],
)
def test_normalize_token_ids_invalid_outputs(tokenized_output):
    """Structures that cannot be normalized must raise TypeError."""
    with pytest.raises(TypeError):
        normalize_token_ids(tokenized_output)
|
|