| |
| """ |
| Test cases that match specific examples from Han et al. (2008) paper. |
| Tests only end-to-end encoding/decoding and verifies results match paper. |
| """ |
|
|
| import numpy as np |
| from enumerative_coding import EnumerativeEncoder |
| from collections import Counter |
|
|
|
|
| def test_example_from_paper(): |
| """ |
| Test with a specific example that should match the paper's results. |
| This verifies the encoding produces the expected output. |
| """ |
| print("Testing Example from Paper") |
| print("=" * 50) |
| |
| |
| |
| sequence = [0, 1, 0, 1, 2] |
| |
| print(f"Input sequence: {sequence}") |
| print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}") |
| |
| encoder = EnumerativeEncoder() |
| encoded = encoder.encode(sequence) |
| decoded = encoder.decode(encoded) |
| |
| |
| assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}" |
| print("✓ End-to-end encoding/decoding successful") |
| |
| |
| original_size = len(sequence) |
| compressed_size = len(encoded) |
| print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes") |
| print(f"Compression ratio: {original_size / compressed_size:.2f}") |
|
|
|
|
| def test_paper_sequence_properties(): |
| """ |
| Test with sequences that have the properties discussed in the paper. |
| Verify the algorithm handles different symbol distributions correctly. |
| """ |
| print("\n\nTesting Paper Sequence Properties") |
| print("=" * 50) |
| |
| test_cases = [ |
| |
| ("Uniform distribution", [0, 1, 2, 0, 1, 2]), |
| ("Skewed distribution", [0, 0, 0, 1, 1, 2]), |
| ("Single symbol", [0, 0, 0, 0]), |
| ("Binary sequence", [0, 1, 1, 0, 1]), |
| ] |
| |
| encoder = EnumerativeEncoder() |
| |
| for description, sequence in test_cases: |
| print(f"\n{description}: {sequence}") |
| |
| |
| encoded = encoder.encode(sequence) |
| decoded = encoder.decode(encoded) |
| |
| |
| assert decoded == sequence, f"Failed for {description}" |
| |
| |
| compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf') |
| print(f" Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})") |
| print(" ✓ Correctly encoded and decoded") |
|
|
|
|
| def test_lexicographic_ordering(): |
| """ |
| Test that sequences with the same symbol counts but different orders |
| produce different encodings (different lexicographic indices). |
| """ |
| print("\n\nTesting Lexicographic Ordering") |
| print("=" * 50) |
| |
| |
| permutations = [ |
| [0, 0, 1, 1], |
| [0, 1, 0, 1], |
| [0, 1, 1, 0], |
| [1, 0, 0, 1], |
| [1, 0, 1, 0], |
| [1, 1, 0, 0] |
| ] |
| |
| encoder = EnumerativeEncoder() |
| indices = [] |
| |
| for perm in permutations: |
| encoded = encoder.encode(perm) |
| decoded = encoder.decode(encoded) |
| |
| |
| assert decoded == perm, f"Decoding failed for {perm}" |
| |
| |
| compressed_size = len(encoded) |
| indices.append(compressed_size) |
| print(f" {perm} -> compressed size: {compressed_size} bytes") |
| |
| |
| encodings = [] |
| for perm in permutations: |
| encoded = encoder.encode(perm) |
| encodings.append(encoded) |
| |
| |
| unique_encodings = len(set(encodings)) |
| total_permutations = len(permutations) |
| print(f" Unique encodings: {unique_encodings} out of {total_permutations} permutations") |
| |
| assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences." |
| print(" ✓ All permutations have unique encodings") |
|
|
|
|
| def main(): |
| """Run all paper example tests.""" |
| test_example_from_paper() |
| test_paper_sequence_properties() |
| test_lexicographic_ordering() |
| |
| print("\n" + "=" * 50) |
| print("All paper example tests passed! ✓") |
|
|
|
|
| if __name__ == "__main__": |
| main() |