enumerative-entropy-coding / test_paper_examples.py

Initial commit

24c19d8 unverified 9 months ago

4.55 kB

	#!/usr/bin/env python3
	"""
	Test cases that match specific examples from Han et al. (2008) paper.
	Tests only end-to-end encoding/decoding and verifies results match paper.
	"""

	import numpy as np
	from enumerative_coding import EnumerativeEncoder
	from collections import Counter


	def test_example_from_paper():
	"""
	Test with a specific example that should match the paper's results.
	This verifies the encoding produces the expected output.
	"""
	print("Testing Example from Paper")
	print("=" * 50)

	# Use a simple sequence that we can verify by hand
	# This sequence has: 2 zeros, 2 ones, 1 two
	sequence = [0, 1, 0, 1, 2]

	print(f"Input sequence: {sequence}")
	print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}")

	encoder = EnumerativeEncoder()
	encoded = encoder.encode(sequence)
	decoded = encoder.decode(encoded)

	# Test end-to-end correctness
	assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}"
	print("✓ End-to-end encoding/decoding successful")

	# Show compression metrics
	original_size = len(sequence)
	compressed_size = len(encoded)
	print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes")
	print(f"Compression ratio: {original_size / compressed_size:.2f}")


	def test_paper_sequence_properties():
	"""
	Test with sequences that have the properties discussed in the paper.
	Verify the algorithm handles different symbol distributions correctly.
	"""
	print("\n\nTesting Paper Sequence Properties")
	print("=" * 50)

	test_cases = [
	# (description, sequence)
	("Uniform distribution", [0, 1, 2, 0, 1, 2]),
	("Skewed distribution", [0, 0, 0, 1, 1, 2]),
	("Single symbol", [0, 0, 0, 0]),
	("Binary sequence", [0, 1, 1, 0, 1]),
	]

	encoder = EnumerativeEncoder()

	for description, sequence in test_cases:
	print(f"\n{description}: {sequence}")

	# Test encoding and decoding
	encoded = encoder.encode(sequence)
	decoded = encoder.decode(encoded)

	# Verify correctness
	assert decoded == sequence, f"Failed for {description}"

	# Show results
	compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf')
	print(f" Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})")
	print(" ✓ Correctly encoded and decoded")


	def test_lexicographic_ordering():
	"""
	Test that sequences with the same symbol counts but different orders
	produce different encodings (different lexicographic indices).
	"""
	print("\n\nTesting Lexicographic Ordering")
	print("=" * 50)

	# All permutations of [0, 0, 1, 1] should have different lexicographic indices
	permutations = [
	[0, 0, 1, 1],
	[0, 1, 0, 1],
	[0, 1, 1, 0],
	[1, 0, 0, 1],
	[1, 0, 1, 0],
	[1, 1, 0, 0]
	]

	encoder = EnumerativeEncoder()
	indices = []

	for perm in permutations:
	encoded = encoder.encode(perm)
	decoded = encoder.decode(encoded)

	# Verify end-to-end correctness
	assert decoded == perm, f"Decoding failed for {perm}"

	# Collect compressed size as a proxy for different encodings
	compressed_size = len(encoded)
	indices.append(compressed_size)
	print(f" {perm} -> compressed size: {compressed_size} bytes")

	# Verify all encodings are different (different permutations should produce different encodings)
	encodings = []
	for perm in permutations:
	encoded = encoder.encode(perm)
	encodings.append(encoded)

	# Check that different permutations produce different encodings
	unique_encodings = len(set(encodings))
	total_permutations = len(permutations)
	print(f" Unique encodings: {unique_encodings} out of {total_permutations} permutations")

	assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences."
	print(" ✓ All permutations have unique encodings")


	def main():
	"""Run all paper example tests."""
	test_example_from_paper()
	test_paper_sequence_properties()
	test_lexicographic_ordering()

	print("\n" + "=" * 50)
	print("All paper example tests passed! ✓")


	if __name__ == "__main__":
	main()