| | """ |
| | BTLM_Extensions: Extensions Package for BitTransformerLM |
| | ======================================================= |
| | |
| | This package provides advanced optimizers and compression techniques |
| | as extensions for BitTransformerLM, allowing easy experimentation with |
| | different training configurations. |
| | |
| | Available Extensions: |
| | |
| | Optimizers: |
| | - Muon: Orthogonal momentum optimizer with Newton-Schulz iterations |
| | - Lion: EvoLved Sign Momentum optimizer for memory efficiency |
| | - Adafactor: Memory-efficient factorized optimizer |
| | |
| | Compression: |
| | - RLE: Advanced Run-Length Encoding with multiple schemes |
| | |
| | Usage: |
| | from BTLM_Extensions import configure_muon_optimizer, RLEEncoder |
| | |
| | # Use Muon optimizer |
| | optimizer, scheduler = configure_muon_optimizer(model, lr=1e-3) |
| | |
| | # Use RLE compression |
| | encoder = RLEEncoder(scheme="adaptive") |
| | compressed, metadata = encoder.encode(data) |
| | """ |

__version__ = "1.0.0"
__author__ = "BitTransformerLM Extensions"
__email__ = "extensions@bittransformerlm.ai"

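# Optimizer extensions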
from .muon_optimizer import (
    Muon,
    configure_muon_optimizer,
    create_muon_training_config,
)

from .lion_optimizer import (
    Lion,
    AdaptiveLion,
    configure_lion_optimizer,
    configure_adaptive_lion_optimizer,
    create_lion_training_config,
)

from .adafactor_optimizer import (
    Adafactor,
    AdafactorScheduler,
    configure_adafactor_optimizer,
    configure_adafactor_with_scheduler,
    create_adafactor_training_config,
    analyze_memory_usage,
)

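# Compression extensions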
from .rle_compression import (
    RLEEncoder,
    CompressedBitDataset,
    create_compression_aware_loss,
    integrate_rle_with_training,
    benchmark_compression_schemes,
    create_rle_training_config,
)


def get_optimizer_config(optimizer_type: str, **kwargs):
    """
    Get the configuration for a specified optimizer type.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        **kwargs: Optimizer-specific parameters

    Returns:
        Dictionary with optimizer configuration
    """
    if optimizer_type.lower() == "muon":
        return create_muon_training_config(**kwargs)
    elif optimizer_type.lower() == "lion":
        return create_lion_training_config(**kwargs)
    elif optimizer_type.lower() == "adafactor":
        return create_adafactor_training_config(**kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")
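# Example (illustrative; the accepted keyword arguments depend on the
# underlying create_*_training_config helpers):
#   config = get_optimizer_config("lion", lr=1e-4)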


def configure_optimizer(optimizer_type: str, model, **kwargs):
    """
    Configure an optimizer based on a type string.

    Args:
        optimizer_type: Type of optimizer ('muon', 'lion', 'adafactor')
        model: PyTorch model to optimize
        **kwargs: Optimizer-specific parameters

    Returns:
        Tuple of (optimizer, scheduler)
    """
    if optimizer_type.lower() == "muon":
        return configure_muon_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "lion":
        return configure_lion_optimizer(model, **kwargs)
    elif optimizer_type.lower() == "adafactor":
        return configure_adafactor_optimizer(model, **kwargs)
    else:
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")
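# Example (illustrative; assumes `model` is a BitTransformerLM-style module):
#   optimizer, scheduler = configure_optimizer("muon", model, lr=1e-3)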


class ExtensionManager:
    """
    Manager class for easy integration with BitTransformerLM.

    Provides a unified interface for switching between optimizers
    and compression schemes.
    """

    SUPPORTED_OPTIMIZERS = ["muon", "lion", "adafactor"]
    SUPPORTED_COMPRESSION = ["rle"]

    def __init__(self):
        self.current_optimizer = None
        self.current_compression = None

    def setup_optimizer(self, optimizer_type: str, model, **kwargs):
        """Set up an optimizer for training."""
        if optimizer_type not in self.SUPPORTED_OPTIMIZERS:
            raise ValueError(f"Unsupported optimizer: {optimizer_type}")

        optimizer, scheduler = configure_optimizer(optimizer_type, model, **kwargs)
        self.current_optimizer = optimizer_type
        return optimizer, scheduler

    def setup_compression(self, compression_type: str, **kwargs):
        """Set up a compression scheme."""
        if compression_type not in self.SUPPORTED_COMPRESSION:
            raise ValueError(f"Unsupported compression: {compression_type}")

        if compression_type == "rle":
            encoder = RLEEncoder(**kwargs)
            self.current_compression = compression_type
            return encoder

    def create_training_config(self, optimizer_type: str = "muon", compression_type: str = "rle", **kwargs):
        """Create a comprehensive training configuration."""
        config = {
            "optimizer": get_optimizer_config(optimizer_type, **kwargs),
            "compression": create_rle_training_config(**kwargs) if compression_type == "rle" else None,
            "extensions": {
                "optimizer_type": optimizer_type,
                "compression_type": compression_type,
                "version": __version__,
            },
        }
        return config

    def benchmark_optimizers(self, model, test_data, epochs: int = 5):
        """Benchmark all available optimizers on test data."""
        import copy
        import time

        import torch.nn.functional as F

        results = {}

        for opt_type in self.SUPPORTED_OPTIMIZERS:
            print(f"Benchmarking {opt_type} optimizer...")

            # Deep-copy the model so every optimizer starts from identical
            # weights (avoids relying on private constructor helpers).
            model_copy = copy.deepcopy(model)

            try:
                optimizer, scheduler = self.setup_optimizer(opt_type, model_copy, lr=1e-3)

                start_time = time.time()
                losses = []

                for epoch in range(epochs):
                    optimizer.zero_grad()

                    # Next-bit prediction: logits at position t predict the bit at t+1.
                    logits, _ = model_copy(test_data)
                    pred = logits[:, :-1, :].reshape(-1, 2)
                    # cross_entropy expects integer class targets.
                    target = test_data[:, 1:].reshape(-1).long()
                    loss = F.cross_entropy(pred, target)

                    loss.backward()
                    optimizer.step()
                    if scheduler:
                        scheduler.step()

                    losses.append(loss.item())

                end_time = time.time()

                results[opt_type] = {
                    "final_loss": losses[-1],
                    "avg_loss": sum(losses) / len(losses),
                    "training_time": end_time - start_time,
                    "convergence": losses[0] - losses[-1],
                    "success": True,
                }

            except Exception as e:
                results[opt_type] = {
                    "final_loss": float("inf"),
                    "avg_loss": float("inf"),
                    "training_time": 0,
                    "convergence": 0,
                    "success": False,
                    "error": str(e),
                }

        return results
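# Example (illustrative): swap optimizers without touching the training loop.
#   manager = ExtensionManager()
#   optimizer, scheduler = manager.setup_optimizer("lion", model, lr=1e-4)
#   encoder = manager.setup_compression("rle", scheme="adaptive")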


# Global convenience instance
extension_manager = ExtensionManager()


__all__ = [
    # Optimizer classes
    "Muon",
    "Lion",
    "AdaptiveLion",
    "Adafactor",
    "AdafactorScheduler",

    # Optimizer configuration helpers
    "configure_muon_optimizer",
    "configure_lion_optimizer",
    "configure_adaptive_lion_optimizer",
    "configure_adafactor_optimizer",
    "configure_adafactor_with_scheduler",

    # Training-config factories
    "create_muon_training_config",
    "create_lion_training_config",
    "create_adafactor_training_config",

    # Compression
    "RLEEncoder",
    "CompressedBitDataset",
    "create_compression_aware_loss",
    "integrate_rle_with_training",
    "benchmark_compression_schemes",
    "create_rle_training_config",

    # Utilities
    "get_optimizer_config",
    "configure_optimizer",
    "ExtensionManager",
    "extension_manager",
    "analyze_memory_usage",
]


def get_version():
    """Get package version."""
    return __version__


def list_optimizers():
    """List all available optimizers."""
    return ExtensionManager.SUPPORTED_OPTIMIZERS.copy()


def list_compression_schemes():
    """List all available compression schemes."""
    return ExtensionManager.SUPPORTED_COMPRESSION.copy()


def get_package_info():
    """Get package information."""
    return {
        "name": "BTLM_Extensions",
        "version": __version__,
        "author": __author__,
        "email": __email__,
        "optimizers": list_optimizers(),
        "compression": list_compression_schemes(),
        "description": "Advanced optimizers and compression for BitTransformerLM",
    }
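# For reference, get_package_info() currently reports version "1.0.0",
# optimizers ["muon", "lion", "adafactor"], and compression ["rle"].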


def _welcome_message():
    """Print a welcome message with available extensions."""
    print(f"BTLM_Extensions v{__version__} loaded!")
    print(f"Available optimizers: {', '.join(list_optimizers())}")
    print(f"Available compression: {', '.join(list_compression_schemes())}")
    print("Use help(BTLM_Extensions) for detailed documentation")


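# Note: _welcome_message() is defined but not invoked on import; call it
# manually if you want the banner (assumption: auto-invocation was omitted
# deliberately to keep imports quiet).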
def demo_usage():
    """
    Demonstration of BTLM_Extensions usage:

        # Quick optimizer swap
        from BTLM_Extensions import configure_optimizer

        # Try different optimizers
        muon_opt, muon_sched = configure_optimizer("muon", model, lr=1e-3)
        lion_opt, lion_sched = configure_optimizer("lion", model, lr=1e-4)
        adafactor_opt, adafactor_sched = configure_optimizer("adafactor", model)

        # Use with BitTransformerLM training
        from bit_transformer.training import train_loop

        train_loop(model, data, optimizer=muon_opt, scheduler=muon_sched)

        # Advanced compression
        from BTLM_Extensions import RLEEncoder, integrate_rle_with_training

        # Set up compression-aware training
        dataset, loss_fn = integrate_rle_with_training(model, data)

        # Benchmark optimizers
        from BTLM_Extensions import extension_manager

        results = extension_manager.benchmark_optimizers(model, test_data)
        print("Benchmark results:", results)
    """
    pass