e2b_fin2 / unsloth_compiled_cache /Linear_peft_forward.py

Full backup of working directory

65f521e verified 9 days ago

5.47 kB

	"""
	2026.6.7
	2026.6.9
	5.5.0
	1.7.0
	__UNSLOTH_VERSIONING__
	"""

	# Unsloth auto generated code
	# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU Lesser General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/>.


	# Keyword arguments that belong to LoRA variants. `unsloth_forward` pops them
	# out of **kwargs so they are never forwarded to the wrapped base layer.
	try:
	    from peft.tuners.lora.layer import VARIANT_KWARG_KEYS
	except ImportError:
	    # The installed PEFT does not export the constant: fall back to the one
	    # key this file knows about.
	    VARIANT_KWARG_KEYS = ['alora_offsets']

	# Option dict for `torch.compile(options=...)`. In this file it is referenced
	# only by the (currently commented-out) decorator above `lora_forward`.
	torch_compile_options = {
	    'epilogue_fusion': True,
	    'max_autotune': False,
	    'shape_padding': True,
	    'trace.enabled': False,
	    'triton.cudagraphs': False,
	    'debug': False,
	    'dce': True,
	    'memory_planning': True,
	    'coordinate_descent_tuning': False,
	    'trace.graph_diagram': False,
	    'compile_threads': 4,
	    'group_fusion': True,
	    'disable_progress': True,
	    'verbose_progress': False,
	    'triton.multi_kernel': 0,
	    'triton.use_block_ptr': False,
	    'triton.enable_persistent_tma_matmul': True,
	    'triton.autotune_at_compile_time': False,
	    'triton.cooperative_reductions': False,
	    'cuda.compile_opt_level': '-O2',
	    'cuda.enable_cuda_lto': True,
	    'combo_kernels': False,
	    'benchmark_combo_kernel': True,
	    'combo_kernel_foreach_dynamic_shapes': True,
	}
	from torch import Tensor
	import torch
	import torch.nn as nn
	from torch.nn import functional as F
	from unsloth_zoo.temporary_patches.common import torch_compile
	from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
	from peft.tuners.lora.variants import (Any, torch)


	# Module-level aliases so `lora_forward` resolves the ops with a plain global lookup.
	torch_addmm = torch.addmm
	torch_add = torch.add


	# @torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
	def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
	    """Add one LoRA adapter's low-rank update to the base layer output.

	    Computes ``result + scaling * (dropout(x) @ A.T @ B.T)`` with a single
	    fused ``addmm``; when ``lora_B`` has a bias, ``scaling * bias`` is added
	    as well.

	    Args:
	        result: Base layer output. Its dtype is the compute dtype and its
	            shape is the shape of the returned tensor.
	        lora_A: Module exposing ``.weight`` (down projection).
	        lora_B: Module exposing ``.weight`` and ``.bias`` (up projection);
	            ``.bias`` may be ``None``.
	        dropout: Callable applied to ``x`` before the down projection.
	        x: Layer input; its last dimension is contracted with ``lora_A.weight``.
	        scaling: Scalar multiplier applied to the LoRA update (and to the bias).

	    Returns:
	        A new tensor with the shape and dtype of ``result``; ``result`` itself
	        is not modified.
	    """
	    # Use result.dtype (e.g. bfloat16 from the base layer) since x may have
	    # been cast to float32 by _cast_input_dtype when autocast is disabled.
	    target_dtype = result.dtype
	    xA = dropout(x).to(target_dtype) @ lora_A.weight.to(target_dtype).t()

	    # output = result + scaling * xA @ lora_B.weight.t()
	    # addmm only takes 2-D operands, so flatten the leading dimensions.
	    # reshape (rather than view) also accepts non-contiguous tensors and
	    # still returns a view whenever one is possible.
	    shape = result.shape
	    output = torch_addmm(
	        result.reshape(-1, shape[-1]),
	        xA.reshape(-1, xA.shape[-1]),
	        lora_B.weight.to(target_dtype).t(),
	        alpha = scaling,
	        beta = 1,
	    ).view(shape)

	    bias = lora_B.bias
	    if bias is not None:
	        # The bias is part of lora_B's output, so it is scaled like the update.
	        output = torch_add(
	            output,
	            bias.to(target_dtype),
	            alpha = scaling,
	        )
	    return output

	def unsloth_forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
	    """Forward pass of a LoRA-wrapped linear layer.

	    Runs the wrapped ``self.base_layer`` and, unless adapters are disabled or
	    already merged into the base weights, adds the contribution of every
	    active adapter.

	    Args:
	        x: Input tensor.
	        *args: Extra positional arguments forwarded to the base layer.
	        **kwargs: Extra keyword arguments. ``adapter_names`` and every key in
	            ``VARIANT_KWARG_KEYS`` are consumed here; the rest is forwarded
	            to the base layer (and to variant forwards).

	    Returns:
	        The layer output. On the adapter path it is cast back to the dtype
	        produced by the base layer.
	    """
	    adapter_names = kwargs.pop("adapter_names", None)
	    # Don't pass these to base_layer.
	    variant_kwargs = {k: kwargs.pop(k, None) for k in VARIANT_KWARG_KEYS}

	    adapters_disabled = self.disable_adapters
	    if adapters_disabled:
	        if self.merged:
	            self.unmerge()
	    elif adapter_names is not None:
	        # Per-sample adapter selection is delegated entirely.
	        return self._mixed_batch_forward(
	            x,
	            *args,
	            adapter_names=adapter_names,
	            **variant_kwargs,
	            **kwargs,
	        )

	    # Every remaining path runs the base layer. Without autocast, bring x to
	    # the base weight dtype first. Weights carrying a `quant_state` attribute
	    # are skipped (presumably quantized weights whose storage dtype is not
	    # the compute dtype -- NOTE(review): confirm).
	    if not torch.is_autocast_enabled():
	        weight = getattr(self.base_layer, 'weight', None)
	        if (
	            weight is not None
	            and not hasattr(weight, 'quant_state')
	            and x.dtype != weight.dtype
	        ):
	            x = x.to(weight.dtype)
	    result = self.base_layer(x, *args, **kwargs)

	    if adapters_disabled or self.merged:
	        # Adapters are off, or their effect is already inside the base weights.
	        return result

	    torch_result_dtype = result.dtype

	    lora_A_keys = self.lora_A.keys()
	    for active_adapter in self.active_adapters:
	        if active_adapter not in lora_A_keys:
	            continue

	        lora_A = self.lora_A[active_adapter]
	        lora_B = self.lora_B[active_adapter]
	        dropout = self.lora_dropout[active_adapter]
	        scaling = self.scaling[active_adapter]
	        if not torch.is_autocast_enabled():
	            adapter_dtype = lora_A.weight.dtype
	            result, x = result.to(adapter_dtype), x.to(adapter_dtype)

	        if active_adapter not in self.lora_variant:  # vanilla LoRA
	            # Accumulate instead of returning here, so that every active
	            # adapter contributes rather than only the first one.
	            result = lora_forward(result, lora_A, lora_B, dropout, x, scaling)
	        else:
	            result = self.lora_variant[active_adapter].forward(
	                self,
	                active_adapter=active_adapter,
	                x=x,
	                result=result,
	                **variant_kwargs,
	                **kwargs,
	            )

	    return result.to(torch_result_dtype)