EXAONE-Path-2.5-backup / modeling_exaonepath_patch_encoder.py

Upload modeling_exaonepath_patch_encoder.py with huggingface_hub

3bcc19b verified 5 months ago

3.14 kB

	from __future__ import annotations

	"""Remote-code modeling file for EXAONE-Path Patch Encoder.

	Unified with slide-encoder style:
	- Keep this file small.
	- At runtime, download the repo snapshot and import the actual model code from
	`exaonepath/` (so we don't duplicate model definitions here).

	This requires the Hub repo to include `exaonepath/`.
	"""

	from typing import Any, Dict, Optional
	import importlib
	import sys

	from huggingface_hub import snapshot_download
	from torch import Tensor, nn
	from transformers import PretrainedConfig, PreTrainedModel


	class ExaonePathPatchEncoderConfig(PretrainedConfig):
	    """Configuration for the EXAONE-Path patch encoder.

	    Holds the constructor arguments that ``ExaonePathPatchEncoderModel``
	    forwards to the underlying ``PatchEncoder``.

	    Args:
	        image_encoder: Name of the image encoder variant; coerced to ``str``.
	        patch_size: Patch size; coerced to ``int``.
	        img_size: Input image size. A single ``int`` is expanded to a square
	            ``(size, size)``; otherwise the first two elements are used.
	        extra_kwargs: Optional extra keyword arguments for ``PatchEncoder``.
	            A shallow copy is stored, so the caller's dict is never shared.
	        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
	    """

	    model_type = "exaonepath_patch_encoder"

	    def __init__(
	        self,
	        image_encoder: str = "vitb",
	        patch_size: int = 14,
	        img_size=(224, 224),
	        extra_kwargs: Optional[Dict[str, Any]] = None,
	        **kwargs: Any,
	    ):
	        self.image_encoder = str(image_encoder)
	        self.patch_size = int(patch_size)

	        # Accept a bare int as shorthand for a square image.
	        if isinstance(img_size, int):
	            img_size = (img_size, img_size)
	        # Stored as a list of two ints (presumably so the value round-trips
	        # through the JSON config unchanged -- TODO confirm).
	        self.img_size = [int(img_size[0]), int(img_size[1])]

	        # `None` and empty mappings both normalise to a fresh empty dict.
	        self.extra_kwargs = dict(extra_kwargs or {})

	        super().__init__(**kwargs)


	class ExaonePathPatchEncoderModel(PreTrainedModel):
	    """Hugging Face wrapper around the EXAONE-Path ``PatchEncoder``.

	    The network itself is not defined in this file. It is imported at
	    construction time from ``exaonepath.models.patch_encoder_hf``, which is
	    expected to live in the same repository (or local directory) the config
	    was loaded from.
	    """

	    config_class = ExaonePathPatchEncoderConfig
	    base_model_prefix = "patch_encoder"

	    def __init__(self, config: ExaonePathPatchEncoderConfig):
	        """Build the wrapped ``PatchEncoder`` from ``config``.

	        Args:
	            config: Patch-encoder configuration. Its ``_name_or_path`` (or
	                ``name_or_path``) is used to locate the ``exaonepath/``
	                package; if neither is a non-empty string, ``exaonepath``
	                must already be importable.

	        Raises:
	            ImportError: If ``exaonepath.models.patch_encoder_hf`` cannot be
	                imported.
	            AttributeError: If that module has no ``PatchEncoder`` attribute.
	        """
	        super().__init__(config)

	        # Function-scope import so this block does not depend on a new
	        # module-level import.
	        import os

	        # Ensure the repo code (including `exaonepath/`) is available at runtime.
	        repo_id = (
	            getattr(config, "_name_or_path", None)
	            or getattr(config, "name_or_path", None)
	        )
	        if isinstance(repo_id, str) and repo_id:
	            if os.path.isdir(repo_id):
	                # The config came from a local directory: use it directly.
	                # A filesystem path is not a valid Hub repo id, so passing
	                # it to `snapshot_download` would fail.
	                local_root = os.path.abspath(repo_id)
	            else:
	                local_root = snapshot_download(repo_id)
	            if local_root not in sys.path:
	                # Prepend so the repo's `exaonepath` is found first.
	                sys.path.insert(0, local_root)

	        PatchEncoder = getattr(
	            importlib.import_module("exaonepath.models.patch_encoder_hf"),
	            "PatchEncoder",
	        )

	        # Tolerate configs where `extra_kwargs` is missing or None.
	        extra = getattr(config, "extra_kwargs", None) or {}
	        self.patch_encoder: nn.Module = PatchEncoder(
	            image_encoder=config.image_encoder,
	            patch_size=int(config.patch_size),
	            img_size=list(config.img_size),
	            **extra,
	        )

	        self.post_init()

	    def forward(
	        self,
	        x: Optional[Tensor] = None,
	        *,
	        pixel_values: Optional[Tensor] = None,
	        **kwargs: Any,
	    ) -> Tensor:
	        """Return patch embedding as a tensor.

	        Args:
	            x: Input tensor. Takes precedence over ``pixel_values`` when both
	                are given.
	            pixel_values: Alternative keyword name for the same input,
	                accepted for compatibility with the Hugging Face convention.
	            **kwargs: Accepted and ignored.

	        Returns:
	            patch_embedding: [B, C]

	        Raises:
	            ValueError: If neither ``x`` nor ``pixel_values`` is provided.

	        Note:
	            The patch encoder produces a single embedding per input patch image.
	            We return the tensor directly for the simplest user-facing API.
	        """
	        # Prefer the simple positional argument `x`, but also accept the
	        # Hugging Face convention `pixel_values=` for compatibility.
	        if x is None:
	            x = pixel_values
	        if x is None:
	            raise ValueError("Missing input tensor. Provide `x` (positional) or `pixel_values=`.")

	        return self.patch_encoder(x)


	# Public names exported by this module.
	__all__ = [
	    "ExaonePathPatchEncoderConfig",
	    "ExaonePathPatchEncoderModel",
	]