Upload trained model to hfastino/broke-fish

Browse files

Files changed (11) hide show

added_tokens.json +13 -0
code/inference.py +292 -0
code/requirements.txt +12 -0
config.json +9 -0
encoder_config/config.json +33 -0
labels.json +1 -0
model.safetensors +3 -0
special_tokens_map.json +123 -0
spm.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +151 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "[C]": 128004,
+  "[DESCRIPTION]": 128010,
+  "[EXAMPLE]": 128008,
+  "[E]": 128005,
+  "[L]": 128007,
+  "[MASK]": 128000,
+  "[OUTPUT]": 128009,
+  "[P]": 128003,
+  "[R]": 128006,
+  "[SEP_STRUCT]": 128001,
+  "[SEP_TEXT]": 128002
+}

code/inference.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""
+SageMaker Multi-Model Endpoint inference script for GLiNER2.
+This script handles model loading and inference for the GLiNER2 Multi-Model Endpoint.
+Models are loaded dynamically based on the TargetModel header in the request.
+Key differences from single-model inference:
+- model_fn() receives the full path to the model directory (including model name)
+- Models are cached automatically by SageMaker MME
+- Multiple models can be loaded in memory simultaneously
+- LRU eviction when memory is full
+"""
+import json
+import os
+import sys
+import subprocess
+def _ensure_gliner2_installed():
+    """
+    Ensure gliner2 is installed. Install it dynamically if missing.
+    This is a workaround for SageMaker MME where requirements.txt
+    might not be installed automatically.
+    """
+    try:
+        import gliner2  # noqa: PLC0415
+        print(f"[MME] gliner2 version {gliner2.__version__} already installed")
+        return True
+    except ImportError:
+        print("[MME] gliner2 not found, installing...")
+        try:
+            # IMPORTANT: Use transformers<4.46 for compatibility with PyTorch 2.1.0
+            # (transformers 4.46+ requires PyTorch 2.3+ for torch.utils._pytree.register_pytree_node)
+            subprocess.check_call(
+                [
+                    sys.executable,
+                    "-m",
+                    "pip",
+                    "install",
+                    "--quiet",
+                    "--no-cache-dir",
+                    "gliner2==1.0.1",
+                    "transformers>=4.30.0,<4.46.0",
+                ]
+            )
+            print("[MME] ✓ gliner2 installed successfully")
+            return True
+        except subprocess.CalledProcessError as e:
+            print(f"[MME] ERROR: Failed to install gliner2: {e}")
+            return False
+# Ensure gliner2 is installed before importing torch (to avoid conflicts)
+# The boolean result is ignored here: model_fn() retries the import (and the
+# install) itself and raises ImportError if gliner2 is still unavailable.
+_ensure_gliner2_installed()
+import torch  # noqa: E402
+# Add parent directory to path to potentially import from gliner_2_inference
+# (dirname applied twice to this file's absolute path is the directory above
+# the one holding this script; it is put first on sys.path).
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+class DummyModel:
+    """Placeholder model for MME container initialization"""
+    def __call__(self, *args, **kwargs):
+        raise ValueError("Container model invoked directly. Use TargetModel header.")
+    def extract_entities(self, *args, **kwargs):
+        raise ValueError("Container model invoked directly. Use TargetModel header.")
+    def classify_text(self, *args, **kwargs):
+        raise ValueError("Container model invoked directly. Use TargetModel header.")
+    def extract_json(self, *args, **kwargs):
+        raise ValueError("Container model invoked directly. Use TargetModel header.")
+def model_fn(model_dir):
+    """
+    Load the GLiNER2 model from the model directory.
+    For Multi-Model Endpoints, SageMaker passes the full path to the specific
+    model being loaded, e.g., /opt/ml/models/<model_name>/
+    Args:
+        model_dir: The directory where model artifacts are extracted
+    Returns:
+        The loaded GLiNER2 model
+    """
+    print(f"[MME] Loading model from: {model_dir}")
+    try:
+        print(f"[MME] Contents: {os.listdir(model_dir)}")
+    except Exception as e:
+        print(f"[MME] Could not list directory contents: {e}")
+    # Import GLiNER2 here (should be installed by _ensure_gliner2_installed)
+    try:
+        from gliner2 import GLiNER2  # noqa: PLC0415
+    except ImportError as e:
+        print(f"[MME] ERROR: gliner2 import failed: {e}")
+        print("[MME] Attempting to install gliner2...")
+        if _ensure_gliner2_installed():
+            from gliner2 import GLiNER2  # noqa: PLC0415
+        else:
+            GLiNER2 = None
+    # Detect device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"[MME] Using device: {device}")
+    if torch.cuda.is_available():
+        print(f"[MME] GPU: {torch.cuda.get_device_name(0)}")
+        print(f"[MME] CUDA version: {torch.version.cuda}")
+        mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
+        print(f"[MME] GPU memory: {mem_gb:.2f} GB")
+    # Get HuggingFace token if available
+    hf_token = os.environ.get("HF_TOKEN")
+    # Check if this is the container model (placeholder)
+    if os.path.exists(os.path.join(model_dir, "mme_container.txt")):
+        print("[MME] Container model detected - returning dummy model")
+        return DummyModel()
+    if GLiNER2 is None:
+        raise ImportError("gliner2 package required but not found")
+    # Check if model is already extracted in model_dir
+    if os.path.exists(os.path.join(model_dir, "config.json")):
+        print("[MME] Loading model from extracted artifacts...")
+        model = GLiNER2.from_pretrained(model_dir, token=hf_token)
+    elif os.path.exists(os.path.join(model_dir, "download_at_runtime.txt")):
+        # Fallback: download from HuggingFace
+        print("[MME] Model not in archive, downloading from HuggingFace...")
+        model_name = os.environ.get("GLINER_MODEL", "fastino/gliner2-base-v1")
+        print(f"[MME] Downloading model: {model_name}")
+        model = GLiNER2.from_pretrained(model_name, token=hf_token)
+    else:
+        # Final fallback
+        model_name = os.environ.get("GLINER_MODEL", "fastino/gliner2-base-v1")
+        print(f"[MME] Model directory empty, downloading: {model_name}")
+        model = GLiNER2.from_pretrained(model_name, token=hf_token)
+    # Move model to GPU if available
+    print(f"[MME] Moving model to {device}...")
+    model = model.to(device)
+    # Enable half precision on GPU for memory efficiency
+    if torch.cuda.is_available():
+        print("[MME] Converting to fp16...")
+        model = model.half()
+    # Memory optimizations for GPU
+    if torch.cuda.is_available():
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+        torch.cuda.empty_cache()
+        # Reserve memory for multiple models in MME
+        torch.cuda.set_per_process_memory_fraction(0.85)
+        print("[MME] GPU memory optimizations enabled")
+    print(f"[MME] ✓ Model loaded successfully on {device}")
+    return model
+def input_fn(request_body, request_content_type):
+    """
+    Deserialize and prepare the input data for prediction.
+    Args:
+        request_body: The request body
+        request_content_type: The content type of the request
+    Returns:
+        Parsed input data as a dictionary
+    """
+    if request_content_type == "application/json":
+        input_data = json.loads(request_body)
+        return input_data
+    else:
+        raise ValueError(f"Unsupported content type: {request_content_type}")
+def predict_fn(input_data, model):
+    """
+    Run prediction on the input data using the loaded model.
+    Args:
+        input_data: Dictionary containing:
+            - task: One of 'extract_entities', 'classify_text', or 'extract_json'
+            - text: Text to process (string) or list of texts (for batch processing)
+            - schema: Schema for extraction (format depends on task)
+            - threshold: Optional confidence threshold (default: 0.5)
+        model: The loaded GLiNER2 model
+    Returns:
+        Task-specific results (single result or list of results for batch)
+    """
+    # Clear CUDA cache before processing
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    text = input_data.get("text")
+    task = input_data.get("task", "extract_entities")
+    schema = input_data.get("schema")
+    threshold = input_data.get("threshold", 0.5)
+    if not text:
+        raise ValueError("'text' field is required")
+    if not schema:
+        raise ValueError("'schema' field is required")
+    # Detect batch mode
+    is_batch = isinstance(text, list)
+    if is_batch and len(text) == 0:
+        raise ValueError("'text' list cannot be empty")
+    # Use inference_mode for faster inference
+    with torch.inference_mode():
+        if task == "extract_entities":
+            if is_batch:
+                if hasattr(model, "batch_extract_entities"):
+                    result = model.batch_extract_entities(
+                        text, schema, threshold=threshold
+                    )
+                elif hasattr(model, "batch_predict_entities"):
+                    result = model.batch_predict_entities(
+                        text, schema, threshold=threshold
+                    )
+                else:
+                    result = [
+                        model.extract_entities(t, schema, threshold=threshold)
+                        for t in text
+                    ]
+            else:
+                result = model.extract_entities(text, schema, threshold=threshold)
+            return result
+        elif task == "classify_text":
+            if is_batch:
+                if hasattr(model, "batch_classify_text"):
+                    result = model.batch_classify_text(
+                        text, schema, threshold=threshold
+                    )
+                else:
+                    result = [
+                        model.classify_text(t, schema, threshold=threshold)
+                        for t in text
+                    ]
+            else:
+                result = model.classify_text(text, schema, threshold=threshold)
+            return result
+        elif task == "extract_json":
+            if is_batch:
+                if hasattr(model, "batch_extract_json"):
+                    result = model.batch_extract_json(text, schema, threshold=threshold)
+                else:
+                    result = [
+                        model.extract_json(t, schema, threshold=threshold) for t in text
+                    ]
+            else:
+                result = model.extract_json(text, schema, threshold=threshold)
+            return result
+        else:
+            raise ValueError(
+                f"Unsupported task: {task}. "
+                "Must be one of: extract_entities, classify_text, extract_json"
+            )
+def output_fn(prediction, response_content_type):
+    """
+    Serialize the prediction output.
+    Args:
+        prediction: The prediction result
+        response_content_type: The desired response content type
+    Returns:
+        Serialized prediction
+    """
+    if response_content_type == "application/json":
+        return json.dumps(prediction)
+    else:
+        raise ValueError(f"Unsupported response content type: {response_content_type}")

code/requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+# Requirements for GLiNER2 Multi-Model Endpoint
+# NOTE: These are installed when the SageMaker container starts
+#
+# IMPORTANT: SageMaker PyTorch 2.1.0 container requires transformers<4.46
+# (transformers 4.46+ uses torch.utils._pytree.register_pytree_node which needs PyTorch 2.3+)
+# Core dependencies - gliner2 must be installed for model loading
+gliner2==1.0.1
+transformers>=4.30.0,<4.46.0
+# JSON handling
+orjson>=3.9.0

config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_attn_implementation_autoset": true,
+  "counting_layer": "count_lstm_v2",
+  "max_width": 8,
+  "model_name": "microsoft/deberta-v3-base",
+  "model_type": "extractor",
+  "token_pooling": "first",
+  "transformers_version": "4.57.6"
+}

encoder_config/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_attn_implementation_autoset": true,
+  "attention_probs_dropout_prob": 0.1,
+  "dtype": "float32",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "transformers_version": "4.57.6",
+  "type_vocab_size": 0,
+  "vocab_size": 128011
+}

labels.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["certificate_number", "date", "destination", "fish_species", "health_status", "inspector_name", "organization", "origin_location", "quantity", "weight"]

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cdd9f766a4626b85c25e5000f3681049cd28c173b0e306172f405e8533cba1b
+size 833938108

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[SEP_STRUCT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[SEP_TEXT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[P]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[C]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[E]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[R]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[L]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[EXAMPLE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[OUTPUT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[DESCRIPTION]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,151 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "[SEP_STRUCT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "[SEP_TEXT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "[P]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "[C]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "[E]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "[R]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "[L]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "[EXAMPLE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "[OUTPUT]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "[DESCRIPTION]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "[SEP_STRUCT]",
+    "[SEP_TEXT]",
+    "[P]",
+    "[C]",
+    "[E]",
+    "[R]",
+    "[L]",
+    "[EXAMPLE]",
+    "[OUTPUT]",
+    "[DESCRIPTION]"
+  ],
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}